# Correct Z range for gallery view
## Philipp Steen 2022
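This script shifts the z coordinates of all localizations (both the DNA-PAINT and the RESI localizations) by subtracting a constant value per group (pick): the minimum RESI z coordinate of that group. As a result, the RESI localizations of every pick start at z = 0 and the displayed z range has consistent color coding for all picks. Groups whose RESI z range (max − min) exceeds 140 are discarded from both data sets.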

In [9]:
import pandas as pd
import h5py

def Import(input_path):
    """Load the 'locs' table of a clustered Picasso .hdf5 file, sorted by group and frame.

    Parameters
    ----------
    input_path : str
        Path to the .hdf5 file. The table stored under the key 'locs' must
        contain the columns 'group' and 'frame'.

    Returns
    -------
    pandas.DataFrame
        The localizations ordered by 'group' and, within each group, by
        'frame', with a fresh 0..n-1 index.
    """
    fulltable = pd.read_hdf(input_path, key='locs')
    # sort_values returns a new frame; the result has to be kept, otherwise
    # the table is returned in its original (unsorted) order.
    fulltable = fulltable.sort_values(by=['group', 'frame']).reset_index(drop=True)
    return fulltable

# Input files: clustered Picasso .hdf5 tables (group information is required).
path_resi = "resi-file-path.hdf5"
path_dnapaint = "dna-paint-file-path.hdf5"

# Destination folder for the corrected files.
# NOTE(review): the picasso_hdf5(...) calls at the end of the notebook pass the
# literal "path/" instead of this variable — confirm which one is intended.
path = "destination-path/"

def picasso_hdf5(df, hdf5_fname, hdf5_oldname, path):
    """
    Save a modified localization table as a Picasso-compatible .hdf5 file
    and create the accompanying .yaml file.

    This function receives a pandas data frame coming from a Picasso .hdf5
    file that has been modified somehow (e.g. coordinates rotated, groups
    edited, etc.), writes it as a structured array to the dataset 'locs' of
    a new .hdf5 file, and copies the .yaml file of the original Picasso file
    under the new name.

    It is meant to be used in a folder that contains the original Picasso
    .hdf5 file and the .yaml file.

    - df: pandas data frame object
    - hdf5_fname: the desired filename for the Picasso-compatible .hdf5 file
    - hdf5_oldname: name of the original Picasso file that was modified
    - path: path of the folder containing the files. File names are appended
      by plain string concatenation, so it must end with a path separator.

    Returns nothing; prints a confirmation message when both files exist.

    Note: include ".hdf5" in the file names. The .yaml names are derived by
    replacing ".hdf5" with ".yaml"; without that suffix the .yaml name would
    equal the .hdf5 name.
    Warning: the new .yaml file is an exact copy of the old one. If important
    information should be added to the new .yaml file this function should be
    modified.
    """

    # Structured (record) array with one field per data frame column; the
    # data frame index is not stored.
    locs = df.to_records(index=False)

    # Saving data (the context manager closes the file even if writing fails)
    with h5py.File(path + hdf5_fname, 'w') as hf:
        hf.create_dataset('locs', data=locs)

    # YAML saver
    yaml_oldname = path + hdf5_oldname.replace('.hdf5', '.yaml')
    yaml_newname = path + hdf5_fname.replace('.hdf5', '.yaml')

    # Read the old file completely before opening the new one, so that the
    # copy also works when both names are identical.
    with open(yaml_oldname, 'r') as yaml_oldfile:
        yaml_file_data = yaml_oldfile.read()

    with open(yaml_newname, 'w') as yaml_newfile:
        yaml_newfile.write(yaml_file_data)

    print('New Picasso-compatible .hdf5 file and .yaml file successfully created.')

In [10]:
# Load the RESI and the DNA-PAINT localization tables (RESI first).
df_resi, df_dnapaint = Import(path_resi), Import(path_dnapaint)

In [26]:
# Largest z spread (max - min) a RESI group may have to be kept.
# NOTE(review): same units as the 'z' column, presumably nanometres — confirm.
MAX_Z_RANGE = 140


def correct_z_per_group(resi, dnapaint, max_z_range=MAX_Z_RANGE):
    """Shift z so that each kept group starts at the minimum RESI z of that group.

    Parameters
    ----------
    resi, dnapaint : pandas.DataFrame
        Localization tables with the columns 'group' and 'z'. Neither input
        is modified.
    max_z_range : float
        Groups whose RESI z spread (max - min) is larger than this value are
        dropped from both tables.

    Returns
    -------
    (resi_corrected, dnapaint_corrected, kept_groups)
        Copies of the two tables restricted to the kept groups, with the
        group-wise RESI minimum of z subtracted from 'z', and the list of
        kept group ids in order of first appearance in ``resi``.
    """
    # One pass over the RESI table instead of re-scanning it for every group.
    z_stats = resi.groupby("group", sort=False)["z"].agg(["min", "max"])
    within_range = (z_stats["max"] - z_stats["min"]) <= max_z_range
    z_offset = z_stats.loc[within_range, "min"]
    kept_groups = list(z_offset.index)

    def shift(table):
        kept = table[table["group"].isin(kept_groups)].copy()
        # Cast the offset to the dtype of 'z' so the column keeps its dtype.
        offset = kept["group"].map(z_offset).astype(kept["z"].dtype)
        kept["z"] = kept["z"] - offset
        return kept

    return shift(resi), shift(dnapaint), kept_groups


group_ids = df_resi["group"].unique()

# df_resi and df_dnapaint are left untouched, so re-running this cell always
# starts from the loaded data.
df_resi_filter, df_dnapaint_filter, filter_i = correct_z_per_group(df_resi, df_dnapaint)

In [27]:
# (table, new file name, original Picasso file name) for each data set.
# NOTE(review): the folder is the literal "path/", not the `path` variable
# defined near the top of the notebook — confirm which one is intended.
save_jobs = (
    (df_resi_filter, "resi_z_corrected.hdf5", "joined_RESI_rounds1-4_RESI.hdf5"),
    (df_dnapaint_filter, "dnapaint_z_corrected.hdf5", "joined_RESI_rounds_1-4_DNAPAINT.hdf5"),
)

for table, new_name, old_name in save_jobs:
    picasso_hdf5(table, new_name, old_name, "path/")

New Picasso-compatible .hdf5 file and .yaml file successfully created.
New Picasso-compatible .hdf5 file and .yaml file successfully created.
