In [1]:
import pandas as pd
import os

In [2]:
structure_file_path = 'data/GSE248049/all_structure_files'

resolution = '100000'


In [3]:
def calculate_start_point(midpoint, endpoint):
    start_x = (2 * midpoint[0]) - endpoint[0]
    start_y = (2 * midpoint[1]) - endpoint[1]
    start_z = (2 * midpoint[2]) - endpoint[2]
    
    return [start_x, start_y, start_z]

In [8]:
def add_initial_id_and_position(atom_file_path):
    chr_folder_path = os.path.join(atom_file_path)

    for chr_folder_name in os.listdir(chr_folder_path):
        chr_path = os.path.join(chr_folder_path, chr_folder_name)

        # Check if the current item is a directory starting with 'chr'
        if os.path.isdir(chr_path) and chr_folder_name.startswith('chr'):  
            print(f"Processing folder: {chr_folder_name}")

            # List all CSV files in the folder, ignoring those already containing '_with_id0'
            csv_files = [f for f in os.listdir(chr_path) if f.endswith('.csv') and '_with_id0' not in f]

            for csv_file in csv_files:
                old_csv_path = os.path.join(chr_path, csv_file)

                # Normalize filename: Replace "-" with "_" in the input filename
                normalized_csv_file = csv_file.replace('-', '_')
                new_csv_path = os.path.join(chr_path, normalized_csv_file)

                # Rename the file if needed
                if old_csv_path != new_csv_path:
                    os.rename(old_csv_path, new_csv_path)
                    print(f"Renamed {old_csv_path} → {new_csv_path}")

                # Read the renamed CSV file
                df = pd.read_csv(new_csv_path)

                # Get coordinates of id1 and id2
                id1_midpoint = df.loc[df['id'] == 1, ['x', 'y', 'z']].values.flatten().tolist()
                id2_endpoint = df.loc[df['id'] == 2, ['x', 'y', 'z']].values.flatten().tolist()
                
                startpoint = calculate_start_point(id1_midpoint, id2_endpoint)
                
                # Insert id0 at the beginning of the DataFrame
                id0_row = pd.DataFrame({
                    'id': [0],
                    'x': startpoint[0],
                    'y': startpoint[1],
                    'z': startpoint[2],
                })
                
                df = pd.concat([id0_row, df]).reset_index(drop=True)
                
                # Generate new output filename with "_with_id0"
                base_name = normalized_csv_file.replace('.csv', '')  # Remove ".csv"
                new_file_name = f"{base_name}_with_id0.csv"
                save_file_path = os.path.join(chr_path, new_file_name)

                df.to_csv(save_file_path, index=False)
                print(f"File saved to {save_file_path}")



add_initial_id_and_position(structure_file_path)

Processing folder: chr23
Renamed data/GSE248049/all_structure_files/chr23/structure_12hrs-vacv.csv → data/GSE248049/all_structure_files/chr23/structure_12hrs_vacv.csv
File saved to data/GSE248049/all_structure_files/chr23/structure_12hrs_vacv_with_id0.csv
File saved to data/GSE248049/all_structure_files/chr23/structure_18hrs_untr_with_id0.csv
Renamed data/GSE248049/all_structure_files/chr23/structure_18hrs-vacv.csv → data/GSE248049/all_structure_files/chr23/structure_18hrs_vacv.csv
File saved to data/GSE248049/all_structure_files/chr23/structure_18hrs_vacv_with_id0.csv
File saved to data/GSE248049/all_structure_files/chr23/structure_12hrs_untr_with_id0.csv
Renamed data/GSE248049/all_structure_files/chr23/structure_24hrs-vacv.csv → data/GSE248049/all_structure_files/chr23/structure_24hrs_vacv.csv
File saved to data/GSE248049/all_structure_files/chr23/structure_24hrs_vacv_with_id0.csv
File saved to data/GSE248049/all_structure_files/chr23/structure_24hrs_untr_with_id0.csv
Processing fold

File saved to data/GSE248049/all_structure_files/chr3/structure_24hrs_vacv_with_id0.csv
File saved to data/GSE248049/all_structure_files/chr3/structure_24hrs_untr_with_id0.csv
Processing folder: chr2
Renamed data/GSE248049/all_structure_files/chr2/structure_12hrs-vacv.csv → data/GSE248049/all_structure_files/chr2/structure_12hrs_vacv.csv
File saved to data/GSE248049/all_structure_files/chr2/structure_12hrs_vacv_with_id0.csv
File saved to data/GSE248049/all_structure_files/chr2/structure_18hrs_untr_with_id0.csv
Renamed data/GSE248049/all_structure_files/chr2/structure_18hrs-vacv.csv → data/GSE248049/all_structure_files/chr2/structure_18hrs_vacv.csv
File saved to data/GSE248049/all_structure_files/chr2/structure_18hrs_vacv_with_id0.csv
File saved to data/GSE248049/all_structure_files/chr2/structure_12hrs_untr_with_id0.csv
Renamed data/GSE248049/all_structure_files/chr2/structure_24hrs-vacv.csv → data/GSE248049/all_structure_files/chr2/structure_24hrs_vacv.csv
File saved to data/GSE248049

File saved to data/GSE248049/all_structure_files/chr26/structure_12hrs_vacv_with_id0.csv
File saved to data/GSE248049/all_structure_files/chr26/structure_18hrs_untr_with_id0.csv
Renamed data/GSE248049/all_structure_files/chr26/structure_18hrs-vacv.csv → data/GSE248049/all_structure_files/chr26/structure_18hrs_vacv.csv
File saved to data/GSE248049/all_structure_files/chr26/structure_18hrs_vacv_with_id0.csv
File saved to data/GSE248049/all_structure_files/chr26/structure_12hrs_untr_with_id0.csv
Renamed data/GSE248049/all_structure_files/chr26/structure_24hrs-vacv.csv → data/GSE248049/all_structure_files/chr26/structure_24hrs_vacv.csv
File saved to data/GSE248049/all_structure_files/chr26/structure_24hrs_vacv_with_id0.csv
File saved to data/GSE248049/all_structure_files/chr26/structure_24hrs_untr_with_id0.csv
Processing folder: chr7
Renamed data/GSE248049/all_structure_files/chr7/structure_12hrs-vacv.csv → data/GSE248049/all_structure_files/chr7/structure_12hrs_vacv.csv
File saved to data