In [2]:
import os
import pandas as pd

# ---------------------------------------------------------------------------
# Convert per-condition / per-time-point narrowPeak files into CSV files that
# carry an extra "chromosome" column looked up from a contig → chromosome map.
# ---------------------------------------------------------------------------

# Input layout: <base_path>/<condition>/<time point>/*.narrowPeak
base_path = "data/green_monkey/narrowPeak-compartmentBed-raw/experiments/"
conditions = ["untr", "vacv"]
time_points = ["12hrs", "18hrs", "24hrs"]

# Chromosome mapping CSV (Contig → Chromosome)
chr_map_path = "data/green_monkey/annotation/Chlorocebus_sabeus_mva.genome.map.csv"

# Standard narrowPeak column names (the first column is the contig identifier
# that is used as the join key against the chromosome map).
columns = [
    "chromosome_nc", "start", "end", "name", "score", "strand",
    "signalValue", "pValue", "qValue", "peak"
]

# Column order of the CSV output: chromosome name first, then coordinates,
# then the original contig identifier and the remaining narrowPeak fields.
output_columns = [
    "chromosome", "start", "end", "chromosome_nc", "name", "score", "strand",
    "signalValue", "pValue", "qValue", "peak"
]

output_folder = "data/green_monkey/ata-sec/"


def load_chromosome_map(path):
    """Load the contig → chromosome lookup table.

    Parameters
    ----------
    path : str or file-like
        CSV with at least the columns "Contig" and "Chromosome"
        (anything ``pd.read_csv`` accepts).

    Returns
    -------
    pandas.DataFrame
        Two columns, "chromosome_nc" (from "Contig") and "chromosome"
        (from "Chromosome"), with exactly one row per contig.

    Raises
    ------
    ValueError
        If one contig is mapped to more than one chromosome. Such a map would
        make the later left merge duplicate peak rows.
    """
    chr_map = (
        pd.read_csv(path)[["Contig", "Chromosome"]]
        .rename(columns={"Contig": "chromosome_nc", "Chromosome": "chromosome"})
        # Rows without a contig can never match a peak; dropping them keeps
        # them from tripping the uniqueness check below.
        .dropna(subset=["chromosome_nc"])
        # Exact repeats are harmless, so collapse them instead of failing.
        .drop_duplicates()
        .reset_index(drop=True)
    )

    repeated = chr_map["chromosome_nc"].duplicated(keep=False)
    if repeated.any():
        conflicting = sorted(chr_map.loc[repeated, "chromosome_nc"].astype(str).unique())
        raise ValueError(
            f"Contigs mapped to more than one chromosome: {', '.join(conflicting)}"
        )
    return chr_map


def annotate_peaks(peaks, chr_map):
    """Add the chromosome name to every peak and reorder the columns.

    Parameters
    ----------
    peaks : pandas.DataFrame
        Peak table whose columns are those listed in ``columns``.
    chr_map : pandas.DataFrame
        Lookup table with the columns "chromosome_nc" and "chromosome".

    Returns
    -------
    pandas.DataFrame
        One row per input peak, columns ordered as ``output_columns``.
        Peaks whose contig is absent from ``chr_map`` keep a missing
        "chromosome" value.

    Raises
    ------
    pandas.errors.MergeError
        If "chromosome_nc" is not unique in ``chr_map``.
    """
    annotated = peaks.merge(
        chr_map,
        on="chromosome_nc",
        how="left",
        # Guarantees the merge cannot multiply peak rows.
        validate="many_to_one",
    )
    return annotated[output_columns]


def build_output_name(hour, cond, fname, disambiguate=False):
    """Return the CSV file name for one converted narrowPeak file.

    With ``disambiguate=False`` the name depends only on the time point and
    the condition. With ``disambiguate=True`` the stem of ``fname`` is
    embedded as well, so several inputs from the same folder do not
    overwrite each other.
    """
    if not disambiguate:
        return f"{hour}_{cond}_narrowPeak.csv"
    stem = os.path.splitext(fname)[0]
    return f"{hour}_{cond}_{stem}_narrowPeak.csv"


# Inside a notebook kernel __name__ is "__main__", so this runs exactly like
# the rest of the cell; the guard only keeps the helpers above usable without
# touching the filesystem.
if __name__ == "__main__":
    chr_map_df = load_chromosome_map(chr_map_path)
    os.makedirs(output_folder, exist_ok=True)

    for cond in conditions:
        for hour in time_points:
            condition_path = os.path.join(base_path, cond, hour)
            if not os.path.isdir(condition_path):
                print(f"Skipping missing folder: {condition_path}")
                continue

            # Sorted so that the processing order is reproducible
            # (os.listdir returns entries in arbitrary order).
            peak_files = sorted(
                entry for entry in os.listdir(condition_path)
                if entry.endswith(".narrowPeak")
            )
            if not peak_files:
                print(f"No .narrowPeak files in: {condition_path}")
                continue

            # More than one input would map onto the same output name and the
            # last one written would silently win; give each its own CSV.
            several_inputs = len(peak_files) > 1
            if several_inputs:
                print(
                    f"Warning: {len(peak_files)} narrowPeak files in {condition_path}; "
                    "writing one CSV per file with the source name embedded."
                )

            for fname in peak_files:
                file_path = os.path.join(condition_path, fname)

                try:
                    peaks = pd.read_csv(file_path, sep="\t", header=None, names=columns)
                    df = annotate_peaks(peaks, chr_map_df)

                    # Surface contigs the map does not know instead of
                    # silently writing empty chromosome names.
                    n_unmapped = int(df["chromosome"].isna().sum())
                    if n_unmapped:
                        print(
                            f"Warning: {n_unmapped} of {len(df)} peaks in {fname} "
                            "have no chromosome name in the map"
                        )

                    output_name = build_output_name(hour, cond, fname, several_inputs)
                    output_path = os.path.join(output_folder, output_name)
                    df.to_csv(output_path, index=False)
                    print(f"Converted {fname} → {output_name}")
                except Exception as e:
                    # Covers reading, merging and writing, hence "convert".
                    print(f"Failed to convert {file_path}: {e}")

    print("Done.")


Converted Untr_A_12HPI_2803_001_autosomes_peaks.narrowPeak → 12hrs_untr_narrowPeak.csv
Converted Untr_A_18HPI_2803_003_autosomes_peaks.narrowPeak → 18hrs_untr_narrowPeak.csv
Converted Untr_A_24HPI_2803_005_autosomes_peaks.narrowPeak → 24hrs_untr_narrowPeak.csv
Converted Vacv_A_12HPI_2803_002_autosomes_peaks.narrowPeak → 12hrs_vacv_narrowPeak.csv
Converted Vacv_A_18HPI_2803_004_autosomes_peaks.narrowPeak → 18hrs_vacv_narrowPeak.csv
Converted Vacv_A_24HPI_2803_006_autosomes_peaks.narrowPeak → 24hrs_vacv_narrowPeak.csv
Done.
