In [7]:
import os
import pandas as pd

In [8]:
def extract_gene_comp_exp_per_chr(structure_dir, gene_exp_dir):
    # Load expression comparison files
    expr_files = [
        f for f in os.listdir(gene_exp_dir)
        if f.endswith('.csv') and f.startswith('mapped')
    ]

    for expr_file in expr_files:
        expr_path = os.path.join(gene_exp_dir, expr_file)
        df = pd.read_csv(expr_path)

        # Ensure there's a chromosome column
        if 'chromosome' not in df.columns:
            raise ValueError(f"No 'chromosome' column found in {expr_file}")

        for chr_folder_name in os.listdir(structure_dir):
            chr_path = os.path.join(structure_dir, chr_folder_name)

            if not os.path.isdir(chr_path) or not chr_folder_name.startswith('chr'):
                continue

            print(f"Processing: {expr_file} → {chr_folder_name}")

            # Filter expression data by chromosome
            chr_df = df[df['chromosome'] == chr_folder_name]

            if chr_df.empty:
                print(f"  → No genes found for {chr_folder_name}")
                continue

            # Save filtered result inside chromosome folder
            out_file_name = expr_file.replace('.csv', f'_{chr_folder_name}.csv')
            out_path = os.path.join(chr_path, out_file_name)
            chr_df.to_csv(out_path, index=False)


In [9]:
structure_dir = 'data/green_monkey/all_structure_files'
gene_exp_dir = 'data/green_monkey/rna_comparison/comparison/'

extract_gene_comp_exp_per_chr(structure_dir, gene_exp_dir)


Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr23
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr24
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr12
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr15
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr14
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr13
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr25
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr22
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr4
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr3
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr2
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr5
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr18
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr27
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr20
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr29
Processing: mapped_vacv12_vs_untr12_DESEQ_norm.csv → chr16
P