In [1]:
import os
import pandas as pd


In [8]:
# Map every expression CSV in `expression_folder` onto the gene annotation
# list and write one `mapped_<file>` CSV per input into `output_folder`.
# Every gene from the annotation is kept (left join); genes with no match in
# an expression file get the literal string "NA" in the statistic columns.

# Paths
annotation_path = "data/green_monkey/annotation/gene_list.csv"
expression_folder = "data/green_monkey/rna_comparison/"
output_folder = "data/green_monkey/rna_comparison/green_monkey_expressions_mapped"
os.makedirs(output_folder, exist_ok=True)

# Statistic columns carried over from each expression file. Defined once so
# the column selection and the NA back-fill below cannot drift apart.
stat_columns = ['baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj']

# Read gene info
df_gene = pd.read_csv(annotation_path)

# Clean up whitespace so the join key matches exactly
df_gene['gene_name'] = df_gene['gene_name'].str.strip()

# Loop through each expression file.
# os.listdir returns entries in arbitrary order; sorting makes the processing
# and log order reproducible across runs and machines.
for file in sorted(os.listdir(expression_folder)):
    # Skips non-CSV entries, including the output sub-folder itself.
    if not file.endswith('.csv'):
        continue

    expr_path = os.path.join(expression_folder, file)
    df_expr = pd.read_csv(expr_path, na_values=['NA'])  # Treat 'NA' as real missing

    # Keep only the needed columns and normalise the join key. Built as a
    # non-mutating chain: renaming/assigning in place on a column slice is the
    # pattern that triggers pandas' SettingWithCopyWarning.
    df_expr = (
        df_expr[['Row.names'] + stat_columns]
        .rename(columns={'Row.names': 'gene_name'})
        .assign(gene_name=lambda frame: frame['gene_name'].str.strip())
    )

    # Left join keeps every annotated gene, matched or not.
    # NOTE(review): if an expression file repeats a gene_name, the join will
    # repeat the corresponding annotation row — confirm keys are unique.
    merged = pd.merge(df_gene, df_expr, on='gene_name', how='left')

    # Replace missing expression values with 'NA'. The membership check guards
    # the case where the annotation shares a column name and merge suffixed it.
    for col in stat_columns:
        if col in merged.columns:
            merged[col] = merged[col].fillna("NA")

    # Save output
    save_path = os.path.join(output_folder, f'mapped_{file}')
    merged.to_csv(save_path, index=False)
    print(f"Saved: {save_path}")


Saved: data/green_monkey/rna_comparison/green_monkey_expressions_mapped/mapped_vacv24_vs_untr24_DESEQ_norm.csv
Saved: data/green_monkey/rna_comparison/green_monkey_expressions_mapped/mapped_untr24_vs_untr12_DESEQ_norm.csv
Saved: data/green_monkey/rna_comparison/green_monkey_expressions_mapped/mapped_vacv18_vs_vacv12_DESEQ_norm.csv
Saved: data/green_monkey/rna_comparison/green_monkey_expressions_mapped/mapped_vacv18_vs_untr18_DESEQ_norm.csv
Saved: data/green_monkey/rna_comparison/green_monkey_expressions_mapped/mapped_untr24_vs_untr18_DESEQ_norm.csv
Saved: data/green_monkey/rna_comparison/green_monkey_expressions_mapped/mapped_vacv24_vs_vacv12_DESEQ_norm.csv
Saved: data/green_monkey/rna_comparison/green_monkey_expressions_mapped/mapped_untr18_vs_untr12_DESEQ_norm.csv
Saved: data/green_monkey/rna_comparison/green_monkey_expressions_mapped/mapped_vacv24_vs_vacv18_DESEQ_norm.csv
Saved: data/green_monkey/rna_comparison/green_monkey_expressions_mapped/mapped_vacv12_vs_untr12_DESEQ_norm.csv
