In [1]:
import pandas as pd
# Lift pandas' display limits for this notebook: no cap on the number of
# rows or columns shown, no fixed display width, and wide frames are not
# wrapped across several stacked blocks.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.width = None
pd.options.display.expand_frame_repr = False
# %%
def load_cpg_enrichment_data(file_path):
    """
    Read a CpG enrichment CSV file into a pandas DataFrame.

    Failures are reported on stdout instead of being raised, so callers
    must check for a ``None`` result before using the return value.

    Args:
        file_path (str): Location of the CSV file to read.

    Returns:
        pandas.DataFrame or None: The parsed table, or ``None`` when the
        file does not exist or reading it raises any other exception.
    """
    try:
        enrichment_table = pd.read_csv(file_path)
    except Exception as exc:
        # A missing file gets its own message; every other failure is
        # reported generically with the exception text.
        if isinstance(exc, FileNotFoundError):
            print(f"Error: The file '{file_path}' was not found.")
        else:
            print(f"An error occurred: {exc}")
        return None
    else:
        return enrichment_table

In [2]:
# Load the `up_enriched_signal_2_exo_over_20.csv` list for the "Neu" study
# and report its size.
# NOTE(review): base_dir is an absolute, machine-specific path; consider
# moving it to a single configurable constant.
STUDY = "Neu"
base_dir = "D:/Github/SRF_MeCP2_cut_tag/iterative_alternative/results/no_dedup/cpg_enrichment"
file_path = f"{base_dir}/{STUDY}/broad/cpg_enrichment_1_rep_in_peaks/lists/up_enriched_signal_2_exo_over_20.csv"
cpg_data = load_cpg_enrichment_data(file_path)
# The loader signals failure by returning None (after printing the reason);
# stop here with a clear message instead of an AttributeError on `.shape`.
if cpg_data is None:
    raise RuntimeError(f"Could not load CpG enrichment data from '{file_path}'")
print(f"Shape of cpg_data: {cpg_data.shape}")
print(f"Number of enriched CpGs: {len(cpg_data)}")
cpg_data.head()

Shape of cpg_data: (842, 24)
Number of enriched CpGs: 842


Unnamed: 0,chr,start,end,exo_signal,endo_signal,enrichment,pvalue,binding_type,binding_type_by_peaks,significant,exo_replicates_with_signal,endo_replicates_with_signal,exo_replicate_signals,endo_replicate_signals,region_length,cpg_length,cpg_score,cpg_name,exo_replicates_with_peaks,endo_replicates_with_peaks,exo_peak_scores_by_rep,endo_peak_scores_by_rep,region_start,region_end
0,chr18,38199080,38199450,47.646655,14.031488,3.395695,0.2,both,exo_only,False,3,2,"26.199222134673263,43.37834898291314,73.362393...","4.695661316811525,23.3673139631933",2903,370,CpG:,876,3,0,5.59107;9.50013;5.57526,,38197650,38200553
1,chr11,72961075,72961596,22.120303,6.560105,3.371943,1.0,both,endo_only,False,3,1,"9.65566662493865,7.240367664359294,49.46487477...","0.0,13.120210752112108",1551,521,CpG:,1141,0,1,,3.48273,72960400,72961951
2,chr9,107872344,107872863,131.170561,38.902583,3.37177,0.2,both,both,False,3,2,"94.47108924615362,80.4085471444797,218.6320454...","31.957984001525325,45.847182126448764",1158,519,CpG:,1408,3,1,13.0558;28.3603;17.0195,11.6957,107872158,107873316
3,chr2,31670686,31671043,175.978559,52.200969,3.371174,0.2,both,both,False,3,2,"192.87088275600124,130.38788000128903,204.6769...","4.708510974812845,99.69342743567978",777,357,CpG:,826,3,1,13.8754;21.5617;18.8811,11.2891,31670403,31671180
4,chr11,116694238,116694929,24.08717,7.148146,3.369709,0.2,both,both,False,3,2,"19.474175663385687,31.399402107457497,21.38793...","8.281002837316041,6.015289014167124",1901,691,CpG:,1475,3,1,4.3354;6.24104;3.79895,2.70518,116693778,116695679


In [3]:
# Load the `up_enriched_signal_2_exo_over_20.csv` list for the "NSC" study
# and report its size.
# NOTE(review): base_dir is an absolute, machine-specific path; consider
# moving it to a single configurable constant.
STUDY = "NSC"
base_dir = "D:/Github/SRF_MeCP2_cut_tag/iterative_alternative/results/no_dedup/cpg_enrichment"
file_path = f"{base_dir}/{STUDY}/broad/cpg_enrichment_1_rep_in_peaks/lists/up_enriched_signal_2_exo_over_20.csv"
cpg_data = load_cpg_enrichment_data(file_path)
# The loader signals failure by returning None (after printing the reason);
# stop here with a clear message instead of an AttributeError on `.shape`.
if cpg_data is None:
    raise RuntimeError(f"Could not load CpG enrichment data from '{file_path}'")
print(f"Shape of cpg_data: {cpg_data.shape}")
print(f"Number of enriched CpGs: {len(cpg_data)}")
cpg_data.head()
# %%


Shape of cpg_data: (2287, 24)
Number of enriched CpGs: 2287


Unnamed: 0,chr,start,end,exo_signal,endo_signal,enrichment,pvalue,binding_type,binding_type_by_peaks,significant,exo_replicates_with_signal,endo_replicates_with_signal,exo_replicate_signals,endo_replicate_signals,region_length,cpg_length,cpg_score,cpg_name,exo_replicates_with_peaks,endo_replicates_with_peaks,exo_peak_scores_by_rep,endo_peak_scores_by_rep,region_start,region_end
0,chr8,110079697,110080128,28.799846,0.239928,120.035254,0.5,both,exo_only,False,3,1,"40.5260954504943,30.03392121076128,15.83952005...","0.71978468010576,0.0,0.0",1046,431,CpG:,1424,2,0,7.02735;5.4755,,110079672,110080718
1,chr9,24695948,24696360,26.633257,0.261054,102.021948,0.2,both,exo_only,False,3,2,"1.9792640686035157,48.39018314068134,29.530322...","0.6303655184232272,0.1527970424065223,0.0",325,412,CpG:,773,2,0,3.76836;5.87655,,24695936,24696261
2,chr7,43676591,43676883,23.08821,0.230463,100.181804,0.666667,both,exo_only,False,2,1,"47.78890505838676,0.0,21.475725704396265","0.0,0.6913893321562096,0.0",676,292,CpG:,918,1,0,5.70533,,43676796,43677472
3,chr12,57575938,57576606,28.693085,0.29414,97.549186,0.666667,both,exo_only,False,2,1,"83.96897449316802,0.0,2.1102819707658558","0.0,0.8824190122109873,0.0",432,668,CpG:,1024,1,0,6.37187,,57575631,57576063
4,chr10,80306170,80306497,27.861158,0.294861,94.489268,0.5,both,exo_only,False,3,1,"1.1643986586667918,48.50539752650549,33.913678...","0.884581676213029,0.0,0.0",579,327,CpG:,1197,1,0,6.92817,,80306217,80306796
