In [18]:
##################################################################
#### step 1 각 CNV 분석 툴 결과 파일 -> bed 파일(dataframe)로 파싱
##################################################################

import pandas as pd
import pybedtools
import warnings
warnings.filterwarnings(action='ignore')
from IPython.core.interactiveshell import InteractiveShell

import pandas as pd
pd.options.display.max_seq_items = 2000

def estd199_1000genomes(sample):
    estd_cnv = pd.read_csv(f"/data/analysis/project/231211_WXS_CNV/public_data/internal_standard_NA12878/ext_cnv_sample/estd199_{sample}.csv", header=None, sep="\t", skiprows=3)
    estd_cnv = estd_cnv.iloc[:, [0,3,4,2]]
    estd_cnv['size'] = round((estd_cnv.iloc[:, 2] - estd_cnv.iloc[:, 1])/1000, 2)
    estd_cnv.columns = ["chr", "start", "end", "CNV", "size(kb)"]
    #print(f'1000genome >> del : {estd_cnv.loc[estd_cnv["CNV"] == "deletion"].shape[0]}, dup : {estd_cnv.loc[estd_cnv["CNV"] == "duplication"].shape[0]}')
    
    return estd_cnv


def cnvkit(sample):
    # CN 2 (normal), CN 1 (single-copy loss), CN 0 (comlete loss), CN 3 (single-copy gain)
    cnvkit_df = pd.read_csv(f"/data/analysis/project/231211_WXS_CNV/public_data/BAM/cnvkit/{sample}.call.cns", sep="\t", header=0)
    cnvkit_df = cnvkit_df.loc[:, ["chromosome", "start", "end", "cn"]]
    cnvkit_df = cnvkit_df.loc[cnvkit_df.cn != 2 ]
    cnvkit_df['size'] = round((cnvkit_df['end'] - cnvkit_df['start'])/1000, 2)
    cnvkit_df.columns = ["cnvkit_chr", "cnvkit_start", "cnvkit_end", "cnvkit_cn", "cnvkit_size(kb)"]
    #print(f'cnvkit >> del : {cnvkit_df.loc[cnvkit_df["cnvkit_cn"] < 2].shape[0]}, dup : {cnvkit_df.loc[cnvkit_df["cnvkit_cn"] > 2].shape[0]}')
    
    return cnvkit_df


def codex(sample):
    codex = pd.read_csv("/data/analysis/project/231211_WXS_CNV/public_data/BAM/codex/CODEX_frac.txt", sep="\t", header=0)
    codex_df = codex.loc[codex.sample_name == sample].drop(columns=["sample_name"])
    codex_df = codex_df.loc[:, ['chr', 'st_bp', 'ed_bp', 'cnv']]
    codex_df['codex_size'] = round((codex_df['ed_bp'] - codex_df['st_bp'])/1000, 2)
    codex_df.columns = ["codex_chr", "codex_start", "codex_end", "codex_cnv", "codex_size(kb)"]
    #print(f'codex >> del : {codex_df.loc[codex_df["codex_cnv"] == "del" ].shape[0]}, dup : {codex_df.loc[codex_df["codex_cnv"] == "dup"].shape[0]}')
    
    return codex_df


def canoes(sample):
    canoes = pd.read_csv("/data/analysis/project/231211_WXS_CNV/public_data/BAM/canoes/results_11sample.csv", sep=" ", header=0)
    
    canoes_df = canoes.loc[canoes.SAMPLE == f'{sample}.bam']
    
    canoes_INTERVAL = canoes_df.INTERVAL.str.split(":", expand=True).copy()
    canoes_df["chr"] = canoes_INTERVAL[0]
    canoes_df["start"] = canoes_INTERVAL[1].str.split("-", expand=True)[0].astype(int)
    canoes_df["end"] = canoes_INTERVAL[1].str.split("-", expand=True)[1].astype(int)
    canoes_df = canoes_df.loc[:, ("chr", "start", "end", "CNV")]
    canoes_df['canoes_size'] = round((canoes_df['end'] - canoes_df['start'])/1000, 2)
    canoes_df.columns = ["canoes_chr", "canoes_start", "canoes_end", "canoes_cnv", "canoes_size(kb)"]
    #print(f'canoes >> del : {canoes_df.loc[canoes_df["canoes_cnv"] == "DEL" ].shape[0]}, dup : {canoes_df.loc[canoes_df["canoes_cnv"] == "DUP"].shape[0]}')
    
    return canoes_df


def xhmm(sample):
    #xhmm_df = pd.read_csv(f"/data/analysis/project/231211_WXS_CNV/public_data/BAM/xhmm/gatk_RD_test2/sample_split/{sample}.xhmm.bed", sep="\t", header=None)
    xhmm_df = pd.read_csv(f"/data/analysis/project/231211_WXS_CNV/xhmm_GIAB_1000G_test/output/sample_split/{sample}.xhmm.bed", sep="\t", header=None)
    xhmm_df = xhmm_df.iloc[:, [0,1,2,3]]
    xhmm_df.columns = ["chr", "start", "end", "cnv"]
    
    xhmm_df['xhmm_size'] = (xhmm_df['end'].astype(int) - xhmm_df['start'].astype(int))/1000
    xhmm_df.columns = ["xhmm_chr", "xhmm_start", "xhmm_end", "xhmm_cnv", 'xhmm_size(kb)']
    #print(f'xhmm >> del : {xhmm_df.loc[xhmm_df["xhmm_cnv"] == "DEL" ].shape[0]}, dup : {xhmm_df.loc[xhmm_df["xhmm_cnv"] == "DUP"].shape[0]}')
    
    return xhmm_df



In [49]:
def giab_intersect():
    ## giab_agilentV5
    giab_V5_df = pd.read_csv(f"/data/analysis/project/231211_WXS_CNV/GIAB_1000G_test/reference/HG002_SVs_Tier1_v0.6.parse.include_Agilent_V5.bed", sep="\t", header=None)
    giab_V5_df.columns = ["HG2_chr","HG2_start", "HG2_end", "HG2_size(kb)","V5_chr", "V5_start", "V5_end", "V5_gene"]
    display(giab_V5_df.head())

    ## xhmm_HG002
    xhmm_HG002_df = pd.read_csv("/data/analysis/project/231211_WXS_CNV/GIAB_1000G_test/xhmm_output/sample_split/Sample_Diag-excap51-HG002-EEogPU.xhmm.bed", sep="\t", header=None)
    xhmm_HG002_df.columns = ["xhmm_chr", "xhmm_start", "xhmm_end", "xhmm_cnv", "xhmm_size(kb)", "xhmm_sample"]
    #display(xhmm_HG002_df.head())

    ## cnvkit_HG002
    # CN 2 (normal), CN 1 (single-copy loss), CN 0 (comlete loss), CN 3 (single-copy gain)
    cnvkit_df = pd.read_csv(f"/data/analysis/project/231211_WXS_CNV/GIAB_1000G_test/cnvkit_output/HG002.call.cns", sep="\t", header=0)
    cnvkit_df = cnvkit_df.loc[:, ["chromosome", "start", "end", "log2", "cn"]]
    cnvkit_df_del_dup = cnvkit_df.loc[cnvkit_df.cn != 2 ]
    display(cnvkit_df_del_dup.head(20))
    cnvkit_df['size'] = round((cnvkit_df['end'] - cnvkit_df['start'])/1000, 2)
    cnvkit_df.columns = ["cnvkit_chr", "cnvkit_start", "cnvkit_end", "cnvkit_log2", "cnvkit_cn", "cnvkit_size(kb)"]
    #print(f'cnvkit >> del : {cnvkit_df.loc[cnvkit_df["cnvkit_cn"] < 2].shape[0]}, dup : {cnvkit_df.loc[cnvkit_df["cnvkit_cn"] > 2].shape[0]}')    

    giab_bed_file = pybedtools.BedTool.from_dataframe(giab_V5_df)
    xhmm_bed_file = pybedtools.BedTool.from_dataframe(xhmm_HG002_df)
    cnvkit_bed_file = pybedtools.BedTool.from_dataframe(cnvkit_df)

    its_df1 = giab_bed_file.intersect(xhmm_bed_file, wa=True, wb=True).to_dataframe(header=None)
    its_df2 = giab_bed_file.intersect(cnvkit_bed_file, wa=True, wb=True).to_dataframe(header=None)
    its_df2.columns = giab_V5_df.columns.tolist() + cnvkit_df.columns.tolist()
    display(its_df2.head(100))

giab_intersect()


Unnamed: 0,HG2_chr,HG2_start,HG2_end,HG2_size(kb),V5_chr,V5_start,V5_end,V5_gene
0,1,1181675,1182435,0.76,1,1181871,1182048,CCDS30554
1,1,1220614,1222554,1.94,1,1222116,1222662,CCDS44037
2,1,249206989,249214309,7.32,1,249211065,249212623,CCDS31129
3,2,241448719,241449289,0.57,2,241448743,241448976,CCDS2536
4,3,89257836,89260742,2.906,3,89258964,89259712,CCDS46875


Unnamed: 0,chromosome,start,end,log2,cn
21,8,7717829,7718296,-2.70713,0
30,11,55370954,55418737,-0.471349,1
33,12,11243865,11244125,-9.2175,0
34,12,11244165,11244605,-27.6349,0
42,14,22919040,22982000,-0.643317,1
57,19,55340885,55351171,-24.6135,0
70,X,150500,2692870,-0.048414,1
71,X,2699992,47990806,-0.378742,1
72,X,47991636,48316662,-0.113829,1
73,X,48317162,52520688,-0.391499,1


Unnamed: 0,HG2_chr,HG2_start,HG2_end,HG2_size(kb),V5_chr,V5_start,V5_end,V5_gene,cnvkit_chr,cnvkit_start,cnvkit_end,cnvkit_log2,cnvkit_cn,cnvkit_size(kb)
0,1,1181675,1182435,0.760,1,1181871,1182048,CCDS30554,1,65509,121611853,0.000198,2,121546.34
1,1,1220614,1222554,1.940,1,1222116,1222662,CCDS44037,1,65509,121611853,0.000198,2,121546.34
2,1,249206989,249214309,7.320,1,249211065,249212623,CCDS31129,1,142509878,249212623,0.004819,2,106702.74
3,2,241448719,241449289,0.570,2,241448743,241448976,CCDS2536,2,238949843,243037218,-0.024343,2,4087.38
4,3,89257836,89260742,2.906,3,89258964,89259712,CCDS46875,3,150500,90584934,0.001752,2,90434.43
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,20,62192203,62196825,4.622,20,62192416,62195221,CCDS13527,20,60864196,62959313,-0.048936,2,2095.12
63,21,46945825,46946213,0.388,21,46945758,46945924,CCDS56217,21,9371758,48084354,0.005634,2,38712.60
64,21,47422870,47425721,2.851,21,47423302,47423936,CCDS13727,21,9371758,48084354,0.005634,2,38712.60
65,21,47710904,47711398,0.494,21,47711267,47711447,CCDS33591,21,9371758,48084354,0.005634,2,38712.60


In [50]:

#####################################
#### step 2 bed 파일 intersect
#####################################

def estd_intersect(cnvtool, sample):
    
    tool_df = pd.DataFrame()
    ### output dir
    DIR="/data/analysis/project/231211_WXS_CNV/public_data/BAM/result_nh2"
    
    tool_name = cnvtool.__name__
    tool_df['sample'] = [sample]

  
    estd199_df = estd199_1000genomes(sample)
    estd199_df = estd199_df.loc[~estd199_df.chr.isin(["X", "Y"])]
    cnvtool_df = cnvtool(sample)
    cnvtool_df = cnvtool_df.loc[~cnvtool_df[f'{tool_name}_chr'].isin(["X", "Y"])]
    
    #print("#### 1> each CNV result")
    for k, v in estd199_df["CNV"].value_counts().to_dict().items():
        tool_df[f'1000genome_{k}_count'] = v
    
        
    for k, v in estd199_df.groupby("CNV")["size(kb)"].sum().to_dict().items():
        tool_df[f'1000genome_{k}_size(kb)'] = v
    
    for k, v in cnvtool_df[f'{tool_name}_cnv'].value_counts().to_dict().items():
        tool_df[f'{tool_name}_{k}_count'] = v
        
    for k, v in cnvtool_df.groupby(f"{tool_name}_cnv")[f"{tool_name}_size(kb)"].sum().to_dict().items():
        tool_df[f'{tool_name}_{k}_size(kb)'] = v

    display(tool_df.head())
    
    
    ## bedtools intersect
    estd_bed = pybedtools.BedTool.from_dataframe(estd199_df)
    bed_file = pybedtools.BedTool.from_dataframe(cnvtool_df)
    
    its_df = estd_bed.intersect(bed_file, wa=True, wb=True).to_dataframe(header=None)
                    
                    
    if its_df.empty:
        column_list = list(estd199_df.columns) + ['bed2_chr', 'bed2_start', 'bed2_end', 'bed2_cnv', 'bed2_size(kb)']
        its_df = pd.DataFrame(columns=column_list)
        display(its_df.head())
        its_df.to_csv(f'{DIR}/{sample}_{tool_name}.csv', sep="\t", index=False)
           
    if not its_df.empty:
        ## 1) estd199 & cnvtool
        its_df.columns = list(estd199_df.columns) + ['bed2_chr', 'bed2_start', 'bed2_end', 'bed2_cnv', 'bed2_size(kb)']
        
        # (its_df start와 bed2_start 비교) if min() (its_df end와 bed2_end 비교)
        its_df['length'] = its_df.apply(lambda x: max(x['end'], x['bed2_end']) - min(x['start'], x['bed2_start']), axis=1)
        its_df['overlab'] = its_df.apply(lambda x : min(x['end'], x['bed2_end']) - max(x['start'], x['bed2_start']), axis=1)
        its_df['intersect(%)'] = round(its_df['overlab'] / its_df['length'] * 100, 4)
        its_df = its_df.drop(columns=['length', 'overlab'])
        
        its_df.to_csv(f'{DIR}/{sample}_{tool_name}.csv', sep="\t", index=False)
        # intersect result -> count, size
        print('#### 2> intersect result')
        for k, v in its_df["CNV"].value_counts().to_dict().items():
            tool_df[f'intersect_{k}_count'] = v

        for k, v in its_df.groupby("CNV")["size(kb)"].sum().to_dict().items():
            tool_df[f'intersect_1000g_{k}_size(kb)'] = v
        for k, v in its_df.groupby("bed2_cnv")["bed2_size(kb)"].sum().to_dict().items():
            tool_df[f'intersect_{tool_name}_{k}_size(kb)'] = v

        
        ## 2) estd199 only
        estd_only_df = estd_bed.subtract(bed_file).to_dataframe(header=None)
        estd_only_df.columns = list(estd199_df.columns)
        estd_only_df.to_csv(f'{DIR}/{sample}_{tool_name}_estd_only.csv', sep="\t", index=False)
        print('#### 3> estd199 only')
        for k, v in estd_only_df["CNV"].value_counts().to_dict().items():
            tool_df[f'estd_only_{k}_count'] = v
        for k, v in estd_only_df.groupby("CNV")["size(kb)"].sum().to_dict().items():
            tool_df[f'estd_only_{k}_size(kb)'] = v
        
    
        ## 3) cnvtool only
        bed_only_df = bed_file.subtract(estd_bed).to_dataframe(header=None)
        bed_only_df.columns = ['bed2_chr', 'bed2_start', 'bed2_end', 'bed2_cnv', 'bed2_size(kb)']
        bed_only_df.to_csv(f'{DIR}/{sample}_{tool_name}_bed_only.csv', sep="\t", index=False)
        print('#### 4> cnvtool only')
        for k, v in bed_only_df["bed2_cnv"].value_counts().to_dict().items():
            tool_df[f'{tool_name}_only_{k}_count'] = v
        for k, v in bed_only_df.groupby("bed2_cnv")["bed2_size(kb)"].sum().to_dict().items():
            tool_df[f'{tool_name}_only_{k}_size(kb)'] = v

    
    print(tool_df.columns)
    tool_df.to_csv(f'{DIR}/{tool_name}_result.csv', sep="\t", mode='a', header=False, index=False)
    print(tool_df.columns)        
    print(f'{tool_name} intersect result : {its_df.shape}')
    display(its_df[["CNV", "bed2_cnv"]].head())
    
    
    

In [34]:
#sample_lst = ["NA06986", "NA06989","NA07051","NA07347","NA11843","NA12340","NA12761","NA12878","NA18959", "NA18960", "NA10851"]

#for sample in sample_lst:
    #print(sample)
    #estd_intersect(cnvkit, sample)
    #estd_intersect(codex, sample)
    #estd_intersect(canoes, sample)
    #estd_intersect(xhmm, sample)
    #print("####################################################################################")
    

sample_lst = ["Sample_Diag-excap51-HG002-EEogPU", "Sample_Diag-excap51-HG003-EEogPU", "Sample_Diag-excap51-HG004-EEogPU"]
for sample in sample_lst:
    estd_intersect(giab, sample)
    estd_intersect(xhmm, sample)

NA06986


Unnamed: 0,sample,1000genome_deletion_count,1000genome_deletion_size(kb),xhmm_DUP_count,xhmm_DEL_count,xhmm_DEL_size(kb),xhmm_DUP_size(kb)
0,NA06986,1336,6863.93,3,2,62.542,391.233


Unnamed: 0,chr,start,end,CNV,size(kb),bed2_chr,bed2_start,bed2_end,bed2_cnv,bed2_size(kb)


Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DUP_count', 'xhmm_DEL_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)'],
      dtype='object')
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DUP_count', 'xhmm_DEL_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)'],
      dtype='object')
xhmm intersect result : (0, 10)


Unnamed: 0,CNV,bed2_cnv


NA06989


Unnamed: 0,sample,1000genome_deletion_count,1000genome_deletion_size(kb),xhmm_DUP_count,xhmm_DEL_count,xhmm_DEL_size(kb),xhmm_DUP_size(kb)
0,NA06989,932,4343.84,6,4,500.504,595.125


#### 2> intersect result
#### 3> estd199 only
#### 4> cnvtool only
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DUP_count', 'xhmm_DEL_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DUP_count', 'xhmm_only_DEL_count', 'xhmm_only_DEL_size(kb)',
       'xhmm_only_DUP_size(kb)'],
      dtype='object')
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DUP_count', 'xhmm_DEL_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DUP_count', 'xhmm_only_DEL_count', 'xhmm_only_DEL_size(kb)',
       'xhmm_only_DUP_size(kb)'],
      d

Unnamed: 0,CNV,bed2_cnv
0,deletion,DEL
1,deletion,DEL


NA07051


Unnamed: 0,sample,1000genome_deletion_count,1000genome_deletion_size(kb),xhmm_DEL_count,xhmm_DUP_count,xhmm_DEL_size(kb),xhmm_DUP_size(kb)
0,NA07051,1295,5096.47,6,6,290.114,616.045


#### 2> intersect result
#### 3> estd199 only
#### 4> cnvtool only
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DEL_count', 'xhmm_DUP_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DUP_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DUP_count', 'xhmm_only_DEL_count', 'xhmm_only_DEL_size(kb)',
       'xhmm_only_DUP_size(kb)'],
      dtype='object')
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DEL_count', 'xhmm_DUP_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DUP_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DUP_count', 'xhmm_only_DEL_count', 'xhmm_only_DEL_size(kb)',
       'xhmm_only_DUP_size(kb)'],
      d

Unnamed: 0,CNV,bed2_cnv
0,deletion,DUP


NA07347


Unnamed: 0,sample,1000genome_deletion_count,1000genome_deletion_size(kb),xhmm_DUP_count,xhmm_DEL_count,xhmm_DEL_size(kb),xhmm_DUP_size(kb)
0,NA07347,1313,5491.43,5,2,823.436,59.893


#### 2> intersect result
#### 3> estd199 only
#### 4> cnvtool only
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DUP_count', 'xhmm_DEL_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DUP_count', 'xhmm_only_DEL_count', 'xhmm_only_DEL_size(kb)',
       'xhmm_only_DUP_size(kb)'],
      dtype='object')
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DUP_count', 'xhmm_DEL_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DUP_count', 'xhmm_only_DEL_count', 'xhmm_only_DEL_size(kb)',
       'xhmm_only_DUP_size(kb)'],
      d

Unnamed: 0,CNV,bed2_cnv
0,deletion,DEL
1,deletion,DEL
2,deletion,DEL


NA11843


Unnamed: 0,sample,1000genome_deletion_count,1000genome_deletion_size(kb),xhmm_DEL_count,xhmm_DEL_size(kb)
0,NA11843,1057,6902.47,7,1988.251


#### 2> intersect result
#### 3> estd199 only
#### 4> cnvtool only
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DEL_count', 'xhmm_DEL_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DEL_count', 'xhmm_only_DEL_size(kb)'],
      dtype='object')
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DEL_count', 'xhmm_DEL_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DEL_count', 'xhmm_only_DEL_size(kb)'],
      dtype='object')
xhmm intersect result : (4, 11)


Unnamed: 0,CNV,bed2_cnv
0,deletion,DEL
1,deletion,DEL
2,deletion,DEL
3,deletion,DEL


NA12340


Unnamed: 0,sample,1000genome_deletion_count,1000genome_deletion_size(kb),xhmm_DEL_count,xhmm_DEL_size(kb)
0,NA12340,786,5818.86,5,1083.314


#### 2> intersect result
#### 3> estd199 only
#### 4> cnvtool only
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DEL_count', 'xhmm_DEL_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DEL_count', 'xhmm_only_DEL_size(kb)'],
      dtype='object')
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DEL_count', 'xhmm_DEL_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DEL_count', 'xhmm_only_DEL_size(kb)'],
      dtype='object')
xhmm intersect result : (1, 11)


Unnamed: 0,CNV,bed2_cnv
0,deletion,DEL


NA12761


Unnamed: 0,sample,1000genome_deletion_count,1000genome_deletion_size(kb),xhmm_DUP_count,xhmm_DUP_size(kb)
0,NA12761,1131,4343.16,53,13637.145


#### 2> intersect result
#### 3> estd199 only
#### 4> cnvtool only
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DUP_count', 'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DUP_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DUP_count', 'xhmm_only_DUP_size(kb)'],
      dtype='object')
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DUP_count', 'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DUP_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DUP_count', 'xhmm_only_DUP_size(kb)'],
      dtype='object')
xhmm intersect result : (7, 11)


Unnamed: 0,CNV,bed2_cnv
0,deletion,DUP
1,deletion,DUP
2,deletion,DUP
3,deletion,DUP
4,deletion,DUP


NA12878


Unnamed: 0,sample,1000genome_deletion_count,1000genome_deletion_size(kb),xhmm_DEL_count,xhmm_DUP_count,xhmm_DEL_size(kb),xhmm_DUP_size(kb)
0,NA12878,898,2484.04,8,1,1175.34,23.056


#### 2> intersect result
#### 3> estd199 only
#### 4> cnvtool only
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DEL_count', 'xhmm_DUP_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DEL_count', 'xhmm_only_DUP_count', 'xhmm_only_DEL_size(kb)',
       'xhmm_only_DUP_size(kb)'],
      dtype='object')
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DEL_count', 'xhmm_DUP_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DEL_count', 'xhmm_only_DUP_count', 'xhmm_only_DEL_size(kb)',
       'xhmm_only_DUP_size(kb)'],
      d

Unnamed: 0,CNV,bed2_cnv
0,deletion,DEL


NA18959


Unnamed: 0,sample,1000genome_deletion_count,1000genome_deletion_size(kb),xhmm_DUP_count,xhmm_DEL_count,xhmm_DEL_size(kb),xhmm_DUP_size(kb)
0,NA18959,1185,4983.86,14,8,689.005,1993.516


#### 2> intersect result
#### 3> estd199 only
#### 4> cnvtool only
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DUP_count', 'xhmm_DEL_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'intersect_xhmm_DUP_size(kb)', 'estd_only_deletion_count',
       'estd_only_deletion_size(kb)', 'xhmm_only_DUP_count',
       'xhmm_only_DEL_count', 'xhmm_only_DEL_size(kb)',
       'xhmm_only_DUP_size(kb)'],
      dtype='object')
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DUP_count', 'xhmm_DEL_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'intersect_xhmm_DUP_size(kb)', 'estd_only_deletion_count',
       'estd_only_deletion_size(kb)', 'xhmm_only_DUP_count',
       'xhmm_only_DEL

Unnamed: 0,CNV,bed2_cnv
0,deletion,DEL
1,deletion,DUP
2,deletion,DEL
3,deletion,DEL
4,deletion,DUP


NA18960


Unnamed: 0,sample,1000genome_deletion_count,1000genome_deletion_size(kb),xhmm_DEL_count,xhmm_DUP_count,xhmm_DEL_size(kb),xhmm_DUP_size(kb)
0,NA18960,1148,5883.2,7,4,495.224,119.08


#### 2> intersect result
#### 3> estd199 only
#### 4> cnvtool only
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DEL_count', 'xhmm_DUP_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DEL_count', 'xhmm_only_DUP_count', 'xhmm_only_DEL_size(kb)',
       'xhmm_only_DUP_size(kb)'],
      dtype='object')
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DEL_count', 'xhmm_DUP_count', 'xhmm_DEL_size(kb)',
       'xhmm_DUP_size(kb)', 'intersect_deletion_count',
       'intersect_1000g_deletion_size(kb)', 'intersect_xhmm_DEL_size(kb)',
       'estd_only_deletion_count', 'estd_only_deletion_size(kb)',
       'xhmm_only_DEL_count', 'xhmm_only_DUP_count', 'xhmm_only_DEL_size(kb)',
       'xhmm_only_DUP_size(kb)'],
      d

Unnamed: 0,CNV,bed2_cnv
0,deletion,DEL


NA10851


Unnamed: 0,sample,1000genome_deletion_count,1000genome_deletion_size(kb),xhmm_DUP_count,xhmm_DUP_size(kb)
0,NA10851,1186,5518.22,1,23.782


Unnamed: 0,chr,start,end,CNV,size(kb),bed2_chr,bed2_start,bed2_end,bed2_cnv,bed2_size(kb)


Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DUP_count', 'xhmm_DUP_size(kb)'],
      dtype='object')
Index(['sample', '1000genome_deletion_count', '1000genome_deletion_size(kb)',
       'xhmm_DUP_count', 'xhmm_DUP_size(kb)'],
      dtype='object')
xhmm intersect result : (0, 10)


Unnamed: 0,CNV,bed2_cnv
