## Create a table with the significant associations for burden test Haid

In [2]:
import pandas as pd

In [35]:
# Read the four files
df1 = pd.read_csv('~/UKBiobank/RAP/results/autosomal/aggregate/Haid_CADD/f3393_CADD_skato_burden_sigbelowlogp5.txt')
df2 = pd.read_csv('~/UKBiobank/RAP/results/autosomal/aggregate/Hdiff_CADD/f2247_CADD_skato_burden_sigbelowlogp5.txt')
df3 = pd.read_csv('~/UKBiobank/RAP/results/autosomal/aggregate/Hnoise_CADD/f2257_CADD_skato_burden_sigbelowlogp5.txt')
df4 = pd.read_csv('~/UKBiobank/RAP/results/autosomal/aggregate/Hboth_CADD/f2247_f2257_CADD_skato_burden_sigbelowlogp5.txt')

# Select required columns for each phenotype
df1_selected = df1[['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ', 'TEST', 'BETA', 'SE', 'LOG10P']]
df2_selected = df2[['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ', 'TEST', 'BETA', 'SE', 'LOG10P']]
df3_selected = df3[['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ', 'TEST', 'BETA', 'SE', 'LOG10P']]
df4_selected = df4[['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ', 'TEST', 'BETA', 'SE', 'LOG10P']]

In [36]:
# Rename columns to include the phenotype name
df1_selected.columns = ['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ_f3393','TEST', 'BETA_f3393', 'SE_f3393', 'LOG10P_f3393']
df2_selected.columns = ['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ_f2247','TEST', 'BETA_f2247', 'SE_f2247', 'LOG10P_f2247']
df3_selected.columns = ['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ_f2257','TEST', 'BETA_f2257', 'SE_f2257', 'LOG10P_f2257']
df4_selected.columns = ['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ_f2247_f2257', 'TEST', 'BETA_f2247_f2257', 'SE_f2247_f2257', 'LOG10P_f2247_f2257']

In [37]:
# Merge dataframes on common columns (CHROM and GENPOS)
merged_df = pd.merge(df1_selected, df2_selected, on=['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'TEST'], how='outer')
merged_df = pd.merge(merged_df, df3_selected, on=['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'TEST'], how='outer')
merged_df = pd.merge(merged_df, df4_selected, on=['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'TEST'], how='outer')

In [38]:
merged_df[['P_f3393', 'P_f2247', 'P_f2257', 'P_f2247_f2257']] = 10 ** -merged_df[['LOG10P_f3393', 'LOG10P_f2247', 'LOG10P_f2257', 'LOG10P_f2247_f2257']]

In [39]:
merged_df

Unnamed: 0,CHROM,GENPOS,ID,ALLELE1,A1FREQ_f3393,TEST,BETA_f3393,SE_f3393,LOG10P_f3393,A1FREQ_f2247,...,SE_f2257,LOG10P_f2257,A1FREQ_f2247_f2257,BETA_f2247_f2257,SE_f2247_f2257,LOG10P_f2247_f2257,P_f3393,P_f2247,P_f2257,P_f2247_f2257
0,5,146339116,POU4F3.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,7.95054,,...,,,,,,,1.120624e-08,,,
1,6,133274701,EYA4.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,12.87610,,...,,,,,,,1.330148e-13,,,
2,6,75817495,MYO6.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,23.40790,,...,,6.16254,,,,10.12260,3.909309e-24,6.028233e-10,6.877966e-07,7.540497e-11
3,7,103374399,SLC26A5.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,5.37368,,...,,7.68818,,,,11.68360,4.229802e-06,1.409613e-12,2.050312e-08,2.072049e-12
4,8,101492770,GRHL2.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,11.67590,,...,,,,,,,2.109114e-12,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,2,178432096,PRKRA.Mask2.0.01,Mask2.0.01,,ADD-SKATO,,,,,...,,,,,,5.75009,,,,1.777911e-06
86,2,178432096,PRKRA.Mask1.0.005,Mask1.0.005,,ADD,,,,,...,,,0.013724,0.115639,0.024251,5.66873,,,,2.144223e-06
87,2,178432096,PRKRA.Mask1.0.01,Mask1.0.01,,ADD,,,,,...,,,0.013724,0.115639,0.024251,5.66873,,,,2.144223e-06
88,11,121045662,TBCEL-TECTA.Mask2.0.01,Mask2.0.01,,ADD,,,,,...,,,0.033482,0.071949,0.015984,5.13557,,,,7.318634e-06


In [42]:
merged_df = merged_df.fillna('NA')

In [46]:
skato_result = merged_df.loc[merged_df['TEST'] == 'ADD-SKATO']

In [47]:
skato_result = skato_result.sort_values(by=['CHROM'], ascending=[True])

In [None]:
for allele, group in skato_result.groupby('ALLELE1'):
    # Define the filename using the 'ALLELE1' name
    filename = f'/home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_skato_CADD_{allele}_output.csv'
    
    # Save the group to the CSV file
    group.to_csv(filename, index=False, header=True,dtype='string')

    print(f'Saved group with ALLELE1="{allele}" to {filename}')


In [28]:
#skato_result.to_csv('/home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_skato_CADD.csv', index=False, header=True)

In [63]:
burden_result = merged_df.loc[merged_df['TEST'] == 'ADD']

In [64]:
burden_result = burden_result.sort_values(by=['CHROM'], ascending=[True])

In [67]:
for allele, group in burden_result.groupby('ALLELE1'):
    # Define the filename using the 'ALLELE1' name
    filename = f'/home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_{allele}_output.csv'
    
    # Save the group to the CSV file
    group.to_csv(filename, index=False, header=True,dtype='string')

    print(f'Saved group with ALLELE1="{allele}" to {filename}')

Saved group with ALLELE1="Mask1.0.005" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask1.0.005_output.csv
Saved group with ALLELE1="Mask1.0.01" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask1.0.01_output.csv
Saved group with ALLELE1="Mask2.0.005" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask2.0.005_output.csv
Saved group with ALLELE1="Mask2.0.01" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask2.0.01_output.csv


In [30]:
#burden_result.to_csv('/home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD.csv', index=False, header=True)

## Start with the full summary stats

In [None]:
~/UKBiobank/RAP/results/autosomal/aggregate/Haid_CADD/

In [75]:
# Read the four files
df1 = pd.read_csv('~/UKBiobank/RAP/results/autosomal/aggregate/Haid_CADD/f3393.regenie', comment='#', sep=" ")
df2 = pd.read_csv('~/UKBiobank/RAP/results/autosomal/aggregate/Hdiff_CADD/f2247.regenie',comment='#', sep=" ")
df3 = pd.read_csv('~/UKBiobank/RAP/results/autosomal/aggregate/Hnoise_CADD/f2257.regenie',comment='#', sep=" ")
df4 = pd.read_csv('~/UKBiobank/RAP/results/autosomal/aggregate/Hboth_CADD/f2247_f2257.regenie',comment='#', sep=" ")

In [76]:
# Select required columns for each phenotype
df1_selected = df1[['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ', 'TEST', 'BETA', 'SE', 'LOG10P']]
df2_selected = df2[['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ', 'TEST', 'BETA', 'SE', 'LOG10P']]
df3_selected = df3[['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ', 'TEST', 'BETA', 'SE', 'LOG10P']]
df4_selected = df4[['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ', 'TEST', 'BETA', 'SE', 'LOG10P']]

In [77]:
# Rename columns to include the phenotype name
df1_selected.columns = ['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ_f3393','TEST', 'BETA_f3393', 'SE_f3393', 'LOG10P_f3393']
df2_selected.columns = ['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ_f2247','TEST', 'BETA_f2247', 'SE_f2247', 'LOG10P_f2247']
df3_selected.columns = ['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ_f2257','TEST', 'BETA_f2257', 'SE_f2257', 'LOG10P_f2257']
df4_selected.columns = ['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ_f2247_f2257', 'TEST', 'BETA_f2247_f2257', 'SE_f2247_f2257', 'LOG10P_f2247_f2257']

In [78]:
# Merge dataframes on common columns (CHROM and GENPOS)
merged_df = pd.merge(df1_selected, df2_selected, on=['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'TEST'], how='outer')
merged_df = pd.merge(merged_df, df3_selected, on=['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'TEST'], how='outer')
merged_df = pd.merge(merged_df, df4_selected, on=['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'TEST'], how='outer')

In [82]:
merged_df

Unnamed: 0,CHROM,GENPOS,ID,ALLELE1,A1FREQ_f3393,TEST,BETA_f3393,SE_f3393,LOG10P_f3393,A1FREQ_f2247,...,SE_f2247,LOG10P_f2247,A1FREQ_f2257,BETA_f2257,SE_f2257,LOG10P_f2257,A1FREQ_f2247_f2257,BETA_f2247_f2257,SE_f2247_f2257,LOG10P_f2247_f2257
0,1,33306769,A3GALT2.Mask1.singleton,Mask1.singleton,1.25259e-05,ADD,-1.14905,1.65401,0.312259,1.21429e-05,...,0.760633,0.0957525,1.72188e-05,0.609199,0.572847,0.54125,1.4366e-05,0.243349,0.748279,0.127832
1,1,33306769,A3GALT2.Mask1.0.005,Mask1.0.005,0.000265131,ADD,0.647419,0.301917,1.36031,0.000267143,...,0.166358,0.733251,0.000271527,0.178833,0.144475,0.665978,0.000260185,0.152691,0.178964,0.404998
2,1,33306769,A3GALT2.Mask1.0.01,Mask1.0.01,,ADD-SKAT,,,0.884134,,...,,0.994283,,,,1.32189,,,,1.03021
3,1,33306769,A3GALT2.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,1.15877,,...,,0.795937,,,,1.10832,,,,0.827662
4,1,33306769,A3GALT2.Mask1.0.01,Mask1.0.01,0.000265131,ADD,0.647419,0.301917,1.36031,0.000267143,...,0.166358,0.733251,0.000271527,0.178833,0.144475,0.665978,0.000260185,0.152691,0.178964,0.404998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
374368,11,71582370,KRTAP5-11.Mask2.singleton,Mask2.singleton,,ADD,,,,,...,,,,,,,7.98113e-06,0.509823,1.00098,0.214297
374369,14,22575049,DAD1.Mask1.singleton,Mask1.singleton,,ADD,,,,,...,,,,,,,7.98113e-06,1.32459,0.979513,0.753794
374370,16,48361588,SIAH1.Mask1.singleton,Mask1.singleton,,ADD,,,,,...,,,,,,,7.98113e-06,-0.220062,1.05555,0.0783897
374371,17,7481519,SLC35G6.Mask1.singleton,Mask1.singleton,,ADD,,,,,...,,,,,,,7.98113e-06,0.474355,1.00001,0.197056


In [83]:
merged_df[['LOG10P_f3393', 'LOG10P_f2247', 'LOG10P_f2257', 'LOG10P_f2247_f2257']] = merged_df[['LOG10P_f3393', 'LOG10P_f2247', 'LOG10P_f2257', 'LOG10P_f2247_f2257']].apply(pd.to_numeric, errors='coerce')

In [84]:
merged_df[['P_f3393', 'P_f2247', 'P_f2257', 'P_f2247_f2257']] = 10 ** -merged_df[['LOG10P_f3393', 'LOG10P_f2247', 'LOG10P_f2257', 'LOG10P_f2247_f2257']]

In [85]:
merged_df

Unnamed: 0,CHROM,GENPOS,ID,ALLELE1,A1FREQ_f3393,TEST,BETA_f3393,SE_f3393,LOG10P_f3393,A1FREQ_f2247,...,SE_f2257,LOG10P_f2257,A1FREQ_f2247_f2257,BETA_f2247_f2257,SE_f2247_f2257,LOG10P_f2247_f2257,P_f3393,P_f2247,P_f2257,P_f2247_f2257
0,1,33306769,A3GALT2.Mask1.singleton,Mask1.singleton,1.25259e-05,ADD,-1.14905,1.65401,0.312259,1.21429e-05,...,0.572847,0.541250,1.4366e-05,0.243349,0.748279,0.127832,0.487238,0.802135,0.287574,0.745020
1,1,33306769,A3GALT2.Mask1.0.005,Mask1.0.005,0.000265131,ADD,0.647419,0.301917,1.360310,0.000267143,...,0.144475,0.665978,0.000260185,0.152691,0.178964,0.404998,0.043620,0.184820,0.215785,0.393552
2,1,33306769,A3GALT2.Mask1.0.01,Mask1.0.01,,ADD-SKAT,,,0.884134,,...,,1.321890,,,,1.030210,0.130577,0.101325,0.047655,0.093280
3,1,33306769,A3GALT2.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,1.158770,,...,,1.108320,,,,0.827662,0.069379,0.159979,0.077926,0.148709
4,1,33306769,A3GALT2.Mask1.0.01,Mask1.0.01,0.000265131,ADD,0.647419,0.301917,1.360310,0.000267143,...,0.144475,0.665978,0.000260185,0.152691,0.178964,0.404998,0.043620,0.184820,0.215785,0.393552
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
374368,11,71582370,KRTAP5-11.Mask2.singleton,Mask2.singleton,,ADD,,,,,...,,,7.98113e-06,0.509823,1.00098,0.214297,,,,0.610524
374369,14,22575049,DAD1.Mask1.singleton,Mask1.singleton,,ADD,,,,,...,,,7.98113e-06,1.32459,0.979513,0.753794,,,,0.176281
374370,16,48361588,SIAH1.Mask1.singleton,Mask1.singleton,,ADD,,,,,...,,,7.98113e-06,-0.220062,1.05555,0.078390,,,,0.834854
374371,17,7481519,SLC35G6.Mask1.singleton,Mask1.singleton,,ADD,,,,,...,,,7.98113e-06,0.474355,1.00001,0.197056,,,,0.635249


In [124]:
# Ensure 'TEST' column is a string type
merged_df['TEST'] = merged_df['TEST'].astype(str)

# Ensure 'P_1', 'P_2', 'P_3', 'P_4' columns are numeric types
merged_df[['P_f3393', 'P_f2247', 'P_f2257', 'P_f2247_f2257']] = merged_df[['P_f3393', 'P_f2247', 'P_f2257', 'P_f2247_f2257']].apply(pd.to_numeric, errors='coerce')


In [125]:
skato_result = merged_df.loc[(merged_df['TEST'] == 'ADD-SKATO')  & ((merged_df['P_f3393'] <= 2.5e-06) | (merged_df['P_f2247'] <= 2.5e-06) | (merged_df['P_f2257'] <= 2.5e-06) | (merged_df['P_f2247_f2257'] <= 2.5e-06))]

In [126]:
skato_result

Unnamed: 0,CHROM,GENPOS,ID,ALLELE1,A1FREQ_f3393,TEST,BETA_f3393,SE_f3393,LOG10P_f3393,A1FREQ_f2247,...,SE_f2257,LOG10P_f2257,A1FREQ_f2247_f2257,BETA_f2247_f2257,SE_f2247_f2257,LOG10P_f2247_f2257,P_f3393,P_f2247,P_f2257,P_f2247_f2257
3401,1,102878022,COL11A1.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,2.95336,,...,,3.95672,,,,6.70613,0.001113371,1.516282e-07,0.0001104791,1.967297e-07
5474,1,11648706,FBXO2.Mask2.0.01,Mask2.0.01,,ADD-SKATO,,,3.80124,,...,,4.5813,,,,6.01412,0.0001580374,2.193764e-06,2.622406e-05,9.680103e-07
8045,1,40784099,KCNQ4.Mask2.0.01,Mask2.0.01,,ADD-SKATO,,,8.86184,,...,,4.18083,,,,5.98096,1.374548e-09,8.312085e-06,6.59432e-05,1.044816e-06
221522,2,178432096,PRKRA.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,0.072506,,...,,1.61591,,,,5.82715,0.8462403,2.801302e-05,0.02421531,1.488847e-06
221527,2,178432096,PRKRA.Mask2.0.01,Mask2.0.01,,ADD-SKATO,,,0.119361,,...,,1.51689,,,,5.75009,0.7596945,3.974935e-05,0.03041655,1.777911e-06
231749,3,142664192,PLS1.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,4.08784,,...,,5.40881,,,,8.30274,8.168833e-05,1.483064e-07,3.901126e-06,4.980352e-09
247669,5,271691,PDCD6.Mask2.0.01,Mask2.0.01,,ADD-SKATO,,,40.8393,,...,,41.6218,,,,63.2805,1.447771e-41,7.4627690000000005e-62,2.388911e-42,5.2420359999999995e-64
247922,5,146339116,POU4F3.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,7.95054,,...,,3.54282,,,,4.29373,1.120624e-08,9.758656e-05,0.0002865365,5.084755e-05
252748,6,133274701,EYA4.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,12.8761,,...,,1.89524,,,,3.47332,1.330148e-13,0.0004835262,0.012728,0.0003362637
252753,6,133274701,EYA4.Mask2.0.01,Mask2.0.01,,ADD-SKATO,,,5.69813,,...,,0.541309,,,,0.708654,2.003872e-06,0.05767133,0.2875352,0.1955897


In [None]:
skato_result['CHROM'] = skato_result['CHROM'].astype(int)
skato_result['GENPOS'] = skato_result['GENPOS'].astype(int)

In [128]:
skato_result = skato_result.sort_values(by=['CHROM', 'GENPOS'], ascending=[True, True])

In [129]:
skato_result

Unnamed: 0,CHROM,GENPOS,ID,ALLELE1,A1FREQ_f3393,TEST,BETA_f3393,SE_f3393,LOG10P_f3393,A1FREQ_f2247,...,SE_f2257,LOG10P_f2257,A1FREQ_f2247_f2257,BETA_f2247_f2257,SE_f2247_f2257,LOG10P_f2247_f2257,P_f3393,P_f2247,P_f2257,P_f2247_f2257
5474,1,11648706,FBXO2.Mask2.0.01,Mask2.0.01,,ADD-SKATO,,,3.80124,,...,,4.5813,,,,6.01412,0.0001580374,2.193764e-06,2.622406e-05,9.680103e-07
8045,1,40784099,KCNQ4.Mask2.0.01,Mask2.0.01,,ADD-SKATO,,,8.86184,,...,,4.18083,,,,5.98096,1.374548e-09,8.312085e-06,6.59432e-05,1.044816e-06
3401,1,102878022,COL11A1.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,2.95336,,...,,3.95672,,,,6.70613,0.001113371,1.516282e-07,0.0001104791,1.967297e-07
221522,2,178432096,PRKRA.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,0.072506,,...,,1.61591,,,,5.82715,0.8462403,2.801302e-05,0.02421531,1.488847e-06
221527,2,178432096,PRKRA.Mask2.0.01,Mask2.0.01,,ADD-SKATO,,,0.119361,,...,,1.51689,,,,5.75009,0.7596945,3.974935e-05,0.03041655,1.777911e-06
231749,3,142664192,PLS1.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,4.08784,,...,,5.40881,,,,8.30274,8.168833e-05,1.483064e-07,3.901126e-06,4.980352e-09
247669,5,271691,PDCD6.Mask2.0.01,Mask2.0.01,,ADD-SKATO,,,40.8393,,...,,41.6218,,,,63.2805,1.447771e-41,7.4627690000000005e-62,2.388911e-42,5.2420359999999995e-64
247922,5,146339116,POU4F3.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,7.95054,,...,,3.54282,,,,4.29373,1.120624e-08,9.758656e-05,0.0002865365,5.084755e-05
255942,6,75817495,MYO6.Mask1.0.01,Mask1.0.01,,ADD-SKATO,,,23.4079,,...,,6.16254,,,,10.1226,3.909309e-24,6.028233e-10,6.877966e-07,7.540497e-11
255947,6,75817495,MYO6.Mask2.0.01,Mask2.0.01,,ADD-SKATO,,,12.1511,,...,,4.12317,,,,7.01917,7.061549e-13,1.546215e-07,7.530607e-05,9.568195e-08


In [131]:
for allele, group in skato_result.groupby('ALLELE1'):
    # Define the filename using the 'ALLELE1' name
    filename = f'/home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_skato_CADD_{allele}_output.csv'
    
    # Save the group to the CSV file
    group.to_csv(filename, index=False, header=True)

    print(f'Saved group with ALLELE1="{allele}" to {filename}')


Saved group with ALLELE1="Mask1.0.01" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_skato_CADD_Mask1.0.01_output.csv
Saved group with ALLELE1="Mask2.0.01" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_skato_CADD_Mask2.0.01_output.csv


In [132]:
burden_result = merged_df.loc[(merged_df['TEST'] == 'ADD')  & ((merged_df['P_f3393'] <= 2.5e-06) | (merged_df['P_f2247'] <= 2.5e-06) | (merged_df['P_f2257'] <= 2.5e-06) | (merged_df['P_f2247_f2257'] <= 2.5e-06))]

In [138]:
burden_result['CHROM'] = burden_result['CHROM'].astype(int)
burden_result['GENPOS'] = burden_result['GENPOS'].astype(int)

In [139]:
burden_result = burden_result.sort_values(by=['CHROM', 'GENPOS'], ascending=[True, True])

In [140]:
burden_result

Unnamed: 0,CHROM,GENPOS,ID,ALLELE1,A1FREQ_f3393,TEST,BETA_f3393,SE_f3393,LOG10P_f3393,A1FREQ_f2247,...,SE_f2257,LOG10P_f2257,A1FREQ_f2247_f2257,BETA_f2247_f2257,SE_f2247_f2257,LOG10P_f2247_f2257,P_f3393,P_f2247,P_f2257,P_f2247_f2257
5472,1,11648706,FBXO2.Mask2.0.005,Mask2.0.005,0.0015219,ADD,0.534273,0.130861,3.96663,0.00163018,...,0.0582332,4.85004,0.00162655,0.343168,0.0672512,6.29648,0.0001079866,1.02334e-06,1.412407e-05,5.052659e-07
5475,1,11648706,FBXO2.Mask2.0.01,Mask2.0.01,0.0015219,ADD,0.534273,0.130861,3.96663,0.00163018,...,0.0582332,4.85004,0.00162655,0.343168,0.0672512,6.29648,0.0001079866,1.02334e-06,1.412407e-05,5.052659e-07
8043,1,40784099,KCNQ4.Mask2.0.005,Mask2.0.005,0.00329222,ADD,0.534572,0.089837,7.73459,0.00337572,...,0.0406924,3.51168,0.00338879,0.219029,0.047549,5.27954,1.842511e-08,4.287558e-05,0.0003078364,5.253636e-06
8046,1,40784099,KCNQ4.Mask2.0.01,Mask2.0.01,0.00329222,ADD,0.534572,0.089837,7.73459,0.00337572,...,0.0406924,3.51168,0.00338879,0.219029,0.047549,5.27954,1.842511e-08,4.287558e-05,0.0003078364,5.253636e-06
221520,2,178432096,PRKRA.Mask1.0.005,Mask1.0.005,0.0133171,ADD,0.0229401,0.0511612,0.184507,0.0137063,...,0.020714,1.83485,0.0137243,0.115639,0.0242506,5.66873,0.6538724,3.333036e-05,0.01462682,2.144223e-06
221523,2,178432096,PRKRA.Mask1.0.01,Mask1.0.01,0.0133171,ADD,0.0229401,0.0511612,0.184507,0.0137063,...,0.020714,1.83485,0.0137243,0.115639,0.0242506,5.66873,0.6538724,3.333036e-05,0.01462682,2.144223e-06
231747,3,142664192,PLS1.Mask1.0.005,Mask1.0.005,0.000413354,ADD,0.965209,0.226192,4.07867,0.000481162,...,0.107312,5.47165,0.000488445,0.708393,0.11858,8.35072,8.343149e-05,1.317619e-07,3.375592e-06,4.459437e-09
231750,3,142664192,PLS1.Mask1.0.01,Mask1.0.01,0.000413354,ADD,0.965209,0.226192,4.07867,0.000481162,...,0.107312,5.47165,0.000488445,0.708393,0.11858,8.35072,8.343149e-05,1.317619e-07,3.375592e-06,4.459437e-09
247667,5,271691,PDCD6.Mask2.0.005,Mask2.0.005,0.00341748,ADD,0.995987,0.0760999,32.5429,0.00383715,...,0.038343,28.0243,0.00382296,0.621381,0.042727,45.3719,2.864838e-33,9.578555e-44,9.455837000000001e-29,4.247173e-46
247670,5,271691,PDCD6.Mask2.0.01,Mask2.0.01,0.00341748,ADD,0.995987,0.0760999,32.5429,0.00383715,...,0.038343,28.0243,0.00382296,0.621381,0.042727,45.3719,2.864838e-33,9.578555e-44,9.455837000000001e-29,4.247173e-46


In [141]:
for allele, group in burden_result.groupby('ALLELE1'):
    # Define the filename using the 'ALLELE1' name
    filename = f'/home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_{allele}_output.csv'
    
    # Save the group to the CSV file
    group.to_csv(filename, index=False, header=True)

    print(f'Saved group with ALLELE1="{allele}" to {filename}')

Saved group with ALLELE1="Mask1.0.005" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask1.0.005_output.csv
Saved group with ALLELE1="Mask1.0.01" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask1.0.01_output.csv
Saved group with ALLELE1="Mask1.singleton" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask1.singleton_output.csv
Saved group with ALLELE1="Mask2.0.005" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask2.0.005_output.csv
Saved group with ALLELE1="Mask2.0.01" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask2.0.01_output.csv
Saved group with ALLELE1="Mask2.singleton" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask2.singleton_output.csv


## Now do it for the SKATO 0.005

In [None]:
~/UKBiobank/RAP/results/autosomal/aggregate/Haid_CADD/

In [3]:
# Read the four files
df1 = pd.read_csv('~/UKBiobank/RAP/results/autosomal/aggregate/Haid_CADD_skato_0.005/f3393.regenie', comment='#', sep=" ")
df2 = pd.read_csv('~/UKBiobank/RAP/results/autosomal/aggregate/Hdiff_CADD_skato_0.005/f2247.regenie',comment='#', sep=" ")
df3 = pd.read_csv('~/UKBiobank/RAP/results/autosomal/aggregate/Hnoise_CADD_skato_0.005/f2257.regenie',comment='#', sep=" ")
df4 = pd.read_csv('~/UKBiobank/RAP/results/autosomal/aggregate/Hboth_CADD_skato_0.005/f2247_f2257.regenie',comment='#', sep=" ")

In [4]:
# Select required columns for each phenotype
df1_selected = df1[['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ', 'TEST', 'BETA', 'SE', 'LOG10P']]
df2_selected = df2[['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ', 'TEST', 'BETA', 'SE', 'LOG10P']]
df3_selected = df3[['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ', 'TEST', 'BETA', 'SE', 'LOG10P']]
df4_selected = df4[['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ', 'TEST', 'BETA', 'SE', 'LOG10P']]

In [5]:
# Rename columns to include the phenotype name
df1_selected.columns = ['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ_f3393','TEST', 'BETA_f3393', 'SE_f3393', 'LOG10P_f3393']
df2_selected.columns = ['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ_f2247','TEST', 'BETA_f2247', 'SE_f2247', 'LOG10P_f2247']
df3_selected.columns = ['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ_f2257','TEST', 'BETA_f2257', 'SE_f2257', 'LOG10P_f2257']
df4_selected.columns = ['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'A1FREQ_f2247_f2257', 'TEST', 'BETA_f2247_f2257', 'SE_f2247_f2257', 'LOG10P_f2247_f2257']

In [6]:
# Merge dataframes on common columns (CHROM and GENPOS)
merged_df = pd.merge(df1_selected, df2_selected, on=['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'TEST'], how='outer')
merged_df = pd.merge(merged_df, df3_selected, on=['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'TEST'], how='outer')
merged_df = pd.merge(merged_df, df4_selected, on=['CHROM', 'GENPOS', 'ID', 'ALLELE1', 'TEST'], how='outer')

In [7]:
merged_df

Unnamed: 0,CHROM,GENPOS,ID,ALLELE1,A1FREQ_f3393,TEST,BETA_f3393,SE_f3393,LOG10P_f3393,A1FREQ_f2247,...,SE_f2247,LOG10P_f2247,A1FREQ_f2257,BETA_f2257,SE_f2257,LOG10P_f2257,A1FREQ_f2247_f2257,BETA_f2247_f2257,SE_f2247_f2257,LOG10P_f2247_f2257
0,1,33306769,A3GALT2.Mask1.singleton,Mask1.singleton,1.25259e-05,ADD,-1.14905,1.65401,0.312259,1.21429e-05,...,0.760633,0.0957525,1.72188e-05,0.609199,0.572847,0.54125,1.4366e-05,0.243349,0.748279,0.127832
1,1,33306769,A3GALT2.Mask1.0.005,Mask1.0.005,,ADD-SKAT,,,0.884134,,...,,0.994283,,,,1.32189,,,,1.03021
2,1,33306769,A3GALT2.Mask1.0.005,Mask1.0.005,,ADD-SKATO,,,1.15877,,...,,0.795937,,,,1.10832,,,,0.827662
3,1,33306769,A3GALT2.Mask1.0.005,Mask1.0.005,0.000265131,ADD,0.647419,0.301917,1.36031,0.000267143,...,0.166358,0.733251,0.000271527,0.178833,0.144475,0.665978,0.000260185,0.152691,0.178964,0.404998
4,1,33306769,A3GALT2.Mask2.singleton,Mask2.singleton,0.000127347,ADD,-0.835586,0.555621,0.877416,0.00010625,...,0.263772,0.0160562,0.000105962,-0.00753251,0.231717,0.011411,0.00011014,-0.194088,0.272825,0.321631
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
337945,11,71582370,KRTAP5-11.Mask2.singleton,Mask2.singleton,,ADD,,,,,...,,,,,,,7.98113e-06,0.509823,1.00098,0.214297
337946,14,22575049,DAD1.Mask1.singleton,Mask1.singleton,,ADD,,,,,...,,,,,,,7.98113e-06,1.32459,0.979513,0.753794
337947,16,48361588,SIAH1.Mask1.singleton,Mask1.singleton,,ADD,,,,,...,,,,,,,7.98113e-06,-0.220062,1.05555,0.0783897
337948,17,7481519,SLC35G6.Mask1.singleton,Mask1.singleton,,ADD,,,,,...,,,,,,,7.98113e-06,0.474355,1.00001,0.197056


In [8]:
merged_df[['LOG10P_f3393', 'LOG10P_f2247', 'LOG10P_f2257', 'LOG10P_f2247_f2257']] = merged_df[['LOG10P_f3393', 'LOG10P_f2247', 'LOG10P_f2257', 'LOG10P_f2247_f2257']].apply(pd.to_numeric, errors='coerce')

In [9]:
merged_df[['P_f3393', 'P_f2247', 'P_f2257', 'P_f2247_f2257']] = 10 ** -merged_df[['LOG10P_f3393', 'LOG10P_f2247', 'LOG10P_f2257', 'LOG10P_f2247_f2257']]

In [10]:
merged_df

Unnamed: 0,CHROM,GENPOS,ID,ALLELE1,A1FREQ_f3393,TEST,BETA_f3393,SE_f3393,LOG10P_f3393,A1FREQ_f2247,...,SE_f2257,LOG10P_f2257,A1FREQ_f2247_f2257,BETA_f2247_f2257,SE_f2247_f2257,LOG10P_f2247_f2257,P_f3393,P_f2247,P_f2257,P_f2247_f2257
0,1,33306769,A3GALT2.Mask1.singleton,Mask1.singleton,1.25259e-05,ADD,-1.14905,1.65401,0.312259,1.21429e-05,...,0.572847,0.541250,1.4366e-05,0.243349,0.748279,0.127832,0.487238,0.802135,0.287574,0.745020
1,1,33306769,A3GALT2.Mask1.0.005,Mask1.0.005,,ADD-SKAT,,,0.884134,,...,,1.321890,,,,1.030210,0.130577,0.101325,0.047655,0.093280
2,1,33306769,A3GALT2.Mask1.0.005,Mask1.0.005,,ADD-SKATO,,,1.158770,,...,,1.108320,,,,0.827662,0.069379,0.159979,0.077926,0.148709
3,1,33306769,A3GALT2.Mask1.0.005,Mask1.0.005,0.000265131,ADD,0.647419,0.301917,1.360310,0.000267143,...,0.144475,0.665978,0.000260185,0.152691,0.178964,0.404998,0.043620,0.184820,0.215785,0.393552
4,1,33306769,A3GALT2.Mask2.singleton,Mask2.singleton,0.000127347,ADD,-0.835586,0.555621,0.877416,0.00010625,...,0.231717,0.011411,0.00011014,-0.194088,0.272825,0.321631,0.132612,0.963704,0.974067,0.476836
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
337945,11,71582370,KRTAP5-11.Mask2.singleton,Mask2.singleton,,ADD,,,,,...,,,7.98113e-06,0.509823,1.00098,0.214297,,,,0.610524
337946,14,22575049,DAD1.Mask1.singleton,Mask1.singleton,,ADD,,,,,...,,,7.98113e-06,1.32459,0.979513,0.753794,,,,0.176281
337947,16,48361588,SIAH1.Mask1.singleton,Mask1.singleton,,ADD,,,,,...,,,7.98113e-06,-0.220062,1.05555,0.078390,,,,0.834854
337948,17,7481519,SLC35G6.Mask1.singleton,Mask1.singleton,,ADD,,,,,...,,,7.98113e-06,0.474355,1.00001,0.197056,,,,0.635249


In [11]:
# Ensure 'TEST' column is a string type
merged_df['TEST'] = merged_df['TEST'].astype(str)

# Ensure 'P_1', 'P_2', 'P_3', 'P_4' columns are numeric types
merged_df[['P_f3393', 'P_f2247', 'P_f2257', 'P_f2247_f2257']] = merged_df[['P_f3393', 'P_f2247', 'P_f2257', 'P_f2247_f2257']].apply(pd.to_numeric, errors='coerce')


In [12]:
skato_result = merged_df.loc[(merged_df['TEST'] == 'ADD-SKATO')  & ((merged_df['P_f3393'] <= 2.5e-06) | (merged_df['P_f2247'] <= 2.5e-06) | (merged_df['P_f2257'] <= 2.5e-06) | (merged_df['P_f2247_f2257'] <= 2.5e-06))]

In [13]:
skato_result

Unnamed: 0,CHROM,GENPOS,ID,ALLELE1,A1FREQ_f3393,TEST,BETA_f3393,SE_f3393,LOG10P_f3393,A1FREQ_f2247,...,SE_f2257,LOG10P_f2257,A1FREQ_f2247_f2257,BETA_f2247_f2257,SE_f2247_f2257,LOG10P_f2247_f2257,P_f3393,P_f2247,P_f2257,P_f2247_f2257
2711,1,102878022,COL11A1.Mask1.0.005,Mask1.0.005,,ADD-SKATO,,,2.95336,,...,,3.95672,,,,6.70613,0.001113371,1.516282e-07,0.0001104791,1.967297e-07
4362,1,11648706,FBXO2.Mask2.0.005,Mask2.0.005,,ADD-SKATO,,,3.80124,,...,,4.5813,,,,6.01412,0.0001580374,2.193764e-06,2.622406e-05,9.680103e-07
6409,1,40784099,KCNQ4.Mask2.0.005,Mask2.0.005,,ADD-SKATO,,,8.86184,,...,,4.18083,,,,5.98096,1.374548e-09,8.312085e-06,6.59432e-05,1.044816e-06
216012,2,178432096,PRKRA.Mask1.0.005,Mask1.0.005,,ADD-SKATO,,,0.072506,,...,,1.61591,,,,5.82715,0.8462403,2.801302e-05,0.02421531,1.488847e-06
216016,2,178432096,PRKRA.Mask2.0.005,Mask2.0.005,,ADD-SKATO,,,0.119361,,...,,1.51689,,,,5.75009,0.7596945,3.974935e-05,0.03041655,1.777911e-06
224165,3,142664192,PLS1.Mask1.0.005,Mask1.0.005,,ADD-SKATO,,,4.08784,,...,,5.40881,,,,8.30274,8.168833e-05,1.483064e-07,3.901126e-06,4.980352e-09
236847,5,271691,PDCD6.Mask2.0.005,Mask2.0.005,,ADD-SKATO,,,40.8393,,...,,41.6218,,,,63.2805,1.447771e-41,7.4627690000000005e-62,2.388911e-42,5.2420359999999995e-64
237049,5,146339116,POU4F3.Mask1.0.005,Mask1.0.005,,ADD-SKATO,,,7.95054,,...,,3.54282,,,,4.29373,1.120624e-08,9.758656e-05,0.0002865365,5.084755e-05
240893,6,133274701,EYA4.Mask1.0.005,Mask1.0.005,,ADD-SKATO,,,12.8761,,...,,1.89524,,,,3.47332,1.330148e-13,0.0004835262,0.012728,0.0003362637
240897,6,133274701,EYA4.Mask2.0.005,Mask2.0.005,,ADD-SKATO,,,5.69813,,...,,0.541309,,,,0.708654,2.003872e-06,0.05767133,0.2875352,0.1955897


In [14]:
skato_result['CHROM'] = skato_result['CHROM'].astype(int)
skato_result['GENPOS'] = skato_result['GENPOS'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  skato_result['CHROM'] = skato_result['CHROM'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  skato_result['GENPOS'] = skato_result['GENPOS'].astype(int)


In [15]:
skato_result = skato_result.sort_values(by=['CHROM', 'GENPOS'], ascending=[True, True])

In [16]:
skato_result

Unnamed: 0,CHROM,GENPOS,ID,ALLELE1,A1FREQ_f3393,TEST,BETA_f3393,SE_f3393,LOG10P_f3393,A1FREQ_f2247,...,SE_f2257,LOG10P_f2257,A1FREQ_f2247_f2257,BETA_f2247_f2257,SE_f2247_f2257,LOG10P_f2247_f2257,P_f3393,P_f2247,P_f2257,P_f2247_f2257
4362,1,11648706,FBXO2.Mask2.0.005,Mask2.0.005,,ADD-SKATO,,,3.80124,,...,,4.5813,,,,6.01412,0.0001580374,2.193764e-06,2.622406e-05,9.680103e-07
6409,1,40784099,KCNQ4.Mask2.0.005,Mask2.0.005,,ADD-SKATO,,,8.86184,,...,,4.18083,,,,5.98096,1.374548e-09,8.312085e-06,6.59432e-05,1.044816e-06
2711,1,102878022,COL11A1.Mask1.0.005,Mask1.0.005,,ADD-SKATO,,,2.95336,,...,,3.95672,,,,6.70613,0.001113371,1.516282e-07,0.0001104791,1.967297e-07
216012,2,178432096,PRKRA.Mask1.0.005,Mask1.0.005,,ADD-SKATO,,,0.072506,,...,,1.61591,,,,5.82715,0.8462403,2.801302e-05,0.02421531,1.488847e-06
216016,2,178432096,PRKRA.Mask2.0.005,Mask2.0.005,,ADD-SKATO,,,0.119361,,...,,1.51689,,,,5.75009,0.7596945,3.974935e-05,0.03041655,1.777911e-06
224165,3,142664192,PLS1.Mask1.0.005,Mask1.0.005,,ADD-SKATO,,,4.08784,,...,,5.40881,,,,8.30274,8.168833e-05,1.483064e-07,3.901126e-06,4.980352e-09
236847,5,271691,PDCD6.Mask2.0.005,Mask2.0.005,,ADD-SKATO,,,40.8393,,...,,41.6218,,,,63.2805,1.447771e-41,7.4627690000000005e-62,2.388911e-42,5.2420359999999995e-64
237049,5,146339116,POU4F3.Mask1.0.005,Mask1.0.005,,ADD-SKATO,,,7.95054,,...,,3.54282,,,,4.29373,1.120624e-08,9.758656e-05,0.0002865365,5.084755e-05
243433,6,75817495,MYO6.Mask1.0.005,Mask1.0.005,,ADD-SKATO,,,23.4079,,...,,6.16254,,,,10.1226,3.909309e-24,6.028233e-10,6.877966e-07,7.540497e-11
243437,6,75817495,MYO6.Mask2.0.005,Mask2.0.005,,ADD-SKATO,,,17.5183,,...,,5.86307,,,,10.3195,3.031796e-18,8.978422e-12,1.370661e-06,4.791815e-11


In [17]:
for allele, group in skato_result.groupby('ALLELE1'):
    # Define the filename using the 'ALLELE1' name
    filename = f'/home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_skato_CADD_{allele}_output.csv'
    
    # Save the group to the CSV file
    group.to_csv(filename, index=False, header=True)

    print(f'Saved group with ALLELE1="{allele}" to {filename}')


Saved group with ALLELE1="Mask1.0.005" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_skato_CADD_Mask1.0.005_output.csv
Saved group with ALLELE1="Mask2.0.005" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_skato_CADD_Mask2.0.005_output.csv


In [132]:
burden_result = merged_df.loc[(merged_df['TEST'] == 'ADD')  & ((merged_df['P_f3393'] <= 2.5e-06) | (merged_df['P_f2247'] <= 2.5e-06) | (merged_df['P_f2257'] <= 2.5e-06) | (merged_df['P_f2247_f2257'] <= 2.5e-06))]

In [138]:
burden_result['CHROM'] = burden_result['CHROM'].astype(int)
burden_result['GENPOS'] = burden_result['GENPOS'].astype(int)

In [139]:
burden_result = burden_result.sort_values(by=['CHROM', 'GENPOS'], ascending=[True, True])

In [140]:
burden_result

Unnamed: 0,CHROM,GENPOS,ID,ALLELE1,A1FREQ_f3393,TEST,BETA_f3393,SE_f3393,LOG10P_f3393,A1FREQ_f2247,...,SE_f2257,LOG10P_f2257,A1FREQ_f2247_f2257,BETA_f2247_f2257,SE_f2247_f2257,LOG10P_f2247_f2257,P_f3393,P_f2247,P_f2257,P_f2247_f2257
5472,1,11648706,FBXO2.Mask2.0.005,Mask2.0.005,0.0015219,ADD,0.534273,0.130861,3.96663,0.00163018,...,0.0582332,4.85004,0.00162655,0.343168,0.0672512,6.29648,0.0001079866,1.02334e-06,1.412407e-05,5.052659e-07
5475,1,11648706,FBXO2.Mask2.0.01,Mask2.0.01,0.0015219,ADD,0.534273,0.130861,3.96663,0.00163018,...,0.0582332,4.85004,0.00162655,0.343168,0.0672512,6.29648,0.0001079866,1.02334e-06,1.412407e-05,5.052659e-07
8043,1,40784099,KCNQ4.Mask2.0.005,Mask2.0.005,0.00329222,ADD,0.534572,0.089837,7.73459,0.00337572,...,0.0406924,3.51168,0.00338879,0.219029,0.047549,5.27954,1.842511e-08,4.287558e-05,0.0003078364,5.253636e-06
8046,1,40784099,KCNQ4.Mask2.0.01,Mask2.0.01,0.00329222,ADD,0.534572,0.089837,7.73459,0.00337572,...,0.0406924,3.51168,0.00338879,0.219029,0.047549,5.27954,1.842511e-08,4.287558e-05,0.0003078364,5.253636e-06
221520,2,178432096,PRKRA.Mask1.0.005,Mask1.0.005,0.0133171,ADD,0.0229401,0.0511612,0.184507,0.0137063,...,0.020714,1.83485,0.0137243,0.115639,0.0242506,5.66873,0.6538724,3.333036e-05,0.01462682,2.144223e-06
221523,2,178432096,PRKRA.Mask1.0.01,Mask1.0.01,0.0133171,ADD,0.0229401,0.0511612,0.184507,0.0137063,...,0.020714,1.83485,0.0137243,0.115639,0.0242506,5.66873,0.6538724,3.333036e-05,0.01462682,2.144223e-06
231747,3,142664192,PLS1.Mask1.0.005,Mask1.0.005,0.000413354,ADD,0.965209,0.226192,4.07867,0.000481162,...,0.107312,5.47165,0.000488445,0.708393,0.11858,8.35072,8.343149e-05,1.317619e-07,3.375592e-06,4.459437e-09
231750,3,142664192,PLS1.Mask1.0.01,Mask1.0.01,0.000413354,ADD,0.965209,0.226192,4.07867,0.000481162,...,0.107312,5.47165,0.000488445,0.708393,0.11858,8.35072,8.343149e-05,1.317619e-07,3.375592e-06,4.459437e-09
247667,5,271691,PDCD6.Mask2.0.005,Mask2.0.005,0.00341748,ADD,0.995987,0.0760999,32.5429,0.00383715,...,0.038343,28.0243,0.00382296,0.621381,0.042727,45.3719,2.864838e-33,9.578555e-44,9.455837000000001e-29,4.247173e-46
247670,5,271691,PDCD6.Mask2.0.01,Mask2.0.01,0.00341748,ADD,0.995987,0.0760999,32.5429,0.00383715,...,0.038343,28.0243,0.00382296,0.621381,0.042727,45.3719,2.864838e-33,9.578555e-44,9.455837000000001e-29,4.247173e-46


In [141]:
for allele, group in burden_result.groupby('ALLELE1'):
    # Define the filename using the 'ALLELE1' name
    filename = f'/home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_{allele}_output.csv'
    
    # Save the group to the CSV file
    group.to_csv(filename, index=False, header=True)

    print(f'Saved group with ALLELE1="{allele}" to {filename}')

Saved group with ALLELE1="Mask1.0.005" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask1.0.005_output.csv
Saved group with ALLELE1="Mask1.0.01" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask1.0.01_output.csv
Saved group with ALLELE1="Mask1.singleton" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask1.singleton_output.csv
Saved group with ALLELE1="Mask2.0.005" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask2.0.005_output.csv
Saved group with ALLELE1="Mask2.0.01" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask2.0.01_output.csv
Saved group with ALLELE1="Mask2.singleton" to /home/dmc2245/UKBiobank/RAP/results/autosomal/aggregate/ARHI_burden_CADD_Mask2.singleton_output.csv
