Major points:

- In order to be able to assess the quality of the mitochondrial sequencing results, the mitochondrial haplotypes of the individual persons should be listed in the Suppl. Materials, indicating the haplogroup. In addition, a quality check should be carried out, e.g. in the EMPOP database. It would be very interesting to analyze whether there is a haplogroup dependency of the occurrence of somatic mutations.

- **TODO:** run an extended Fisher's exact test for the haplogroups (Konstantin) and visualize the result (with Bogdan)

In [80]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Haplogroup table, indexed by the first column of the TSV.
# Any row with a missing value in any column is discarded before the samples are counted.
df_hap = pd.read_table('../data/Suplementary_File_S2_Haplogroups.tsv', index_col=0).dropna()
nsamples = df_hap.shape[0]
df_hap

Unnamed: 0_level_0,Haplogroup,ContaminationStatus,ContaminationLevel,Distance,SampleCoverage
Patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
7,H5b,NO,ND,8.0,145.0
10,U5b1a*,NO,ND,14.0,31.0
12,H11a2a2,NO,ND,11.0,73.0
16,H1,NO,ND,6.0,74.0
17,H,NO,ND,5.0,84.0
...,...,...,...,...,...
402,W6a,NO,ND,14.0,39.0
405,U5a2a1,NO,ND,16.0,188.0
407,G1b1*,NO,ND,16.0,228.0
408,U5a2a1,NO,ND,16.0,264.0


In [35]:
# Clinical / mutation table from the 'Data' sheet, indexed by the first column.
df_mut = pd.read_excel('../data/Suplementary_File_S1_Clinical_data_of_Osteoarthritic_cohort.xlsx',
              sheet_name='Data', index_col=0)

# Compare SNP positions numerically rather than against the text '408.0' / '189.0':
# a string comparison silently yields all-False flags if the column is parsed as
# float or the position is written as '408'. Unparseable / missing values become NaN
# and therefore never match.
snp_position = pd.to_numeric(df_mut['SNP_position'], errors='coerce')

# Boolean per-row flags for the two mutations of interest (position + substitution).
df_mut['mut_T408A'] = (snp_position == 408) & (df_mut['SNP_type'] == 'T>A')
df_mut['mut_A189G'] = (snp_position == 189) & (df_mut['SNP_type'] == 'A>G')
df_mut

Unnamed: 0_level_0,Age,Gender,Weight,Height,Blood_type,Rh_factor,Systolic_BP,Diastolic_BP,Heart_rate,Red_blood_cells,...,SNP_position,SNP_type,allele_frequency,MitoHPC_filter_type,Mutation_type,mtDNA_copy_number,Arthritis_type,ICD,mut_T408A,mut_A189G
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10,57,M,82.0,176,1,1,130,90.0,80.0,4.58,...,0,,,strict,cont2_nonCR,,primary,"M16, I10.0",False,False
10,57,M,82.0,176,1,1,130,90.0,80.0,4.58,...,0,,,loose,cont2_nonCR,,primary,"M16, I10.0",False,False
101,80,F,85.0,167,2,1,120,80.0,86.0,4.02,...,408.0,T>A,0.029351,loose,exp_data,249.3968,primary,"M17, K29.9",True,False
101,80,F,85.0,167,2,1,120,80.0,86.0,4.02,...,189.0,A>G,0.070369,loose,exp_data,249.3968,primary,"M17, K29.9",False,True
101,80,F,85.0,167,2,1,120,80.0,86.0,4.02,...,408.0,T>A,0.029351,strict,exp_data,249.3968,primary,"M17, K29.9",True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,57,M,125.0,187,3,1,125,90.0,70.0,5.55,...,,,,,,,primary,"M16, K29.6",False,False
94,38,F,58.0,171,2,1,120,80.0,82.0,3.58,...,189.0,A>G,0.261817,loose,exp_data,279.9832,post-traumatic,M16,False,True
94,38,F,58.0,171,2,1,120,80.0,82.0,3.58,...,189.0,A>G,0.261817,strict,exp_data,279.9832,post-traumatic,M16,False,True
96,63,M,112.0,180,2,0,130,90.0,99.0,4.53,...,189.0,A>G,0.358696,strict,exp_data,376.4480,primary,"M16, I10.0, I25.9",False,True


In [57]:
# Row masks for the two MitoHPC filter settings.
is_loose = df_mut['MitoHPC_filter_type'].eq('loose')
is_strict = df_mut['MitoHPC_filter_type'].eq('strict')

# Rows flagged for each mutation, separately per filter setting.
carriers_408_loose = df_mut.loc[is_loose & df_mut['mut_T408A']]
carriers_189_loose = df_mut.loc[is_loose & df_mut['mut_A189G']]

carriers_408_strict = df_mut.loc[is_strict & df_mut['mut_T408A']]
carriers_189_strict = df_mut.loc[is_strict & df_mut['mut_A189G']]

# (rows, columns) of each subset, in the order 189 loose, 189 strict, 408 loose, 408 strict.
(
    carriers_189_loose.shape,
    carriers_189_strict.shape,
    carriers_408_loose.shape,
    carriers_408_strict.shape,
)

((56, 49), (41, 49), (43, 49), (39, 49))

In [69]:
# Flag each haplogroup row whose index value also appears in the index of the
# corresponding 'loose'-filter carrier subset.
df_hap = df_hap.assign(
    mut_T408A=df_hap.index.isin(carriers_408_loose.index),
    mut_A189G=df_hap.index.isin(carriers_189_loose.index),
)

In [78]:
# Contingency table: T408A flag (rows) x haplogroup (columns), integer counts,
# with 0 where a combination does not occur.
pd.crosstab(df_hap['mut_T408A'], df_hap['Haplogroup'])

Haplogroup,D5a3a1,G1b1*,H,H+152,H1,H11a,H11a2a2,H12a,H13a1a1a,H13a1a1e,...,U8b1a1,V13,V1a1,V1a1b,V7a,W1,W3a1,W6a,X2n,Y1
mut_T408A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
False,0,1,2,0,0,1,1,1,1,0,...,1,1,0,0,0,1,1,3,0,0
True,1,0,1,1,1,0,0,0,0,1,...,0,0,1,1,1,1,0,0,1,1
