# Pattern for genotypes:
### *D.R. to AL & ASAQ:* N/A
### *D.R. to AL & DHA-PPQ:* `KNF**Y2*`
### *D.R. to ASAQ & DHA-PPQ:* `TYY**Y2*`

In [1]:
# Imports
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

In [2]:
plt.rcParams['figure.figsize'] = [20, 10]

In [3]:
# data frame for AL baseline
df_al = pd.read_csv('al.txt', sep='\t')

In [4]:
# Filtering out multiple-resistance cases
# For AL - track genotypes res. to Art. and LM
df_al.iloc[:,0:151].filter(regex='KNF..Y..', axis=1).sum(axis=1)

0          0
1          0
2          0
3          0
4          0
5          0
6          0
7          0
8          0
9          0
10         0
11         0
12         0
13         0
14         0
15         0
16         0
17         0
18         0
19         0
20         0
21         0
22         0
23         0
24         0
25         0
26         0
27         0
28         0
29         0
       ...  
451    12443
452    12283
453    12149
454    12066
455    11975
456    11954
457    12073
458    12021
459    12070
460    12071
461    12160
462    12175
463    12314
464    12371
465    12548
466    12691
467    12814
468    12904
469    13057
470    13199
471    13196
472    13098
473    13252
474    13255
475    13203
476    13094
477    13186
478    13208
479    13388
480    13481
Length: 481, dtype: int64

In [5]:
# Column Count of all genotypes
# To check if the filter works
df_al.iloc[:,22:151]

Unnamed: 0,KNY--C1x,KNY--C1X,KNY--C2x,KNY--C2X,KNY--Y1x,KNY--Y1X,KNY--Y2x,KNY--Y2X,KYY--C1x,KYY--C1X,...,TNFNFY2X,TYFYFC1x,TYFYFC1X,TYFYFC2x,TYFYFC2X,TYFYFY1x,TYFYFY1X,TYFYFY2x,TYFYFY2X,sep.6
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1111
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1111
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1111
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1111
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1111
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1111
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1111
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1111
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1111
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1111


In [6]:
# excluding all TYY's
df_al.iloc[:,22:151].filter(regex='^(TYF|TNY|TNF|KYY|KYF|KNY|KNF)..Y..$', axis=1)

Unnamed: 0,KNY--Y1x,KNY--Y1X,KNY--Y2x,KNY--Y2X,KYY--Y1x,KYY--Y1X,KYY--Y2x,KYY--Y2X,KNF--Y1x,KNF--Y1X,...,TNYNYY2x,TNYNYY2X,TNFNFY1x,TNFNFY1X,TNFNFY2x,TNFNFY2X,TYFYFY1x,TYFYFY1X,TYFYFY2x,TYFYFY2X
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# Sum up all 
df_al['total_geno_freq'] = df_al.iloc[:,22:150].sum(axis=1)

In [13]:
df_al['mdr_count'] = df_al.iloc[:,0:150].filter(regex='^(TYF|TNY|TNF|KYY|KYF|KNY|KNF)..Y..$', axis=1).sum(axis=1)

In [14]:
df_al['current_percentage'] = df_al['mdr_count'] / df_al['total_geno_freq']

In [15]:
df_al

Unnamed: 0,current_time,sclock_to_time,year,month,day,seasonal_fac,treated_p_5-,treated_p_5+,population,sep,...,TYFYFC2x.1,TYFYFC2X.1,TYFYFY1x.1,TYFYFY1X.1,TYFYFY2x.1,TYFYFY2X.1,Unnamed: 279,total_geno_freq,mdr_count,current_percentage
0,0,631152000,1990,1,1,1,0.6,0.5,50000,-1111,...,0,0,0,0,0,0,,4989,0,0.000000
1,31,633830400,1990,2,1,1,0.0,0.0,50099,-1111,...,0,0,0,0,0,0,,7712,0,0.000000
2,59,636249600,1990,3,1,1,0.0,0.0,50233,-1111,...,0,0,0,0,0,0,,10623,0,0.000000
3,90,638928000,1990,4,1,1,0.0,0.0,50352,-1111,...,0,0,0,0,0,0,,14671,0,0.000000
4,120,641520000,1990,5,1,1,0.0,0.0,50457,-1111,...,0,0,0,0,0,0,,19019,0,0.000000
5,151,644198400,1990,6,1,1,0.0,0.0,50573,-1111,...,0,0,0,0,0,0,,23501,0,0.000000
6,181,646790400,1990,7,1,1,0.0,0.0,50677,-1111,...,0,0,0,0,0,0,,27013,0,0.000000
7,212,649468800,1990,8,1,1,0.0,0.0,50791,-1111,...,0,0,0,0,0,0,,28812,0,0.000000
8,243,652147200,1990,9,1,1,0.0,0.0,50897,-1111,...,0,0,0,0,0,0,,30434,0,0.000000
9,273,654739200,1990,10,1,1,0.0,0.0,50989,-1111,...,0,0,0,0,0,0,,31115,0,0.000000


In [23]:
df_al[df_al['current_percentage'].gt(0.01)].index[0]

130

In [24]:
df_al.loc[130].iloc[0] / 365

10.841095890410958

In [22]:
df_al[df_al['current_percentage'].gt(0.01)]

Unnamed: 0,current_time,sclock_to_time,year,month,day,seasonal_fac,treated_p_5-,treated_p_5+,population,sep,...,TYFYFC2x.1,TYFYFC2X.1,TYFYFY1x.1,TYFYFY1X.1,TYFYFY2x.1,TYFYFY2X.1,Unnamed: 279,total_geno_freq,mdr_count,current_percentage
130,3957,973036800,2000,11,1,1,0.4,0.4,66941,-1111,...,0,0,0,0,0,0,,5393,54,0.010013
131,3987,975628800,2000,12,1,1,0.4,0.4,67083,-1111,...,0,0,0,0,0,0,,5191,59,0.011366
132,4018,978307200,2001,1,1,1,0.4,0.4,67239,-1111,...,0,0,0,0,0,0,,4958,64,0.012908
133,4049,980985600,2001,2,1,1,0.4,0.4,67400,-1111,...,0,0,0,0,0,0,,4783,57,0.011917
134,4077,983404800,2001,3,1,1,0.4,0.4,67554,-1111,...,0,0,0,0,0,0,,4686,53,0.011310
135,4108,986083200,2001,4,1,1,0.4,0.4,67709,-1111,...,0,0,0,0,0,0,,4689,47,0.010023
136,4138,988675200,2001,5,1,1,0.4,0.4,67875,-1111,...,0,0,0,0,0,0,,4764,51,0.010705
137,4169,991353600,2001,6,1,1,0.4,0.4,68025,-1111,...,0,0,0,0,0,0,,4853,56,0.011539
138,4199,993945600,2001,7,1,1,0.4,0.4,68194,-1111,...,0,0,0,0,0,0,,4948,57,0.011520
139,4230,996624000,2001,8,1,1,0.4,0.4,68349,-1111,...,0,0,0,0,0,0,,5175,70,0.013527


In [53]:
# Build geno dict for text replacement
geno_db_df = pd.read_excel('geno-database.xlsx')
## converting df to series, and then to dict
geno_dict = geno_db_df.set_index('ID')['Shortname']
# Create Data Frame
adf = pd.read_csv('fo-al.txt', sep='\t')
# Text replacement
adf['to'] = adf['to'].replace(geno_dict)
adf['from'] = adf['from'].replace(geno_dict)

In [56]:
newdf = adf[['time','to']]

In [69]:
adf[adf['to'].str.contains('^(TYF|TNY|TNF|KYY|KYF|KNY|KNF)..Y..$')].index[0]

  """Entry point for launching an IPython kernel.


Unnamed: 0,time,from,to
0,3653,TNY--C1x,TNY--Y1x
21,3659,TNY--C1x,TNY--Y1x
24,3659,TNY--C1x,TNY--Y1x
33,3661,TNY--C1x,TNY--Y1x
42,3663,TNY--C1x,TNY--Y1x
43,3663,TNY--C1x,TNY--Y1x
51,3665,TNY--Y1x,KNY--Y1x
54,3666,TNY--C1x,TNY--Y1x
58,3667,TNY--C1x,TNY--Y1x
59,3667,TNY--C1x,TNY--Y1x
