In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from numpy import asarray
from numpy import savetxt
import re
import statsmodels.api as sm
pd.options.display.max_columns=50
pd.options.display.max_rows=50
from scipy import stats
import scipy
from platform import python_version

In [None]:
# Print the version string of the Python interpreter running this notebook.
print(python_version())

In [None]:
def inches_to_cm(inches):
    """Divide a length by 2.54.

    NOTE(review): despite its name, dividing by 2.54 turns a value expressed
    in centimetres into inches (1 in = 2.54 cm).  The plotting code in this
    notebook feeds the result to matplotlib's ``figsize``, so the argument is
    effectively a size in centimetres.  The name is kept for compatibility.

    Parameters
    ----------
    inches : number
        The length to divide by 2.54.

    Returns
    -------
    number
        ``inches / 2.54``.
    """
    centimetres_per_inch = 2.54
    return inches / centimetres_per_inch

In [None]:
def _significance_label(p_value):
    """Translate a p-value into the label drawn next to a bar ('***', '**', '*', 'ns')."""
    if p_value < 0.001:
        return '***'
    if p_value < 0.01:
        return '**'
    if p_value < 0.05:
        return '*'
    if p_value >= 0.05:
        return 'ns'
    # A NaN p-value fails every comparison above: draw nothing instead of
    # re-using the label of the previous bar.
    return ''


def _fit_and_draw_panel(ax, df, gene, predictor_names, shown_names, y_limits, dimension_ratio, error_kw):
    """Fit one OLS model of ``df[gene]`` and draw the selected coefficients as bars on ``ax``.

    Returns the plotted coefficients and their standard errors (both pandas Series
    indexed by ``shown_names``).
    """
    model = sm.OLS(df[gene], sm.add_constant(df[predictor_names].astype(int)), missing='drop')
    results = model.fit()
    coefficients = results.params[shown_names]
    coef_err = results.bse[shown_names]
    p_values = results.pvalues[shown_names]

    min_lim, max_lim = y_limits
    ax.set_ylim([min_lim, max_lim])

    ax.bar(x=shown_names, height=coefficients, color='gray', yerr=coef_err, ecolor='black', error_kw=error_kw)
    ax.axhline(y=0, color='k', linestyle='--', linewidth=2 * dimension_ratio, label="unvaccinated")

    ax.tick_params(axis='x', direction='in', length=3, labelsize=9 * dimension_ratio, rotation=45)
    ax.tick_params(axis='y', direction='in', length=3, labelsize=9 * dimension_ratio, right=True)

    axis_span = max_lim - min_lim
    for position, p_value in enumerate(p_values):
        # .iloc: the Series are indexed by coefficient name, so access by bar
        # position must be explicitly positional.
        coefficient = coefficients.iloc[position]
        error = coef_err.iloc[position]
        if coefficient > 0:
            # Just above the upper error-bar cap.
            height = coefficient + error + 0.01 * axis_span
        else:
            # Below the lower error-bar cap (larger offset than above).
            height = coefficient - error - 0.04 * axis_span

        ax.text(x=position,
                y=height,
                s=_significance_label(p_value),
                color='black',
                horizontalalignment='center', fontsize=10 * dimension_ratio)

    return coefficients, coef_err


def _export_coefficients(coefficients, coef_err, csv_path):
    """Write the plotted coefficients and their standard errors to ``csv_path``."""
    table = pd.DataFrame(
        np.array([np.array(range(1, len(coefficients) + 1)), np.array(coefficients), np.array(coef_err)]).T,
        columns=['column #', 'coefficients', 'coefficients error'])
    table.to_csv(csv_path)


def LR(coefficients_names1, coefficients_present1, coefficients_names2, coefficients_present2, df, gene, ylim, name, panels, dimensions):
    """Fit OLS model(s) of ``df[gene]`` and save bar plots of selected coefficients.

    Panel 1 regresses ``df[gene]`` on ``coefficients_names1`` (cast to int, plus a
    constant) and plots the coefficients listed in ``coefficients_present1`` with
    their standard errors and a significance label per bar.  When ``panels`` is
    true, a second model using ``coefficients_names2`` / ``coefficients_present2``
    is drawn on a second axis.

    Side effects: writes ``<name> - <gene>.csv`` (and ``... - panel 2.csv`` when
    ``panels`` is true) under the ``csv`` sub-folder of the hard-coded output
    directory, and saves the figure as png, eps, jpg, tif and svg in the
    sub-folders of the same names.

    Changes relative to the previous implementation:
      * bar values are read with ``.iloc`` instead of integer keys on a
        label-indexed Series;
      * the panel-1 CSV is written once, after the annotation loop, instead of
        once per bar;
      * a NaN p-value yields an empty label rather than the previous bar's label.

    Parameters
    ----------
    coefficients_names1, coefficients_names2 : list of str
        Predictor columns of ``df`` for the first / second model.
    coefficients_present1, coefficients_present2 : list of str
        Subset of the predictors whose coefficients are plotted and exported.
    df : pandas.DataFrame
        Data holding the ``gene`` column and all predictor columns.  Not modified.
    gene : str
        Name of the response column; also used in the y-axis label and file names.
    ylim : sequence of two numbers
        ``(upper, lower)`` limits of the y-axis, in that order.
    name : str
        Prefix of every output file name.
    panels : bool
        True to draw both models side by side, False to draw only the first.
    dimensions : sequence of two numbers
        Figure width and height; each is passed through ``inches_to_cm`` (i.e.
        divided by 2.54) to obtain ``figsize``.  ``dimensions[1] / 14.5`` scales
        fonts and line widths.
    """
    output_directory = r"...\output\\"
    dimension_ratio = dimensions[1] / 14.5

    # ylim is given as (upper, lower).
    max_lim = ylim[0]
    min_lim = ylim[1]
    y_limits = (min_lim, max_lim)

    figure_size = (inches_to_cm(dimensions[0]), inches_to_cm(dimensions[1]))
    if panels:
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figure_size)
    else:
        fig, ax1 = plt.subplots(1, 1, figsize=figure_size)

    error_kw = dict(lw=1.25 * dimension_ratio, capsize=4 * dimension_ratio, capthick=1.25 * dimension_ratio)

    # ---- panel 1 -----------------------------------------------------------
    coefficients, coef_err = _fit_and_draw_panel(
        ax1, df, gene, coefficients_names1, coefficients_present1, y_limits, dimension_ratio, error_kw)
    ax1.set_ylabel('Ct regression coefficients, ' + gene, fontsize=10 * dimension_ratio)
    ax1.set_xlabel("Days since 2nd dose", fontsize=10 * dimension_ratio, loc='left')
    _export_coefficients(coefficients, coef_err, output_directory + r"csv\\" + name + ' - ' + gene + '.csv')

    # ---- panel 2 (optional) --------------------------------------------------
    if panels:
        coefficients, coef_err = _fit_and_draw_panel(
            ax2, df, gene, coefficients_names2, coefficients_present2, y_limits, dimension_ratio, error_kw)
        ax2.legend(fontsize=10 * dimension_ratio, frameon=False)
        ax2.set_xlabel("Vaccination status", fontsize=10 * dimension_ratio)
        _export_coefficients(coefficients, coef_err,
                             output_directory + r"csv\\" + name + ' - ' + gene + " - panel 2" + '.csv')

    # One copy of the figure per format, each in the sub-folder named after it.
    for extension in ('png', 'eps', 'jpg', 'tif', 'svg'):
        fig.savefig(output_directory + extension + r"\\" + name + ' - ' + gene + '.' + extension,
                    dpi=600, bbox_inches='tight')

In [None]:
def binning_names(bins):
    """Return the labels of the intervals delimited by ``bins``.

    For consecutive edges ``a, b`` the label is ``"<a+1>-<b>"``; a final label
    ``"><last edge>"`` is appended, so the result has ``len(bins)`` entries.

    Parameters
    ----------
    bins : sequence of numbers
        Bin edges in the order they should be labelled.

    Returns
    -------
    list of str
    """
    labels = [f"{lower + 1}-{upper}" for lower, upper in zip(bins, bins[1:])]
    labels.append(f">{bins[len(bins) - 1]}")
    return labels

In [None]:
def binning(df, bins, zeros, comparing):
    """Add one 0/1 indicator column per bin of ``df['time vaccinated']``.

    Column names come from ``binning_names(bins)``.  The columns are added to
    ``df`` itself (the frame is modified in place) and ``df`` is returned.

    The indicators are computed with vectorised comparisons rather than a
    row-wise ``apply``; missing values compare false and therefore yield 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend.  Must contain 'time vaccinated' unless ``zeros`` is True.
    bins : sequence of numbers
        Bin edges.
    zeros : bool
        True: every indicator column is set to 0 and 'time vaccinated' is not read.
    comparing : bool
        False: bin ``i`` flags ``bins[i] < x <= bins[i+1]``.
        True: bin ``i`` flags ``x > bins[i]`` (no upper bound), so the
        indicators are cumulative even though the column names are unchanged.
        The last bin always flags ``x > bins[-1]``.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df``.
    """
    bin_names = binning_names(bins)
    last = len(bins) - 1

    if zeros:
        for column in bin_names:
            df[column] = 0
        return df

    elapsed = df['time vaccinated']
    for i in range(last):
        in_bin = elapsed > bins[i]
        if not comparing:
            in_bin = in_bin & (elapsed <= bins[i + 1])
        # int64 matches the dtype the previous apply-based code produced.
        df[bin_names[i]] = in_bin.astype('int64')
    df[bin_names[last]] = (elapsed > bins[last]).astype('int64')
    return df

In [None]:
# Load the hospitalization table; a later cell uses its 'seq_id' column to
# exclude those samples from the regression data.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
hospital = pd.read_pickle(r"...\Kishoni_Coroa_hospitalization.pkl")

In [None]:
# Folder holding the pickled cohort tables.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
project_path = r"...\20210909"

file_name = r"df_all.pkl"
file_path = project_path + "\\" + file_name
df_all = pd.read_pickle(file_path)

file_name = r"df_vaccinated.pkl"
file_path = project_path + "\\" + file_name
df_vaccinated = pd.read_pickle(file_path)

file_name = r"df_unvaccinated.pkl"
file_path = project_path + "\\" + file_name
df_unvaccinated = pd.read_pickle(file_path)

file_name = r"df_boosters.pkl"
file_path = project_path + "\\" + file_name
df_booster = pd.read_pickle(file_path)

# Keep only rows with immunosuppression == 0.
# .copy(): the following cells add columns to these frames; assigning into a
# boolean-filtered frame without an explicit copy raises SettingWithCopyWarning.
df_vaccinated = df_vaccinated[df_vaccinated['immunosuppression']==0].copy()
df_booster = df_booster[df_booster['immunosuppression']==0].copy()
df_unvaccinated = df_unvaccinated[df_unvaccinated['immunosuppression']==0].copy()

In [None]:
# Day numbers of the first day of each month
# (presumably on the scale of 'sample_date_lab' - TODO confirm):
# January 1st - 366
# February 1st - 397
# March 1st - 425
# April 1st - 456
# May 1st - 486
# June 1st - 517
# July 1st - 547
# August 1st - 578

# Samples with an earlier 'sample_date_lab' are dropped.
earliest_sample_date = 544

# Edges of the 'time vaccinated' bins.
bins = [6, 30, 60, 120, 180]

df_vaccinated = binning(df_vaccinated, bins, False, False)
df_booster = binning(df_booster, bins, False, False)
# zeros=True: all bin indicators are 0 for the unvaccinated cohort.
df_unvaccinated = binning(df_unvaccinated, bins, True, False)

df_vaccinated['vaccinated'] = 1
df_vaccinated['booster'] = 0

df_booster['vaccinated'] = 1
df_booster['booster'] = 1

df_unvaccinated['vaccinated'] = 0
df_unvaccinated['booster'] = 0

df_all_LR = pd.concat([df_vaccinated, df_unvaccinated, df_booster])
df_all_LR = df_all_LR.drop_duplicates(subset='seq_id')
# .copy(): columns are assigned below; writing into a filtered frame without an
# explicit copy raises SettingWithCopyWarning.
df_all_LR = df_all_LR[df_all_LR['sample_date_lab']>=earliest_sample_date].copy()
# Re-base the sample date so the earliest remaining sample is 0.
df_all_LR['sample_date_lab'] = df_all_LR['sample_date_lab'] - df_all_LR['sample_date_lab'].min()

# One 0/1 indicator per age decade: 'age 30-39' ... 'age 100-109'.
age_columns = []
for decade_start in range(30, 110, 10):
    age_column = 'age ' + str(decade_start) + '-' + str(decade_start + 9)
    in_decade = (df_all_LR['age'] >= decade_start) & (df_all_LR['age'] < decade_start + 10)
    df_all_LR[age_column] = in_decade.astype('int64')
    age_columns.append(age_column)

# Restrict to ages 20..120 (inclusive).
df_all_LR = df_all_LR[(df_all_LR['age']>=20) & (df_all_LR['age']<=120)]

# Passed to LR as (upper, lower) y-axis limits.
ylim = [6.1, -0.2]

# Covariates shared by both models.
adjustment_columns = ['sample_date_lab'] + age_columns + ['sex']

reg_coef1 = binning_names(bins) + adjustment_columns + ['booster']
reg_coef_present1 = binning_names(bins) + ['booster']
reg_coef2 = adjustment_columns + ['vaccinated', 'booster']
reg_coef_present2 = ['vaccinated', 'booster']

for gene in ['N', 'RdRp', 'E']:
    if gene == 'RdRp':
        name = "Figure 1"
        figure_dimensions = (18, 18)
    else:
        name = "Extended Data Figure 1"
        figure_dimensions = (9, 9)
    LR(reg_coef1, reg_coef_present1, reg_coef2, reg_coef_present2, df_all_LR, gene, ylim, name, True, figure_dimensions)

In [None]:
# Day numbers of the first day of each month
# (presumably on the scale of 'sample_date_lab' - TODO confirm):
# January 1st - 366
# February 1st - 397
# March 1st - 425
# April 1st - 456
# May 1st - 486
# June 1st - 517
# July 1st - 547
# August 1st - 578

# Samples with an earlier 'sample_date_lab' are dropped.
earliest_sample_date = 544

# Edges of the 'time vaccinated' bins.
bins = [6, 30, 60, 120, 180]

df_vaccinated = binning(df_vaccinated, bins, False, False)
df_booster = binning(df_booster, bins, False, False)
# zeros=True: all bin indicators are 0 for the unvaccinated cohort.
df_unvaccinated = binning(df_unvaccinated, bins, True, False)

df_vaccinated['vaccinated'] = 1
df_vaccinated['booster'] = 0

df_booster['vaccinated'] = 1
df_booster['booster'] = 1

df_unvaccinated['vaccinated'] = 0
df_unvaccinated['booster'] = 0

df_all_LR = pd.concat([df_vaccinated, df_unvaccinated, df_booster])
df_all_LR = df_all_LR.drop_duplicates(subset='seq_id')
# .copy(): columns are assigned below; writing into a filtered frame without an
# explicit copy raises SettingWithCopyWarning.
df_all_LR = df_all_LR[df_all_LR['sample_date_lab']>=earliest_sample_date].copy()
# Re-base the sample date so the earliest remaining sample is 0.
df_all_LR['sample_date_lab'] = df_all_LR['sample_date_lab'] - df_all_LR['sample_date_lab'].min()

# One 0/1 indicator per age decade: 'age 30-39' ... 'age 100-109'.
age_columns = []
for decade_start in range(30, 110, 10):
    age_column = 'age ' + str(decade_start) + '-' + str(decade_start + 9)
    in_decade = (df_all_LR['age'] >= decade_start) & (df_all_LR['age'] < decade_start + 10)
    df_all_LR[age_column] = in_decade.astype('int64')
    age_columns.append(age_column)

# Restrict to ages 50..120 (inclusive).
# NOTE(review): after this filter 'age 30-39' and 'age 40-49' are all zero but
# are still passed to the regression - confirm this is intended.
df_all_LR = df_all_LR[(df_all_LR['age']>=50) & (df_all_LR['age']<=120)]

# Passed to LR as (upper, lower) y-axis limits.
ylim = [9.2, -1.4]

# Covariates shared by both models.
adjustment_columns = ['sample_date_lab'] + age_columns + ['sex']

reg_coef1 = binning_names(bins) + adjustment_columns + ['booster']
reg_coef_present1 = binning_names(bins) + ['booster']
reg_coef2 = adjustment_columns + ['vaccinated', 'booster']
reg_coef_present2 = ['vaccinated', 'booster']

name = "Extended Data Figure 2 - upper - above 50"

for gene in ['N', 'RdRp', 'E']:
    LR(reg_coef1, reg_coef_present1, reg_coef2, reg_coef_present2, df_all_LR, gene, ylim, name, False, (4, 9))
print(len(df_all_LR))

In [None]:
# Day numbers of the first day of each month
# (presumably on the scale of 'sample_date_lab' - TODO confirm):
# January 1st - 366
# February 1st - 397
# March 1st - 425
# April 1st - 456
# May 1st - 486
# June 1st - 517
# July 1st - 547
# August 1st - 578

# Samples with an earlier 'sample_date_lab' are dropped.
earliest_sample_date = 544

# Edges of the 'time vaccinated' bins; this cell starts the first bin at 13.
bins = [13, 30, 60, 120, 180]

# Keep only booster rows with 'time boostered' >= 14.
# .copy(): binning() and the assignments below add columns to this filtered
# frame; without an explicit copy that raises SettingWithCopyWarning.
df_booster = df_booster[df_booster['time boostered']>=14].copy()

df_vaccinated = binning(df_vaccinated, bins, False, False)
df_booster = binning(df_booster, bins, False, False)
# zeros=True: all bin indicators are 0 for the unvaccinated cohort.
df_unvaccinated = binning(df_unvaccinated, bins, True, False)

df_vaccinated['vaccinated'] = 1
df_vaccinated['booster'] = 0

df_booster['vaccinated'] = 1
df_booster['booster'] = 1

df_unvaccinated['vaccinated'] = 0
df_unvaccinated['booster'] = 0

df_all_LR = pd.concat([df_vaccinated, df_unvaccinated, df_booster])
df_all_LR = df_all_LR.drop_duplicates(subset='seq_id')
# .copy(): columns are assigned below (same reason as above).
df_all_LR = df_all_LR[df_all_LR['sample_date_lab']>=earliest_sample_date].copy()
# Re-base the sample date so the earliest remaining sample is 0.
df_all_LR['sample_date_lab'] = df_all_LR['sample_date_lab'] - df_all_LR['sample_date_lab'].min()

# One 0/1 indicator per age decade: 'age 30-39' ... 'age 100-109'.
age_columns = []
for decade_start in range(30, 110, 10):
    age_column = 'age ' + str(decade_start) + '-' + str(decade_start + 9)
    in_decade = (df_all_LR['age'] >= decade_start) & (df_all_LR['age'] < decade_start + 10)
    df_all_LR[age_column] = in_decade.astype('int64')
    age_columns.append(age_column)

# Restrict to ages 20..120 (inclusive).
df_all_LR = df_all_LR[(df_all_LR['age']>=20) & (df_all_LR['age']<=120)]

# Passed to LR as (upper, lower) y-axis limits.
ylim = [5.8, -0.2]

# Covariates shared by both models.
adjustment_columns = ['sample_date_lab'] + age_columns + ['sex']

reg_coef1 = binning_names(bins) + adjustment_columns + ['booster']
reg_coef_present1 = binning_names(bins) + ['booster']
reg_coef2 = adjustment_columns + ['vaccinated', 'booster']
reg_coef_present2 = ['vaccinated', 'booster']

name = "Extended Data Figure 3"

for gene in ['N', 'RdRp', 'E']:
    LR(reg_coef1, reg_coef_present1, reg_coef2, reg_coef_present2, df_all_LR, gene, ylim, name, False, (4, 9))


# Reload the booster cohort so the 'time boostered' filter applied at the top
# of this cell does not carry over to the following cells.
file_name = r"df_boosters.pkl"
file_path = project_path + "\\" + file_name
df_booster = pd.read_pickle(file_path)

# .copy(): later cells add columns to this filtered frame.
df_booster = df_booster[df_booster['immunosuppression']==0].copy()

In [None]:
# Day numbers of the first day of each month
# (presumably on the scale of 'sample_date_lab' - TODO confirm):
# January 1st - 366
# February 1st - 397
# March 1st - 425
# April 1st - 456
# May 1st - 486
# June 1st - 517
# July 1st - 547
# August 1st - 578

# Samples with an earlier 'sample_date_lab' are dropped.
earliest_sample_date = 544

# Edges of the 'time vaccinated' bins.
bins = [6, 30, 60, 120, 180]

df_vaccinated = binning(df_vaccinated, bins, False, False)
df_booster = binning(df_booster, bins, False, False)
# zeros=True: all bin indicators are 0 for the unvaccinated cohort.
df_unvaccinated = binning(df_unvaccinated, bins, True, False)

df_vaccinated['vaccinated'] = 1
df_vaccinated['booster'] = 0

df_booster['vaccinated'] = 1
df_booster['booster'] = 1

df_unvaccinated['vaccinated'] = 0
df_unvaccinated['booster'] = 0

df_all_LR = pd.concat([df_vaccinated, df_unvaccinated, df_booster])
df_all_LR = df_all_LR.drop_duplicates(subset='seq_id')
# .copy(): columns are assigned below; writing into a filtered frame without an
# explicit copy raises SettingWithCopyWarning.
df_all_LR = df_all_LR[df_all_LR['sample_date_lab']>=earliest_sample_date].copy()
# Re-base the sample date so the earliest remaining sample is 0.
df_all_LR['sample_date_lab'] = df_all_LR['sample_date_lab'] - df_all_LR['sample_date_lab'].min()

# One 0/1 indicator per age decade: 'age 30-39' ... 'age 100-109'.
age_columns = []
for decade_start in range(30, 110, 10):
    age_column = 'age ' + str(decade_start) + '-' + str(decade_start + 9)
    in_decade = (df_all_LR['age'] >= decade_start) & (df_all_LR['age'] < decade_start + 10)
    df_all_LR[age_column] = in_decade.astype('int64')
    age_columns.append(age_column)

# Restrict to ages 20..120 (inclusive).
df_all_LR = df_all_LR[(df_all_LR['age']>=20) & (df_all_LR['age']<=120)]

# Drop every sample whose seq_id appears in the hospitalization table.
df_all_LR = df_all_LR[~df_all_LR['seq_id'].isin(hospital['seq_id'])]

# Passed to LR as (upper, lower) y-axis limits.
ylim = [6.1, -0.2]

# Covariates shared by both models.
adjustment_columns = ['sample_date_lab'] + age_columns + ['sex']

reg_coef1 = binning_names(bins) + adjustment_columns + ['booster']
reg_coef_present1 = binning_names(bins) + ['booster']
reg_coef2 = adjustment_columns + ['vaccinated', 'booster']
reg_coef_present2 = ['vaccinated', 'booster']

name = "Extended Data Figure 4"

for gene in ['N', 'RdRp', 'E']:
    LR(reg_coef1, reg_coef_present1, reg_coef2, reg_coef_present2, df_all_LR, gene, ylim, name, False, (4, 9))

print(len(df_all_LR))

In [None]:
# Day numbers of the first day of each month
# (presumably on the scale of 'sample_date_lab' - TODO confirm):
# January 1st - 366
# February 1st - 397
# March 1st - 425
# April 1st - 456
# May 1st - 486
# June 1st - 517
# July 1st - 547
# August 1st - 578

# Samples with an earlier 'sample_date_lab' are dropped.
earliest_sample_date = 544

# Edges of the 'time vaccinated' bins.
bins = [6, 30, 60, 120, 180]

df_vaccinated = binning(df_vaccinated, bins, False, False)
df_booster = binning(df_booster, bins, False, False)
# zeros=True: all bin indicators are 0 for the unvaccinated cohort.
df_unvaccinated = binning(df_unvaccinated, bins, True, False)

df_vaccinated['vaccinated'] = 1
df_vaccinated['booster'] = 0

df_booster['vaccinated'] = 1
df_booster['booster'] = 1

df_unvaccinated['vaccinated'] = 0
df_unvaccinated['booster'] = 0

df_all_LR = pd.concat([df_vaccinated, df_unvaccinated, df_booster])
df_all_LR = df_all_LR.drop_duplicates(subset='seq_id')
# .copy(): columns are assigned below; writing into a filtered frame without an
# explicit copy raises SettingWithCopyWarning.
df_all_LR = df_all_LR[df_all_LR['sample_date_lab']>=earliest_sample_date].copy()
# Re-base the sample date so the earliest remaining sample is 0.
df_all_LR['sample_date_lab'] = df_all_LR['sample_date_lab'] - df_all_LR['sample_date_lab'].min()

# One 0/1 indicator per age decade: 'age 30-39' ... 'age 100-109'.
age_columns = []
for decade_start in range(30, 110, 10):
    age_column = 'age ' + str(decade_start) + '-' + str(decade_start + 9)
    in_decade = (df_all_LR['age'] >= decade_start) & (df_all_LR['age'] < decade_start + 10)
    df_all_LR[age_column] = in_decade.astype('int64')
    age_columns.append(age_column)

# Restrict to ages 20 (inclusive) to 50 (exclusive).
# NOTE(review): after this filter the indicators from 'age 50-59' upward are all
# zero but are still passed to the regression - confirm this is intended.
df_all_LR = df_all_LR[(df_all_LR['age']>=20) & (df_all_LR['age']<50)]

# Passed to LR as (upper, lower) y-axis limits.
ylim = [5.6, -0.2]

# Covariates shared by both models.
adjustment_columns = ['sample_date_lab'] + age_columns + ['sex']

reg_coef1 = binning_names(bins) + adjustment_columns + ['booster']
reg_coef_present1 = binning_names(bins) + ['booster']
reg_coef2 = adjustment_columns + ['vaccinated', 'booster']
reg_coef_present2 = ['vaccinated', 'booster']

name = "Extended Data Figure 2 - lower - under 50"

for gene in ['N', 'RdRp', 'E']:
    LR(reg_coef1, reg_coef_present1, reg_coef2, reg_coef_present2, df_all_LR, gene, ylim, name, False, (4, 9))

In [None]:
# Day numbers of the first day of each month
# (presumably on the scale of 'sample_date_lab' - TODO confirm):
# January 1st - 366
# February 1st - 397
# March 1st - 425
# April 1st - 456
# May 1st - 486
# June 1st - 517
# July 1st - 547
# August 1st - 578

# Samples with an earlier 'sample_date_lab' are dropped.
earliest_sample_date = 544

# Edges of the 'time vaccinated' bins.
bins = [6, 60, 180]

# comparing=True: each indicator flags 'time vaccinated' > lower edge with no
# upper bound, so the indicators are cumulative.
df_vaccinated = binning(df_vaccinated, bins, False, True)
df_booster = binning(df_booster, bins, False, True)
# zeros=True: all bin indicators are 0 for the unvaccinated cohort.
df_unvaccinated = binning(df_unvaccinated, bins, True, True)

df_vaccinated['vaccinated'] = 1
df_vaccinated['booster'] = 0

df_booster['vaccinated'] = 1
df_booster['booster'] = 1

df_unvaccinated['vaccinated'] = 0
df_unvaccinated['booster'] = 0

df_all_LR = pd.concat([df_vaccinated, df_unvaccinated, df_booster])
df_all_LR = df_all_LR.drop_duplicates(subset='seq_id')
# .copy(): columns are assigned below; writing into a filtered frame without an
# explicit copy raises SettingWithCopyWarning.
df_all_LR = df_all_LR[df_all_LR['sample_date_lab']>=earliest_sample_date].copy()
# Re-base the sample date so the earliest remaining sample is 0.
df_all_LR['sample_date_lab'] = df_all_LR['sample_date_lab'] - df_all_LR['sample_date_lab'].min()

# One 0/1 indicator per age decade: 'age 30-39' ... 'age 100-109'.
age_columns = []
for decade_start in range(30, 110, 10):
    age_column = 'age ' + str(decade_start) + '-' + str(decade_start + 9)
    in_decade = (df_all_LR['age'] >= decade_start) & (df_all_LR['age'] < decade_start + 10)
    df_all_LR[age_column] = in_decade.astype('int64')
    age_columns.append(age_column)

# Restrict to ages 20..120 (inclusive).
df_all_LR = df_all_LR[(df_all_LR['age']>=20) & (df_all_LR['age']<=120)]

# Passed to LR as (upper, lower) y-axis limits.
ylim = [5, -5]

# Covariates shared by both models.
adjustment_columns = ['sample_date_lab'] + age_columns + ['sex']

reg_coef1 = binning_names(bins) + adjustment_columns + ['booster']
reg_coef_present1 = binning_names(bins) + ['booster']
reg_coef2 = adjustment_columns + ['vaccinated', 'booster']
reg_coef_present2 = ['vaccinated', 'booster']

name = "Decrease"

for gene in ['N', 'RdRp', 'E']:
    LR(reg_coef1, reg_coef_present1, reg_coef2, reg_coef_present2, df_all_LR, gene, ylim, name, True, (18, 18))