In [1]:
import pandas as pd
import re

In [2]:
df = pd.read_csv('2024-05-05 H1N1 Dynamics.csv')
df

Unnamed: 0,Dataset,Virus,Subject,Age,Time,Measurement
0,2016 UGA,H1N1 A/South Carolina/1/1918,ID_001_UGA2016,29,Day0,2
1,2016 UGA,H1N1 A/Weiss/JY2/1943,ID_001_UGA2016,29,Day0,4
2,2016 UGA,H1N1 A/Fort Monmouth/1/1947,ID_001_UGA2016,29,Day0,1
3,2016 UGA,H1N1 A/Denver/1/1957,ID_001_UGA2016,29,Day0,0
4,2016 UGA,H1N1 A/New Jersey/8/1976,ID_001_UGA2016,29,Day0,0
...,...,...,...,...,...,...
36056,2021 UGA,H1N1 A/Victoria/2570/2019,ID_252C_UGA2021,14,Day0,2
36057,2021 UGA,H1N1 A/California/7/2009,ID_252C_UGA2021,14,Day33,9
36058,2021 UGA,H1N1 A/Brisbane/2/2018,ID_252C_UGA2021,14,Day33,8
36059,2021 UGA,H1N1 A/Guangdong-Maonan/SWL1536/2019,ID_252C_UGA2021,14,Day33,5


At this point, there are 36,061 rows of data. The first step is to combine data that is related to a subject so that the trends can be observed per subject. The way to do that is to isolate subjects based on the number in between their IDs, which will be referred to as a base ID. Although the IDs are going to be different for every study, subject IDs that share the same base IDs will be treated as the same subject because they are the same subject in different years.

In [6]:
def extract_base_id(subject_id):
    match = re.match(r"ID_(\w+)_UGA\d{4}", subject_id)
    return match.group(1) if match else subject_id

In [8]:
df['base_id'] = df['Subject'].apply(extract_base_id)
df

Unnamed: 0,Dataset,Virus,Subject,Age,Time,Measurement,base_id
0,2016 UGA,H1N1 A/South Carolina/1/1918,ID_001_UGA2016,29,Day0,2,001
1,2016 UGA,H1N1 A/Weiss/JY2/1943,ID_001_UGA2016,29,Day0,4,001
2,2016 UGA,H1N1 A/Fort Monmouth/1/1947,ID_001_UGA2016,29,Day0,1,001
3,2016 UGA,H1N1 A/Denver/1/1957,ID_001_UGA2016,29,Day0,0,001
4,2016 UGA,H1N1 A/New Jersey/8/1976,ID_001_UGA2016,29,Day0,0,001
...,...,...,...,...,...,...,...
36056,2021 UGA,H1N1 A/Victoria/2570/2019,ID_252C_UGA2021,14,Day0,2,252C
36057,2021 UGA,H1N1 A/California/7/2009,ID_252C_UGA2021,14,Day33,9,252C
36058,2021 UGA,H1N1 A/Brisbane/2/2018,ID_252C_UGA2021,14,Day33,8,252C
36059,2021 UGA,H1N1 A/Guangdong-Maonan/SWL1536/2019,ID_252C_UGA2021,14,Day33,5,252C


Now that there is a base ID section in each row, this base ID will be used to create a disctionary in each row that contains a key of the viruses that the subject was vaccinated against. Inside that key's value, there will be a key of the dataset and the vaccine response measurements as the value.

In [11]:
def create_virus_dict(group):
    virus_dict = {}
    for virus, virus_group in group.groupby('Virus'):
        year_dict = {}
        for year, year_group in virus_group.groupby('Dataset'):
            year_dict[year] = year_group.sort_values('Time')['Measurement'].tolist()
        virus_dict[virus] = year_dict
    return virus_dict

In [13]:
result = df.groupby('base_id').apply(create_virus_dict).reset_index(name='Virus_Year_Measurements')

  result = df.groupby('base_id').apply(create_virus_dict).reset_index(name='Virus_Year_Measurements')


In [14]:
result

Unnamed: 0,base_id,Virus_Year_Measurements
0,001,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [3, 3..."
1,001C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [0, 2..."
2,002,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 3..."
3,003,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 3..."
4,004,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 3..."
...,...,...
739,560,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [4, 5]..."
740,561,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [6, 6,..."
741,562,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [4, 5]..."
742,563,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [3, 4]..."


Now the improvement in vaccine response measurements will be placed in an array. The value of each element represents the change in the improvement in the first and the last element of the array of vaccine measurements associated with each virus. The first element is the Day 0 measurement and the last element is the measurement taken on the final day of that test in that study.

In [18]:
improvement_arr = []
for i in range(len(result['Virus_Year_Measurements'])):
    temp_improvement_arr = []
    for virus in result['Virus_Year_Measurements'][i]:
        for dataset in result['Virus_Year_Measurements'][i][virus]:
            delta = result['Virus_Year_Measurements'][i][virus][dataset]
            temp_improvement_arr.append(delta[len(delta)-1] - delta[0])
    improvement_arr.append(temp_improvement_arr)

In [20]:
result['Improvement'] = improvement_arr
result

Unnamed: 0,base_id,Virus_Year_Measurements,Improvement
0,001,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [3, 3...","[0, 0, 0, 0, 4, 2, 5, 1, 0, 0, 4, 1, 1, 0, 1, ..."
1,001C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [0, 2...","[0, 1, 0, 0, 0, -1, 2, -1, 0, 0, 1, 1, 2, -1, ..."
2,002,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 3...","[-1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0..."
3,003,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 3...","[-1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 1, 1]"
4,004,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 3...","[2, 0, 2, 4, 0, 1, 0, 4, 2, 2, 2, 2, 3, 0, 0, 4]"
...,...,...,...
739,560,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [4, 5]...","[1, 1, 1, 3]"
740,561,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [6, 6,...","[0, 1, 0, 1]"
741,562,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [4, 5]...","[1, 2, 1, 2]"
742,563,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [3, 4]...","[1, 1, 1, 1]"


Now the average improvement of each subject will be calculated by taking the average of the improvement array.

In [23]:
average_arr = []
for i in range(len(result['Improvement'])):
   average_arr.append(sum(result['Improvement'][i])/len(result['Improvement'][i]))

In [25]:
result['Average Improvement'] = average_arr
result

Unnamed: 0,base_id,Virus_Year_Measurements,Improvement,Average Improvement
0,001,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [3, 3...","[0, 0, 0, 0, 4, 2, 5, 1, 0, 0, 4, 1, 1, 0, 1, ...",1.16129
1,001C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [0, 2...","[0, 1, 0, 0, 0, -1, 2, -1, 0, 0, 1, 1, 2, -1, ...",0.36000
2,002,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 3...","[-1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0...",-0.06250
3,003,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 3...","[-1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 1, 1]",0.37500
4,004,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 3...","[2, 0, 2, 4, 0, 1, 0, 4, 2, 2, 2, 2, 3, 0, 0, 4]",1.75000
...,...,...,...,...
739,560,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [4, 5]...","[1, 1, 1, 3]",1.50000
740,561,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [6, 6,...","[0, 1, 0, 1]",0.50000
741,562,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [4, 5]...","[1, 2, 1, 2]",1.50000
742,563,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [3, 4]...","[1, 1, 1, 1]",1.00000


In [27]:
improvement_ct = 0
for i in range(len(result)):
   if result['Average Improvement'][i] > 0:
       improvement_ct += 1

In [29]:
improvement_ct/len(result)

0.7983870967741935

Approximately 79.839% of all patients have shown improvement. Now we will split them into two groups: One-year subjects and Multi-year subjects

In order to confirm that a subject's vaccination response was studied only for a year, the subject's age and the associated dataset will be displayed on the table to ensure that the subject's ID corresponds to one study or multiple studies.

In [33]:
age_set = df.groupby('base_id').agg({'Age':set}).reset_index()

In [35]:
datasets = df.groupby('base_id').agg({'Dataset':set}).reset_index()

In [37]:
age_set_list = []
dataset_list = []
for i in range(len(age_set['Age'])):
    age_set_list.append(age_set['Age'][i])
for i in range(len(datasets['Dataset'])):
    dataset_list.append(datasets['Dataset'][i])

In [39]:
result['Age'] = age_set_list
result['Dataset'] = dataset_list

Now the subjects will be split into the one year and multi-year groups

In [42]:
one_yr_group = []
multi_yr_group = []
for i in range(len(result['Virus_Year_Measurements'])):
    all_lengths_one = all(len(value) == 1 for value in result['Virus_Year_Measurements'][i].values())
    if all_lengths_one:
        one_yr_group.append(result.loc[i])
    else:
        multi_yr_group.append(result.loc[i])

In [44]:
one_yr_group = pd.DataFrame(one_yr_group).reset_index()

In [46]:
one_yr_group

Unnamed: 0,index,base_id,Virus_Year_Measurements,Improvement,Average Improvement,Age,Dataset
0,2,002,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 3...","[-1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0...",-0.0625,{29},{2016 UGA}
1,3,003,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 3...","[-1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 1, 1]",0.3750,{28},{2016 UGA}
2,4,004,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 3...","[2, 0, 2, 4, 0, 1, 0, 4, 2, 2, 2, 2, 3, 0, 0, 4]",1.7500,{27},{2016 UGA}
3,8,006,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 1...","[0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]",0.1875,{35},{2016 UGA}
4,14,009,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 5...","[1, 0, 1, 2, 0, 0, 0, 1, 0, 0, -1, 1, 0, -1, 1...",0.3125,{61},{2016 UGA}
...,...,...,...,...,...,...,...
256,739,560,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [4, 5]...","[1, 1, 1, 3]",1.5000,{18},{2021 UGA}
257,740,561,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [6, 6,...","[0, 1, 0, 1]",0.5000,{18},{2021 UGA}
258,741,562,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [4, 5]...","[1, 2, 1, 2]",1.5000,{18},{2021 UGA}
259,742,563,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [3, 4]...","[1, 1, 1, 1]",1.0000,{18},{2021 UGA}


In [48]:
improvement_ct = 0
for i in range(len(one_yr_group)):
   if one_yr_group['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(one_yr_group)

0.8467432950191571

In [50]:
dataset_one_yr = []
for i in range(len(datasets['Dataset'])):
    if len(datasets['Dataset'][i]) == 1:
        dataset_one_yr.append(datasets.loc[i])
dataset_one_yr = pd.DataFrame(dataset_one_yr).reset_index()

In [52]:
dataset_one_yr

Unnamed: 0,index,base_id,Dataset
0,2,002,{2016 UGA}
1,3,003,{2016 UGA}
2,4,004,{2016 UGA}
3,8,006,{2016 UGA}
4,14,009,{2016 UGA}
...,...,...,...
256,739,560,{2021 UGA}
257,740,561,{2021 UGA}
258,741,562,{2021 UGA}
259,742,563,{2021 UGA}


The indices and subject base IDs match with the table before, which means that the data matches. Now the same process will be applied onto the multiple year group.

In [55]:
multi_yr_group = pd.DataFrame(multi_yr_group).reset_index()
multi_yr_group

Unnamed: 0,index,base_id,Virus_Year_Measurements,Improvement,Average Improvement,Age,Dataset
0,0,001,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [3, 3...","[0, 0, 0, 0, 4, 2, 5, 1, 0, 0, 4, 1, 1, 0, 1, ...",1.161290,"{32, 33, 29, 31}","{2016 UGA, 2019 UGA, 2021 UGA, 2020 UGA}"
1,1,001C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [0, 2...","[0, 1, 0, 0, 0, -1, 2, -1, 0, 0, 1, 1, 2, -1, ...",0.360000,"{16, 15}","{2017 UGA, 2018 UGA}"
2,5,004C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [3, 3...","[0, 0, 0, 1, 1, 0, -1, 0, -1, -1, 1, 0, -4, 0,...",0.055556,"{16, 17, 14, 15}","{2017 UGA, 2019 UGA, 2018 UGA, 2020 UGA}"
3,6,005,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 0...","[-1, 0, 0, 0, 0, -1, 0, 2, 1, -1, 0, 0, 0, 0, ...",0.000000,"{25, 26, 27, 28, 29}","{2021 UGA, 2016 UGA, 2020 UGA, 2018 UGA, 2017 ..."
4,7,005C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [2, 2...","[0, 0, 0, 1, 0, 2, 0, -3, 0, 0, 0, 4, -1, 1, 0...",0.440000,"{16, 17}","{2017 UGA, 2018 UGA}"
...,...,...,...,...,...,...,...
478,721,526,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [5, 4,...","[1, 1, 0, 0, 0, -2, 1, 0, 0, 0, 0, 0, 0, -1, 0]",0.000000,"{27, 28}","{2019 UGA, 2020 UGA, 2021 UGA}"
479,726,531,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [2, 2,...","[2, 0, 3, 1, 3, 0, 3, 0, 2, 2, 1, 1, 3, 0, 3]",1.600000,"{19, 20, 21}","{2019 UGA, 2020 UGA, 2021 UGA}"
480,727,535,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [1, 6]...","[5, 0, 0, 1, -1, 0, 5, -1, 4, 0]",1.300000,"{65, 63}","{2019 UGA, 2021 UGA}"
481,728,539,"{'H1N1 A/Brisbane/2/2018': {'2020 UGA': [0, 5,...","[5, 0, 1, 0, 4, 0, 2, 1, 0]",1.444444,{18},"{2020 UGA, 2021 UGA}"


In [57]:
improvement_ct = 0
for i in range(len(multi_yr_group)):
   if multi_yr_group['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(multi_yr_group)

0.772256728778468

In [59]:
dataset_multi_yr = []
for i in range(len(datasets['Dataset'])):
    if len(datasets['Dataset'][i]) != 1:
        dataset_multi_yr.append(datasets.loc[i])
dataset_multi_yr = pd.DataFrame(dataset_multi_yr)

In [61]:
dataset_multi_yr

Unnamed: 0,base_id,Dataset
0,001,"{2016 UGA, 2019 UGA, 2021 UGA, 2020 UGA}"
1,001C,"{2017 UGA, 2018 UGA}"
5,004C,"{2017 UGA, 2019 UGA, 2018 UGA, 2020 UGA}"
6,005,"{2021 UGA, 2016 UGA, 2020 UGA, 2018 UGA, 2017 ..."
7,005C,"{2017 UGA, 2018 UGA}"
...,...,...
721,526,"{2019 UGA, 2020 UGA, 2021 UGA}"
726,531,"{2019 UGA, 2020 UGA, 2021 UGA}"
727,535,"{2019 UGA, 2021 UGA}"
728,539,"{2020 UGA, 2021 UGA}"


Indices match so it is confirmed that the table of vaccine data relates to the multi-year patients.

In [64]:
new_age = []

for i in range(len(one_yr_group)):
    new_age_list = list(one_yr_group['Age'][i])
    new_age.append(new_age_list[0])

one_yr_group['Updated Age'] = new_age

Each group will be split into 6 groups as listed in the comments next to the definition of the array of each group.

In [67]:
group1_one_yr = [] # Infants(<1 year old)
group2_one_yr = [] # Children(1-11 years old)
group3_one_yr = [] # Adolescents(12-17 years old)
group4_one_yr = [] # Adults(18-44 years old)
group5_one_yr = [] # Middle Aged Adults(45-64 years old)
group6_one_yr = [] # Seniors(>65 years old)
for i in range(len(one_yr_group)):
    if one_yr_group['Updated Age'][i] < 1:
        group1_one_yr.append(one_yr_group.loc[i])
    elif one_yr_group['Updated Age'][i] < 12:
        group2_one_yr.append(one_yr_group.loc[i])
    elif one_yr_group['Updated Age'][i] < 18:
        group3_one_yr.append(one_yr_group.loc[i])
    elif one_yr_group['Updated Age'][i] < 45:
        group4_one_yr.append(one_yr_group.loc[i])
    elif one_yr_group['Updated Age'][i] < 65:
        group5_one_yr.append(one_yr_group.loc[i])
    else:
        group6_one_yr.append(one_yr_group.loc[i])

In [69]:
group1_one_yr

[]

There's no available data for infants.

In [72]:
group2_one_yr = pd.DataFrame(group2_one_yr).reset_index()
group2_one_yr
improvement_arr = []
for i in range(len(group2_one_yr['Virus_Year_Measurements'])):
    temp_improvement_arr = []
    for virus in group2_one_yr['Virus_Year_Measurements'][i]:
        for dataset in group2_one_yr['Virus_Year_Measurements'][i][virus]:
            delta = group2_one_yr['Virus_Year_Measurements'][i][virus][dataset]
            temp_improvement_arr.append(delta[len(delta)-1] - delta[0])
    improvement_arr.append(temp_improvement_arr)
group2_one_yr['Improvement'] = improvement_arr
average_arr = []
for i in range(len(group2_one_yr['Improvement'])):
   average_arr.append(sum(group2_one_yr['Improvement'][i])/len(group2_one_yr['Improvement'][i]))
group2_one_yr['Average Improvement'] = average_arr
group2_one_yr

Unnamed: 0,level_0,index,base_id,Virus_Year_Measurements,Improvement,Average Improvement,Age,Dataset,Updated Age
0,118,369,206C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [3, 6,...","[1, 2, -1, 2]",1.0,{11},{2021 UGA},11
1,119,371,207C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [7, 6,...","[0, 1, 0, 1]",0.5,{11},{2021 UGA},11
2,124,382,212C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [5, 5,...","[0, 1, -1, 0]",0.0,{10},{2021 UGA},10
3,127,388,215C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [0, 0]...","[0, 2, 3, 2]",1.75,{11},{2021 UGA},11
4,129,390,216C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [3, 8]...","[5, 4, 3, 5]",4.25,{10},{2021 UGA},10
5,131,395,219C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [3, 3]...","[0, 0, 0, 1]",0.25,{10},{2021 UGA},10
6,132,397,220C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [5, 5]...","[0, 0, 0, 1]",0.25,{10},{2021 UGA},10
7,133,399,221C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [4, 4]...","[0, 1, 0, 3]",1.0,{11},{2021 UGA},11
8,136,403,223C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [5, 7]...","[2, 1, 1, 0]",1.0,{10},{2021 UGA},10
9,139,408,226C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [5, 7]...","[2, 2, 2, 3]",2.25,{11},{2021 UGA},11


In [74]:
avg_improvement = sum(group2_one_yr['Average Improvement'])/len(group2_one_yr)
avg_improvement

1.375

In [76]:
improvement_ct = 0
for i in range(len(group2_one_yr)):
   if group2_one_yr['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(group2_one_yr)

0.875

The average improvement for children that have been tested for one year is 1.375.

In [79]:
group3_one_yr = pd.DataFrame(group3_one_yr).reset_index()
improvement_arr = []
for i in range(len(group3_one_yr['Virus_Year_Measurements'])):
    temp_improvement_arr = []
    for virus in group3_one_yr['Virus_Year_Measurements'][i]:
        for dataset in group3_one_yr['Virus_Year_Measurements'][i][virus]:
            delta = group3_one_yr['Virus_Year_Measurements'][i][virus][dataset]
            temp_improvement_arr.append(delta[len(delta)-1] - delta[0])
    improvement_arr.append(temp_improvement_arr)
group3_one_yr['Improvement'] = improvement_arr
average_arr = []
for i in range(len(group3_one_yr['Improvement'])):
   average_arr.append(sum(group3_one_yr['Improvement'][i])/len(group3_one_yr['Improvement'][i]))
group3_one_yr['Average Improvement'] = average_arr
group3_one_yr

Unnamed: 0,level_0,index,base_id,Virus_Year_Measurements,Improvement,Average Improvement,Age,Dataset,Updated Age
0,5,17,010C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [2, 2...","[0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0]",0.2500,{15},{2017 UGA},15
1,6,19,011C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [3, 3...","[0, 0, 0, -1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0]",0.1875,{17},{2017 UGA},17
2,8,21,012C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [2, 2...","[0, 0, 2, 3, 0, 0, 0, 2, 1, 2, 2, 0, 2, 1, 0, 0]",0.9375,{17},{2017 UGA},17
3,10,40,022C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [4, 4...","[-1, 0, 0, -1, -4, 1, 0, 0, 0, 3, -1, -1, 3, -...",0.0000,{17},{2017 UGA},17
4,16,60,033C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [1, 1...","[0, 0, 1, 5, 0, 0, 0, 6, 1, 6, 3, -2, 5, 2, 0, 0]",1.6875,{17},{2017 UGA},17
...,...,...,...,...,...,...,...,...,...
73,166,444,246C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [6, 6]...","[0, 1, 0, 1]",0.5000,{13},{2021 UGA},13
74,167,447,248C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [2, 3]...","[1, 2, 0, 1]",1.0000,{14},{2021 UGA},14
75,169,451,250C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [6, 6]...","[0, 2, 1, 1]",1.0000,{13},{2021 UGA},13
76,170,453,251C,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [3, 6]...","[3, 4, 2, 3]",3.0000,{12},{2021 UGA},12


In [81]:
avg_improvement = sum(group3_one_yr['Average Improvement'])/len(group3_one_yr)
avg_improvement

1.0338319088319088

In [83]:
improvement_ct = 0
for i in range(len(group3_one_yr)):
   if group3_one_yr['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(group3_one_yr)

0.8717948717948718

The average improvement in adolescents who were tested for 1 year is 1.03383.

In [86]:
group4_one_yr = pd.DataFrame(group4_one_yr).reset_index()
improvement_arr = []
for i in range(len(group4_one_yr['Virus_Year_Measurements'])):
    temp_improvement_arr = []
    for virus in group4_one_yr['Virus_Year_Measurements'][i]:
        for dataset in group4_one_yr['Virus_Year_Measurements'][i][virus]:
            delta = group4_one_yr['Virus_Year_Measurements'][i][virus][dataset]
            temp_improvement_arr.append(delta[len(delta)-1] - delta[0])
    improvement_arr.append(temp_improvement_arr)
group4_one_yr['Improvement'] = improvement_arr
average_arr = []
for i in range(len(group4_one_yr['Improvement'])):
   average_arr.append(sum(group4_one_yr['Improvement'][i])/len(group4_one_yr['Improvement'][i]))
group4_one_yr['Average Improvement'] = average_arr
group4_one_yr

Unnamed: 0,level_0,index,base_id,Virus_Year_Measurements,Improvement,Average Improvement,Age,Dataset,Updated Age
0,0,2,002,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 3...","[-1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0...",-0.0625,{29},{2016 UGA},29
1,1,3,003,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 3...","[-1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 1, 1]",0.3750,{28},{2016 UGA},28
2,2,4,004,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 3...","[2, 0, 2, 4, 0, 1, 0, 4, 2, 2, 2, 2, 3, 0, 0, 4]",1.7500,{27},{2016 UGA},27
3,3,8,006,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 1...","[0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]",0.1875,{35},{2016 UGA},35
4,7,20,012,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 4...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 1, 0]",0.0000,{24},{2016 UGA},24
...,...,...,...,...,...,...,...,...,...
108,256,739,560,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [4, 5]...","[1, 1, 1, 3]",1.5000,{18},{2021 UGA},18
109,257,740,561,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [6, 6,...","[0, 1, 0, 1]",0.5000,{18},{2021 UGA},18
110,258,741,562,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [4, 5]...","[1, 2, 1, 2]",1.5000,{18},{2021 UGA},18
111,259,742,563,"{'H1N1 A/Brisbane/2/2018': {'2021 UGA': [3, 4]...","[1, 1, 1, 1]",1.0000,{18},{2021 UGA},18


In [88]:
avg_improvement = sum(group4_one_yr['Average Improvement'])/len(group4_one_yr)
avg_improvement

0.9637045231071779

In [90]:
improvement_ct = 0
for i in range(len(group4_one_yr)):
   if group4_one_yr['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(group4_one_yr)

0.8584070796460177

The average improvement in adults 18-44 who were vaccinated for one year is 0.9637.

In [93]:
group5_one_yr = pd.DataFrame(group5_one_yr).reset_index()
improvement_arr = []
for i in range(len(group5_one_yr['Virus_Year_Measurements'])):
    temp_improvement_arr = []
    for virus in group5_one_yr['Virus_Year_Measurements'][i]:
        for dataset in group5_one_yr['Virus_Year_Measurements'][i][virus]:
            delta = group5_one_yr['Virus_Year_Measurements'][i][virus][dataset]
            temp_improvement_arr.append(delta[len(delta)-1] - delta[0])
    improvement_arr.append(temp_improvement_arr)
group5_one_yr['Improvement'] = improvement_arr
average_arr = []
for i in range(len(group5_one_yr['Improvement'])):
   average_arr.append(sum(group5_one_yr['Improvement'][i])/len(group5_one_yr['Improvement'][i]))
group5_one_yr['Average Improvement'] = average_arr
group5_one_yr

Unnamed: 0,level_0,index,base_id,Virus_Year_Measurements,Improvement,Average Improvement,Age,Dataset,Updated Age
0,4,14,9,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 5...","[1, 0, 1, 2, 0, 0, 0, 1, 0, 0, -1, 1, 0, -1, 1...",0.3125,{61},{2016 UGA},61
1,39,147,79,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 1...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0]",0.125,{60},{2016 UGA},60
2,56,183,98,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 1...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -5, 0]",-0.3125,{55},{2016 UGA},55
3,59,190,102,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [3, 3...","[0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0]",0.3125,{47},{2016 UGA},47
4,66,210,113,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [2, 2...","[0, 2, 3, 5, 3, 1, 2, 4, 1, 2, 2, 1, 3, 0, 3, 1]",2.0625,{50},{2016 UGA},50
5,76,243,131,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [2, 0...","[-2, -1, 0, 0, 0, 0, 0, 0, -1, -1, 0, -1, 0, 0...",-0.375,{62},{2016 UGA},62
6,89,300,163,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [0, 1...","[1, 0, 0, 3, 1, 0, 0, 4, -1, 1, 1, 0, 2, 0, 0, 1]",0.8125,{51},{2017 UGA},51
7,96,318,173,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [0, 1...","[1, 1, -1, 1, 0, 0, 0, 2, 0, 1, -1, 1, 0, 0, 1...",0.4375,{64},{2017 UGA},64
8,123,381,212,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [2, 3...","[1, 1, 0, 2, 1, 0, 1, 2, 0, 1, 1, 0, 0, 0, 1, 0]",0.6875,{46},{2017 UGA},46
9,135,402,223,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [0, 2...","[2, -1, 0, 1, 0, 0, -1, 0, 0, 1, -1, 1, 0, 0, ...",0.125,{46},{2017 UGA},46


In [95]:
avg_improvement = sum(group5_one_yr['Average Improvement'])/len(group5_one_yr)
avg_improvement

0.8615196078431373

In [97]:
improvement_ct = 0
for i in range(len(group5_one_yr)):
   if group5_one_yr['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(group5_one_yr)

0.7647058823529411

The average improvement in older adults 45-62 who were vaccinated for a year is 0.8615.

In [100]:
group6_one_yr = pd.DataFrame(group6_one_yr).reset_index()
improvement_arr = []
for i in range(len(group6_one_yr['Virus_Year_Measurements'])):
    temp_improvement_arr = []
    for virus in group6_one_yr['Virus_Year_Measurements'][i]:
        for dataset in group6_one_yr['Virus_Year_Measurements'][i][virus]:
            delta = group6_one_yr['Virus_Year_Measurements'][i][virus][dataset]
            temp_improvement_arr.append(delta[len(delta)-1] - delta[0])
    improvement_arr.append(temp_improvement_arr)
group6_one_yr['Improvement'] = improvement_arr
average_arr = []
for i in range(len(group6_one_yr['Improvement'])):
   average_arr.append(sum(group6_one_yr['Improvement'][i])/len(group6_one_yr['Improvement'][i]))
group6_one_yr['Average Improvement'] = average_arr
group6_one_yr

Unnamed: 0,level_0,index,base_id,Virus_Year_Measurements,Improvement,Average Improvement,Age,Dataset,Updated Age
0,54,179,96,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 1...","[0, 0, 0, 3, 1, 0, 0, 3, 1, 3, 1, 2, 3, -1, 0, 0]",1.0,{67},{2016 UGA},67
1,87,294,160,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [0, 0...","[0, 0, -1, 4, 0, 0, 0, 4, 0, 0, 0, -1, 0, 0, 0...",0.3125,{68},{2017 UGA},68
2,93,308,168,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [1, 1...","[0, 0, 0, -1, 0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 0...",-0.0625,{83},{2017 UGA},83
3,95,316,172,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [0, 2...","[2, 1, 1, 5, 0, 1, 1, 3, 1, 3, 0, 0, 2, 1, 1, 1]",1.4375,{66},{2017 UGA},66
4,98,324,176,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [2, 2...","[0, -1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, -1, 0...",0.0625,{71},{2017 UGA},71
5,99,326,178,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [0, 0...","[0, 0, 0, 2, 0, 0, -1, 2, 0, 1, 0, 0, 1, 0, 0, 0]",0.3125,{67},{2017 UGA},67
6,100,329,180,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [2, 2...","[0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, -1, 0]",0.125,{67},{2017 UGA},67
7,102,333,182,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [1, 1...","[0, 0, -1, 0, 0, 0, 0, 3, 0, 0, 0, 0, -1, 0, 0...",0.0625,{67},{2017 UGA},67
8,103,338,186,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [0, 0...","[0, 0, 0, 0, 0, 0, 0, 1, -1, 0, 0, -1, 0, 0, 0...",-0.0625,{67},{2017 UGA},67
9,108,346,192,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [1, 2...","[1, 0, 0, 3, 0, 0, -1, 2, 1, 0, 0, 0, 0, 0, 0, 0]",0.375,{74},{2017 UGA},74


In [102]:
avg_improvement = sum(group6_one_yr['Average Improvement'])/len(group6_one_yr)
avg_improvement

0.5572916666666667

In [104]:
improvement_ct = 0
for i in range(len(group6_one_yr)):
   if group6_one_yr['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(group6_one_yr)

0.8

In [106]:
one_yr_group_avg = ['N/A', 1.375, 1.0338319088319088, 0.9637045231071779, 0.8615196078431373, 0.5572916666666667]
one_yr_group_improvement = ['N/A', 0.875*100, 0.8717948717948718*100, 0.8584070796460177*100, 0.7647058823529411*100, 0.8*100]

The average improvement in seniors 0.55729. The trends show that as the age of the one year subjects increase, the overall improvement decreases.

In [109]:
group1_multi_yr = [] # Infants(<1 year old)
group2_multi_yr = [] # Children(1-11 years old)
group3_multi_yr = [] # Adolescents(12-17 years old)
group4_multi_yr = [] # Adults(18-44 years old)
group5_multi_yr = [] # Middle Aged Adults(45-64 years old)
group6_multi_yr = [] # Seniors(>65 years old)
for i in range(len(multi_yr_group)):
    if max(multi_yr_group['Age'][i]) < 1:
        group1_multi_yr.append(multi_yr_group.loc[i])
    elif max(multi_yr_group['Age'][i]) < 12:
        group2_multi_yr.append(multi_yr_group.loc[i])
    elif max(multi_yr_group['Age'][i]) < 18:
        group3_multi_yr.append(multi_yr_group.loc[i])
    elif max(multi_yr_group['Age'][i]) < 45:
        group4_multi_yr.append(multi_yr_group.loc[i])
    elif max(multi_yr_group['Age'][i]) < 65:
        group5_multi_yr.append(multi_yr_group.loc[i])
    else:
        group6_multi_yr.append(multi_yr_group.loc[i])

In [111]:
group1_multi_yr

[]

There is no available data for infants who were vaccinated over multiple years.

In [114]:
group2_multi_yr

[]

There is no available data for children who were vaccinated over multiple years.

In [117]:
group3_multi_yr = pd.DataFrame(group3_multi_yr).reset_index()
improvement_arr = []
for i in range(len(group3_multi_yr['Virus_Year_Measurements'])):
    temp_improvement_arr = []
    for virus in group3_multi_yr['Virus_Year_Measurements'][i]:
        for dataset in group3_multi_yr['Virus_Year_Measurements'][i][virus]:
            delta = group3_multi_yr['Virus_Year_Measurements'][i][virus][dataset]
            temp_improvement_arr.append(delta[len(delta)-1] - delta[0])
    improvement_arr.append(temp_improvement_arr)
group3_multi_yr['Improvement'] = improvement_arr
average_arr = []
for i in range(len(group3_multi_yr['Improvement'])):
   average_arr.append(sum(group3_multi_yr['Improvement'][i])/len(group3_multi_yr['Improvement'][i]))
group3_multi_yr['Average Improvement'] = average_arr
group3_multi_yr

Unnamed: 0,level_0,index,base_id,Virus_Year_Measurements,Improvement,Average Improvement,Age,Dataset
0,1,1,001C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [0, 2...","[0, 1, 0, 0, 0, -1, 2, -1, 0, 0, 1, 1, 2, -1, ...",0.360000,"{16, 15}","{2017 UGA, 2018 UGA}"
1,2,5,004C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [3, 3...","[0, 0, 0, 1, 1, 0, -1, 0, -1, -1, 1, 0, -4, 0,...",0.055556,"{16, 17, 14, 15}","{2017 UGA, 2019 UGA, 2018 UGA, 2020 UGA}"
2,4,7,005C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [2, 2...","[0, 0, 0, 1, 0, 2, 0, -3, 0, 0, 0, 4, -1, 1, 0...",0.440000,"{16, 17}","{2017 UGA, 2018 UGA}"
3,5,9,006C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [3, 4...","[0, 0, 0, 1, 1, -1, 2, 2, -1, 1, -4, 0, 0, 1, ...",0.580645,"{16, 17, 15}","{2017 UGA, 2019 UGA, 2018 UGA}"
4,7,11,007C,"{'H1N1 A/Beijing/262/1995': {'2017 UGA': [0, 0...","[0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0,...",0.032258,"{16, 17, 15}","{2017 UGA, 2019 UGA, 2018 UGA}"
...,...,...,...,...,...,...,...,...
119,246,362,202C,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [4, 5,...","[0, 1, 0, 0, 0, 0, 2, 0, 0, 2, -1, -1, 1, 0, 0]",0.266667,"{13, 14}","{2019 UGA, 2020 UGA, 2021 UGA}"
120,249,365,204C,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [3, 5,...","[0, 1, 3, 2, 1, -1, 5, 0, 1, 1, 3, -1, 1, 0, 4]",1.333333,"{12, 13}","{2019 UGA, 2020 UGA, 2021 UGA}"
121,252,372,207CC,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [6, 6,...","[0, 0, 0, 0, 0, 0, -1, 0, -1, 1, 1]",0.000000,"{12, 13}","{2019 UGA, 2020 UGA}"
122,254,374,208C,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [6, 6]...","[0, 4, 0, 0, 2, 0, 1, 3, 0, 5]",1.500000,"{12, 14}","{2019 UGA, 2021 UGA}"


In [119]:
avg_improvement = sum(group3_multi_yr['Average Improvement'])/len(group3_multi_yr)
avg_improvement

0.2741436485969319

In [121]:
improvement_ct = 0
for i in range(len(group3_multi_yr)):
   if group3_multi_yr['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(group3_multi_yr)

0.7741935483870968

The average improvement in adolescents who were vaccinated over multiple years is 0.2741.

In [124]:
group4_multi_yr = pd.DataFrame(group4_multi_yr).reset_index()
improvement_arr = []
for i in range(len(group4_multi_yr['Virus_Year_Measurements'])):
    temp_improvement_arr = []
    for virus in group4_multi_yr['Virus_Year_Measurements'][i]:
        for dataset in group4_multi_yr['Virus_Year_Measurements'][i][virus]:
            delta = group4_multi_yr['Virus_Year_Measurements'][i][virus][dataset]
            temp_improvement_arr.append(delta[len(delta)-1] - delta[0])
    improvement_arr.append(temp_improvement_arr)
group4_multi_yr['Improvement'] = improvement_arr
average_arr = []
for i in range(len(group4_multi_yr['Improvement'])):
   average_arr.append(sum(group4_multi_yr['Improvement'][i])/len(group4_multi_yr['Improvement'][i]))
group4_multi_yr['Average Improvement'] = average_arr
group4_multi_yr

Unnamed: 0,level_0,index,base_id,Virus_Year_Measurements,Improvement,Average Improvement,Age,Dataset
0,0,0,001,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [3, 3...","[0, 0, 0, 0, 4, 2, 5, 1, 0, 0, 4, 1, 1, 0, 1, ...",1.161290,"{32, 33, 29, 31}","{2016 UGA, 2019 UGA, 2021 UGA, 2020 UGA}"
1,3,6,005,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 0...","[-1, 0, 0, 0, 0, -1, 0, 2, 1, -1, 0, 0, 0, 0, ...",0.000000,"{25, 26, 27, 28, 29}","{2021 UGA, 2016 UGA, 2020 UGA, 2018 UGA, 2017 ..."
2,6,10,007,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [4, 4...","[-4, 1, 1, 0, 0, 0, 0, 0, 4, -1, 0, 0, 0, 0, 0...",0.170732,"{22, 23}","{2017 UGA, 2016 UGA, 2018 UGA}"
3,11,16,010,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 2...","[-1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0.156250,"{25, 26}","{2017 UGA, 2016 UGA}"
4,12,18,011,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 2...","[-1, 0, 0, 0, 0, -1, 2, 1, 0, -1, 0, 0, 0, -1,...",0.125000,"{25, 26, 27, 28, 29, 30}","{2021 UGA, 2016 UGA, 2020 UGA, 2018 UGA, 2017 ..."
...,...,...,...,...,...,...,...,...
146,476,719,524,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [5, 4,...","[1, 1, 1, 0, 1, 0, 1, 0, 1, 2, 0, 1, 1, 3, 0]",0.866667,"{24, 25, 23}","{2019 UGA, 2020 UGA, 2021 UGA}"
147,478,721,526,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [5, 4,...","[1, 1, 0, 0, 0, -2, 1, 0, 0, 0, 0, 0, 0, -1, 0]",0.000000,"{27, 28}","{2019 UGA, 2020 UGA, 2021 UGA}"
148,479,726,531,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [2, 2,...","[2, 0, 3, 1, 3, 0, 3, 0, 2, 2, 1, 1, 3, 0, 3]",1.600000,"{19, 20, 21}","{2019 UGA, 2020 UGA, 2021 UGA}"
149,481,728,539,"{'H1N1 A/Brisbane/2/2018': {'2020 UGA': [0, 5,...","[5, 0, 1, 0, 4, 0, 2, 1, 0]",1.444444,{18},"{2020 UGA, 2021 UGA}"


In [126]:
avg_improvement = sum(group4_multi_yr['Average Improvement'])/len(group4_multi_yr)
avg_improvement

0.4064528312752101

In [128]:
improvement_ct = 0
for i in range(len(group4_multi_yr)):
   if group4_multi_yr['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(group4_multi_yr)

0.7682119205298014

The average improvement in adults 18-44 who were vaccinated over multuple years is 0.40645.

In [131]:
group5_multi_yr = pd.DataFrame(group5_multi_yr).reset_index()
improvement_arr = []
for i in range(len(group5_multi_yr['Virus_Year_Measurements'])):
    temp_improvement_arr = []
    for virus in group5_multi_yr['Virus_Year_Measurements'][i]:
        for dataset in group5_multi_yr['Virus_Year_Measurements'][i][virus]:
            delta = group5_multi_yr['Virus_Year_Measurements'][i][virus][dataset]
            temp_improvement_arr.append(delta[len(delta)-1] - delta[0])
    improvement_arr.append(temp_improvement_arr)
group5_multi_yr['Improvement'] = improvement_arr
average_arr = []
for i in range(len(group5_multi_yr['Improvement'])):
   average_arr.append(sum(group5_multi_yr['Improvement'][i])/len(group5_multi_yr['Improvement'][i]))
group5_multi_yr['Average Improvement'] = average_arr
group5_multi_yr

Unnamed: 0,level_0,index,base_id,Virus_Year_Measurements,Improvement,Average Improvement,Age,Dataset
0,8,12,008,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [2, 2...","[0, -1, 0, 0, 0, -1, 1, 1, -1, -1, 0, 3, 1, -1...",-0.035714,"{46, 47, 48, 49, 50, 51}","{2021 UGA, 2016 UGA, 2020 UGA, 2018 UGA, 2017 ..."
1,21,30,017,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [3, 2...","[-2, 1, 0, 0, 1, 0, 0, 1, -1, 0, -1, 1, 0, -1,...",-0.125000,"{51, 52, 53, 54, 55, 56}","{2021 UGA, 2016 UGA, 2020 UGA, 2018 UGA, 2017 ..."
2,49,67,037,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [2, 2...","[-1, 0, 0, 6, 0, 1, 0, -1, 0, 0, 4, 0, -2, 2, ...",0.446809,"{57, 58, 59, 60}","{2017 UGA, 2016 UGA, 2018 UGA, 2019 UGA}"
3,51,69,038,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [2, 3...","[1, -1, 0, -2, 0, -1, -1, 0, 1, 0, -2, 0, 4, 0...",0.392857,"{43, 44, 46, 47}","{2021 UGA, 2016 UGA, 2020 UGA, 2018 UGA, 2017 ..."
4,70,93,051,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 1...","[0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 0, 6, -1, -5...",0.357143,"{45, 46, 47, 48, 49, 50}","{2021 UGA, 2016 UGA, 2020 UGA, 2018 UGA, 2017 ..."
...,...,...,...,...,...,...,...,...
105,456,677,480,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [0, 3,...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, -2, 2]",0.133333,"{51, 52, 53}","{2019 UGA, 2020 UGA, 2021 UGA}"
106,459,682,485,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [3, 3,...","[-1, 1, 0, 3, 1, 0, -1, 1, -1, 0, -1]",0.181818,"{58, 59}","{2019 UGA, 2020 UGA}"
107,469,699,502,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [0, 4,...","[3, 0, -3, 0, 3, -1, 2, 0, 2, -1, 1, 3, 0, -7, 3]",0.333333,"{53, 54}","{2019 UGA, 2020 UGA, 2021 UGA}"
108,473,714,518,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [1, 2,...","[4, 1, 1, 5, 0, 0, 3, 0, 3, 0, 1]",1.636364,"{49, 50}","{2019 UGA, 2020 UGA}"


In [133]:
avg_improvement = sum(group5_multi_yr['Average Improvement'])/len(group5_multi_yr)
avg_improvement

0.569363267456103

In [135]:
improvement_ct = 0
for i in range(len(group5_multi_yr)):
   if group5_multi_yr['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(group5_multi_yr)

0.8454545454545455

The average improvement in adults aged 45-62 who were vaccinated over multiple years is 0.569.

In [138]:
group6_multi_yr = pd.DataFrame(group6_multi_yr).reset_index()
improvement_arr = []
for i in range(len(group6_multi_yr['Virus_Year_Measurements'])):
    temp_improvement_arr = []
    for virus in group6_multi_yr['Virus_Year_Measurements'][i]:
        for dataset in group6_multi_yr['Virus_Year_Measurements'][i][virus]:
            delta = group6_multi_yr['Virus_Year_Measurements'][i][virus][dataset]
            temp_improvement_arr.append(delta[len(delta)-1] - delta[0])
    improvement_arr.append(temp_improvement_arr)
group6_multi_yr['Improvement'] = improvement_arr
average_arr = []
for i in range(len(group6_multi_yr['Improvement'])):
   average_arr.append(sum(group6_multi_yr['Improvement'][i])/len(group6_multi_yr['Improvement'][i]))
group6_multi_yr['Average Improvement'] = average_arr
group6_multi_yr

Unnamed: 0,level_0,index,base_id,Virus_Year_Measurements,Improvement,Average Improvement,Age,Dataset
0,15,24,014,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 1...","[-1, 0, 0, 0, 0, 0, 1, -1, 0, 0, 1, 0, 1, 0, 0...",0.000000,"{64, 65, 66, 62}","{2021 UGA, 2016 UGA, 2020 UGA, 2017 UGA, 2019 ..."
1,63,86,047,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [0, 0...","[0, 0, 0, 0, 0, 1, 0, -1, 0, 0, 0, 0, 0, 1, -2...",0.017857,"{64, 65, 66, 67, 62, 63}","{2021 UGA, 2016 UGA, 2020 UGA, 2018 UGA, 2017 ..."
2,106,143,077,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [1, 0...","[-1, 0, 0, 0, 0, 0, 0, 1, 1, -1, 0, 0, 0, -1, ...",-0.071429,"{64, 65, 66, 67, 68, 69}","{2021 UGA, 2016 UGA, 2020 UGA, 2018 UGA, 2017 ..."
3,110,154,083,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [0, 0...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 0,...",-0.017857,"{66, 67, 68, 69, 70}","{2021 UGA, 2016 UGA, 2020 UGA, 2018 UGA, 2017 ..."
4,121,170,091,"{'H1N1 A/Beijing/262/1995': {'2016 UGA': [0, 0...","[0, 0, 0, 0, 1, 0, 0, 0, 0, -1, 0, -1, -1, 0, ...",-0.048780,"{72, 73, 74}","{2017 UGA, 2016 UGA, 2018 UGA}"
...,...,...,...,...,...,...,...,...
93,453,673,476,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [0, 0,...","[2, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 1, -2, 0]",0.066667,"{67, 68, 69}","{2019 UGA, 2020 UGA, 2021 UGA}"
94,457,678,481,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [0, 5]...","[5, 4, 1, 5, 4, 2, 4, 3, 6, 3]",3.700000,"{67, 69}","{2019 UGA, 2021 UGA}"
95,458,681,484,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [2, 1,...","[-1, 1, 0, -1, 1, 0, -1, 1, -2, 1, 2]",0.090909,"{70, 71}","{2019 UGA, 2020 UGA}"
96,477,720,525,"{'H1N1 A/Brisbane/2/2018': {'2019 UGA': [0, 3,...","[4, 3, 0, 3, 4, 0, 1, 5, 5, 3, 2]",2.727273,"{64, 65}","{2019 UGA, 2020 UGA}"


In [140]:
avg_improvement = sum(group6_multi_yr['Average Improvement'])/len(group6_multi_yr)
avg_improvement

0.32926395845086126

In [142]:
improvement_ct = 0
for i in range(len(group6_multi_yr)):
   if group6_multi_yr['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(group6_multi_yr)

0.6938775510204082

In [144]:
multi_yr_group_avg = ['N/A', 'N/A', 0.2741436485969319, 0.4064528312752101, 0.569363267456103, 0.32926395845086126]
multi_yr_group_improvement = ['N/A', 'N/A', 0.7741935483870968*100, 0.7682119205298014*100, 0.8454545454545455*100, 0.6938775510204082*100]

In [146]:
groups = pd.DataFrame(one_yr_group_avg, columns = ['One-Year Group Average Improvement'])
groups['One-Year Group Improvement Percentage(%)'] = one_yr_group_improvement
groups['Multi-Year Group Average Improvement'] = multi_yr_group_avg
groups['Multi-Year Group Improvement Percentage(%)'] = multi_yr_group_improvement
groups.index = ['Group 1(Infants (<0))', 'Group 2(Children(1-11))', 'Group 3(Adolescents(12-17))', 'Group 4(Adults(18-44))', 'Group 5(Middle-Aged Adults(45-62))', 'Group 6(Seniors(>63))']
groups

Unnamed: 0,One-Year Group Average Improvement,One-Year Group Improvement Percentage(%),Multi-Year Group Average Improvement,Multi-Year Group Improvement Percentage(%)
Group 1(Infants (<0)),,,,
Group 2(Children(1-11)),1.375,87.5,,
Group 3(Adolescents(12-17)),1.033832,87.179487,0.274144,77.419355
Group 4(Adults(18-44)),0.963705,85.840708,0.406453,76.821192
Group 5(Middle-Aged Adults(45-62)),0.86152,76.470588,0.569363,84.545455
Group 6(Seniors(>63)),0.557292,80.0,0.329264,69.387755


The average improvement in seniors is 0.326. The adolescents have the lowest improvement and seniors have second lowest. The older adults ahve higher improvement than the younger ones. Now the cross reactivity of each vaccine must be studied.

In [171]:
def create_virus_dict_cross(group):
    virus_dict = {}
    for virus, virus_group in group.groupby('Virus'):
        virus_dict[virus] = virus_group.sort_values('Time')['Measurement'].tolist()
    return virus_dict

In [173]:
study_2016 = []
study_2017 = []
study_2018 = []
study_2019 = []
study_2020 = []
study_2021 = []

for i in range(len(df)):
    if df['Dataset'][i] == '2016 UGA':
        study_2016.append(df.loc[i])
    elif df['Dataset'][i] == '2017 UGA':
        study_2017.append(df.loc[i])
    elif df['Dataset'][i] == '2018 UGA':
        study_2018.append(df.loc[i])
    elif df['Dataset'][i] == '2019 UGA':
        study_2019.append(df.loc[i])
    elif df['Dataset'][i] == '2020 UGA':
        study_2020.append(df.loc[i])
    else:
        study_2021.append(df.loc[i])

The 2016 study will be considered first.

In [175]:
study_2016 = pd.DataFrame(study_2016)
study_2016

Unnamed: 0,Dataset,Virus,Subject,Age,Time,Measurement,base_id
0,2016 UGA,H1N1 A/South Carolina/1/1918,ID_001_UGA2016,29,Day0,2,001
1,2016 UGA,H1N1 A/Weiss/JY2/1943,ID_001_UGA2016,29,Day0,4,001
2,2016 UGA,H1N1 A/Fort Monmouth/1/1947,ID_001_UGA2016,29,Day0,1,001
3,2016 UGA,H1N1 A/Denver/1/1957,ID_001_UGA2016,29,Day0,0,001
4,2016 UGA,H1N1 A/New Jersey/8/1976,ID_001_UGA2016,29,Day0,0,001
...,...,...,...,...,...,...,...
6363,2016 UGA,H1N1 A/New Caledonia/20/1999,ID_151_UGA2016,66,Day250,1,151
6364,2016 UGA,H1N1 A/Solomon Islands/3/2006,ID_151_UGA2016,66,Day250,1,151
6365,2016 UGA,H1N1 A/Brisbane/59/2007,ID_151_UGA2016,66,Day250,1,151
6366,2016 UGA,H1N1 A/California/7/2009,ID_151_UGA2016,66,Day250,3,151


In [176]:
result_2016 = study_2016.groupby('base_id').apply(create_virus_dict_cross).reset_index(name='Virus Measurements')
result_2016

  result_2016 = study_2016.groupby('base_id').apply(create_virus_dict_cross).reset_index(name='Virus Measurements')


Unnamed: 0,base_id,Virus Measurements
0,001,"{'H1N1 A/Beijing/262/1995': [3, 3], 'H1N1 A/Br..."
1,002,"{'H1N1 A/Beijing/262/1995': [4, 3], 'H1N1 A/Br..."
2,003,"{'H1N1 A/Beijing/262/1995': [4, 3], 'H1N1 A/Br..."
3,004,"{'H1N1 A/Beijing/262/1995': [1, 3], 'H1N1 A/Br..."
4,005,"{'H1N1 A/Beijing/262/1995': [1, 0, 0], 'H1N1 A..."
...,...,...
143,147,"{'H1N1 A/Beijing/262/1995': [3, 3, 1], 'H1N1 A..."
144,148,"{'H1N1 A/Beijing/262/1995': [1, 0, 0], 'H1N1 A..."
145,149,"{'H1N1 A/Beijing/262/1995': [1, 0, 0], 'H1N1 A..."
146,150,"{'H1N1 A/Beijing/262/1995': [0, 0, 0], 'H1N1 A..."


In [177]:
improvement_arr = []
for i in range(len(result_2016['Virus Measurements'])):
    temp_improvement_arr = []
    for virus in result_2016['Virus Measurements'][i]:
        delta = result_2016['Virus Measurements'][i][virus]
        temp_improvement_arr.append(delta[len(delta)-1]-delta[0])
    improvement_arr.append(temp_improvement_arr)

In [178]:
result_2016['Improvements'] = improvement_arr
result_2016

Unnamed: 0,base_id,Virus Measurements,Improvements
0,001,"{'H1N1 A/Beijing/262/1995': [3, 3], 'H1N1 A/Br...","[0, 0, 2, 1, 1, 0, 1, 0, 1, 3, 0, 1, 1, 0, 2, 0]"
1,002,"{'H1N1 A/Beijing/262/1995': [4, 3], 'H1N1 A/Br...","[-1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0..."
2,003,"{'H1N1 A/Beijing/262/1995': [4, 3], 'H1N1 A/Br...","[-1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 1, 1]"
3,004,"{'H1N1 A/Beijing/262/1995': [1, 3], 'H1N1 A/Br...","[2, 0, 2, 4, 0, 1, 0, 4, 2, 2, 2, 2, 3, 0, 0, 4]"
4,005,"{'H1N1 A/Beijing/262/1995': [1, 0, 0], 'H1N1 A...","[-1, 0, 1, 0, 0, 0, 0, 1, 1, 0, -1, 1, 0, 2, 0..."
...,...,...,...
143,147,"{'H1N1 A/Beijing/262/1995': [3, 3, 1], 'H1N1 A...","[-2, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0, -1, 1, 3, 1..."
144,148,"{'H1N1 A/Beijing/262/1995': [1, 0, 0], 'H1N1 A...","[-1, 1, 0, 0, 0, 1, 0, 2, -2, 0, -5, -4, 0, 0,..."
145,149,"{'H1N1 A/Beijing/262/1995': [1, 0, 0], 'H1N1 A...","[-1, 0, 0, -1, 0, 0, 0, 0, -1, 0, -4, -2, 0, -..."
146,150,"{'H1N1 A/Beijing/262/1995': [0, 0, 0], 'H1N1 A...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 4, -2, 0, 0, 0, 0, 0]"


In [179]:
avg_improvement = []
for i in range(len(result_2016['Improvements'])):
    avg_improvement.append(sum(result_2016['Improvements'][i]) / len(result_2016['Improvements'][i]))
result_2016['Average Improvement'] = avg_improvement

In [180]:
result_2016

Unnamed: 0,base_id,Virus Measurements,Improvements,Average Improvement
0,001,"{'H1N1 A/Beijing/262/1995': [3, 3], 'H1N1 A/Br...","[0, 0, 2, 1, 1, 0, 1, 0, 1, 3, 0, 1, 1, 0, 2, 0]",0.8125
1,002,"{'H1N1 A/Beijing/262/1995': [4, 3], 'H1N1 A/Br...","[-1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0...",-0.0625
2,003,"{'H1N1 A/Beijing/262/1995': [4, 3], 'H1N1 A/Br...","[-1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 1, 1]",0.3750
3,004,"{'H1N1 A/Beijing/262/1995': [1, 3], 'H1N1 A/Br...","[2, 0, 2, 4, 0, 1, 0, 4, 2, 2, 2, 2, 3, 0, 0, 4]",1.7500
4,005,"{'H1N1 A/Beijing/262/1995': [1, 0, 0], 'H1N1 A...","[-1, 0, 1, 0, 0, 0, 0, 1, 1, 0, -1, 1, 0, 2, 0...",0.3125
...,...,...,...,...
143,147,"{'H1N1 A/Beijing/262/1995': [3, 3, 1], 'H1N1 A...","[-2, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0, -1, 1, 3, 1...",0.5000
144,148,"{'H1N1 A/Beijing/262/1995': [1, 0, 0], 'H1N1 A...","[-1, 1, 0, 0, 0, 1, 0, 2, -2, 0, -5, -4, 0, 0,...",-0.4375
145,149,"{'H1N1 A/Beijing/262/1995': [1, 0, 0], 'H1N1 A...","[-1, 0, 0, -1, 0, 0, 0, 0, -1, 0, -4, -2, 0, -...",-0.8750
146,150,"{'H1N1 A/Beijing/262/1995': [0, 0, 0], 'H1N1 A...","[0, 0, 0, 1, 0, 0, 0, 0, 0, 4, -2, 0, 0, 0, 0, 0]",0.1875


In [181]:
sum(avg_improvement) / len(avg_improvement)

0.2951858108108108

In [229]:
improvement_ct = 0
for i in range(len(result_2016)):
   if result_2016['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(result_2016)

0.6351351351351351

The average improvement of patients in 2016 was 0.29518

In [183]:
study_2017 = pd.DataFrame(study_2017).reset_index()
result_2017 = study_2017.groupby('base_id').apply(create_virus_dict_cross).reset_index(name='Virus Measurements')

  result_2017 = study_2017.groupby('base_id').apply(create_virus_dict_cross).reset_index(name='Virus Measurements')


In [185]:
improvement_arr = []
for i in range(len(result_2017['Virus Measurements'])):
    temp_improvement_arr = []
    for virus in result_2017['Virus Measurements'][i]:
        delta = result_2017['Virus Measurements'][i][virus]
        temp_improvement_arr.append(delta[len(delta)-1]-delta[0])
    improvement_arr.append(temp_improvement_arr)

In [187]:
result_2017['Improvement'] = improvement_arr
result_2017

Unnamed: 0,base_id,Virus Measurements,Improvement
0,001C,"{'H1N1 A/Beijing/262/1995': [0, 2, 0], 'H1N1 A...","[0, 0, 0, -1, -1, 0, 1, 1, -1, 3, 1, -1, 2, 0,..."
1,004C,"{'H1N1 A/Beijing/262/1995': [3, 3, 3], 'H1N1 A...","[0, 0, 0, -1, -4, 0, 1, 1, 0, 1, -1, 0, 1, -1,..."
2,005,"{'H1N1 A/Beijing/262/1995': [0, 0, 0], 'H1N1 A...","[0, 0, -1, 0, 0, 0, 0, -1, -1, 0, 0, -1, 0, 0,..."
3,005C,"{'H1N1 A/Beijing/262/1995': [2, 2, 2], 'H1N1 A...","[0, 0, 1, 2, -3, 0, 0, 4, 1, 3, 0, 1, 3, 0, 0, 0]"
4,006C,"{'H1N1 A/Beijing/262/1995': [3, 4, 3], 'H1N1 A...","[0, 0, 1, 2, -4, 1, 0, 3, 0, 6, -1, 0, 5, 0, 1..."
...,...,...,...
266,256,"{'H1N1 A/Beijing/262/1995': [0, 0], 'H1N1 A/Br...","[0, 1, 0, 3, 0, 0, 1, 4, 0, 3, 1, -1, 2, 1, 0, 1]"
267,257,"{'H1N1 A/Beijing/262/1995': [1, 0], 'H1N1 A/Br...","[-1, 0, 0, 3, 1, 1, 1, 5, 1, 4, 2, 0, 3, 1, 0, 0]"
268,260,"{'H1N1 A/Beijing/262/1995': [1, 1], 'H1N1 A/Br...","[0, 1, 3, 3, 0, 3, 0, 4, 3, 2, 4, 3, 1, 4, 1, 0]"
269,261,"{'H1N1 A/Beijing/262/1995': [1, 0], 'H1N1 A/Br...","[-1, 2, 0, 2, 1, 3, 2, 4, 0, 2, 3, -2, 2, 2, 1..."


In [190]:
avg_improvement = []
for i in range(len(result_2017['Improvement'])):
    avg_improvement.append(sum(result_2017['Improvement'][i]) / len(result_2017['Improvement'][i]))
result_2017['Average Improvement'] = avg_improvement
result_2017

Unnamed: 0,base_id,Virus Measurements,Improvement,Average Improvement
0,001C,"{'H1N1 A/Beijing/262/1995': [0, 2, 0], 'H1N1 A...","[0, 0, 0, -1, -1, 0, 1, 1, -1, 3, 1, -1, 2, 0,...",0.3125
1,004C,"{'H1N1 A/Beijing/262/1995': [3, 3, 3], 'H1N1 A...","[0, 0, 0, -1, -4, 0, 1, 1, 0, 1, -1, 0, 1, -1,...",-0.0625
2,005,"{'H1N1 A/Beijing/262/1995': [0, 0, 0], 'H1N1 A...","[0, 0, -1, 0, 0, 0, 0, -1, -1, 0, 0, -1, 0, 0,...",-0.3125
3,005C,"{'H1N1 A/Beijing/262/1995': [2, 2, 2], 'H1N1 A...","[0, 0, 1, 2, -3, 0, 0, 4, 1, 3, 0, 1, 3, 0, 0, 0]",0.7500
4,006C,"{'H1N1 A/Beijing/262/1995': [3, 4, 3], 'H1N1 A...","[0, 0, 1, 2, -4, 1, 0, 3, 0, 6, -1, 0, 5, 0, 1...",0.9375
...,...,...,...,...
266,256,"{'H1N1 A/Beijing/262/1995': [0, 0], 'H1N1 A/Br...","[0, 1, 0, 3, 0, 0, 1, 4, 0, 3, 1, -1, 2, 1, 0, 1]",1.0000
267,257,"{'H1N1 A/Beijing/262/1995': [1, 0], 'H1N1 A/Br...","[-1, 0, 0, 3, 1, 1, 1, 5, 1, 4, 2, 0, 3, 1, 0, 0]",1.3125
268,260,"{'H1N1 A/Beijing/262/1995': [1, 1], 'H1N1 A/Br...","[0, 1, 3, 3, 0, 3, 0, 4, 3, 2, 4, 3, 1, 4, 1, 0]",2.0000
269,261,"{'H1N1 A/Beijing/262/1995': [1, 0], 'H1N1 A/Br...","[-1, 2, 0, 2, 1, 3, 2, 4, 0, 2, 3, -2, 2, 2, 1...",1.3750


In [195]:
sum(avg_improvement)/len(avg_improvement)

0.21309963099630996

In [231]:
improvement_ct = 0
for i in range(len(result_2017)):
   if result_2017['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(result_2017)

0.5018450184501845

The average improvement of subjects in 2017 was 0.213.

In [197]:
study_2018 = pd.DataFrame(study_2018).reset_index()
result_2018 = study_2018.groupby('base_id').apply(create_virus_dict_cross).reset_index(name='Virus Measurements')
improvement_arr = []
for i in range(len(result_2018['Virus Measurements'])):
    temp_improvement_arr = []
    for virus in result_2018['Virus Measurements'][i]:
        delta = result_2018['Virus Measurements'][i][virus]
        temp_improvement_arr.append(delta[len(delta)-1]-delta[0])
    improvement_arr.append(temp_improvement_arr)

result_2018['Improvement'] = improvement_arr
avg_improvement = []
for i in range(len(result_2018['Improvement'])):
    avg_improvement.append(sum(result_2018['Improvement'][i]) / len(result_2018['Improvement'][i]))

result_2018['Average Improvement'] = avg_improvement

  result_2018 = study_2018.groupby('base_id').apply(create_virus_dict_cross).reset_index(name='Virus Measurements')


In [198]:
result_2018

Unnamed: 0,base_id,Virus Measurements,Improvement,Average Improvement
0,001C,"{'H1N1 A/Beijing/262/1995': [0, 1], 'H1N1 A/Br...","[1, 0, 2, 0, 2, 0, -1, 0, 0]",0.444444
1,004C,"{'H1N1 A/Beijing/262/1995': [3, 3], 'H1N1 A/Br...","[0, -1, -1, 0, 0, 0, -1, 0, 0]",-0.333333
2,005,"{'H1N1 A/Beijing/262/1995': [0, 0], 'H1N1 A/Br...","[0, 0, -1, 0, -1, 0, 0, 0, -1]",-0.333333
3,005C,"{'H1N1 A/Beijing/262/1995': [2, 2], 'H1N1 A/Br...","[0, 0, 0, 0, -1, 0, 0, 0, 0]",-0.111111
4,006C,"{'H1N1 A/Beijing/262/1995': [3, 3], 'H1N1 A/Br...","[0, -1, -1, 0, 0, 0, 0, 0, 0]",-0.222222
...,...,...,...,...
245,252,"{'H1N1 A/Beijing/262/1995': [2, 2], 'H1N1 A/Br...","[0, -1, -1, 0, -1, 0, 0, 0, 0]",-0.333333
246,263,"{'H1N1 A/Beijing/262/1995': [1, 1], 'H1N1 A/Br...","[0, 0, -3, 0, -1, 0, 0, 0, 0]",-0.444444
247,264,"{'H1N1 A/Beijing/262/1995': [2, 3], 'H1N1 A/Br...","[1, -1, -1, 0, -1, 0, 0, 0, 0]",-0.222222
248,265,"{'H1N1 A/Beijing/262/1995': [3, 3], 'H1N1 A/Br...","[0, 0, 1, 0, 2, 0, 1, 0, 0]",0.444444


In [202]:
sum(avg_improvement)/len(avg_improvement)

0.09422222222222223

In [233]:
improvement_ct = 0
for i in range(len(result_2018)):
   if result_2018['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(result_2018)

0.428

The average improvement in subjects for 2018 was 0.0942.

In [206]:
study_2019 = pd.DataFrame(study_2019).reset_index()
result_2019 = study_2019.groupby('base_id').apply(create_virus_dict_cross).reset_index(name='Virus Measurements')
improvement_arr = []
for i in range(len(result_2019['Virus Measurements'])):
    temp_improvement_arr = []
    for virus in result_2019['Virus Measurements'][i]:
        delta = result_2019['Virus Measurements'][i][virus]
        temp_improvement_arr.append(delta[len(delta)-1]-delta[0])
    improvement_arr.append(temp_improvement_arr)

result_2019['Improvement'] = improvement_arr
avg_improvement = []
for i in range(len(result_2019['Improvement'])):
    avg_improvement.append(sum(result_2019['Improvement'][i]) / len(result_2019['Improvement'][i]))

result_2019['Average Improvement'] = avg_improvement

  result_2019 = study_2019.groupby('base_id').apply(create_virus_dict_cross).reset_index(name='Virus Measurements')


In [207]:
result_2019

Unnamed: 0,base_id,Virus Measurements,Improvement,Average Improvement
0,001,"{'H1N1 A/Brisbane/2/2018': [3, 4, 3], 'H1N1 A/...","[0, 5, 0, 1, 1, 0]",1.166667
1,004C,"{'H1N1 A/Brisbane/2/2018': [5, 6, 6], 'H1N1 A/...","[1, 0, 1, 0, 0, -1]",0.166667
2,005,"{'H1N1 A/Brisbane/2/2018': [1, 1, 0], 'H1N1 A/...","[-1, 0, 0, 0, 0, -1]",-0.333333
3,006C,"{'H1N1 A/Brisbane/2/2018': [4, 5], 'H1N1 A/Bri...","[1, 2, 1, 0, 1, 0]",0.833333
4,007C,"{'H1N1 A/Brisbane/2/2018': [2, 2], 'H1N1 A/Bri...","[0, 0, 0, 0, 0, 0]",0.000000
...,...,...,...,...
456,528,"{'H1N1 A/Brisbane/2/2018': [4, 5], 'H1N1 A/Bri...","[1, 0, 1, 0, 0, 0]",0.333333
457,529,"{'H1N1 A/Brisbane/2/2018': [3, 4], 'H1N1 A/Bri...","[1, -1, 1, 0, 0, 2]",0.500000
458,530,"{'H1N1 A/Brisbane/2/2018': [0, 5], 'H1N1 A/Bri...","[5, 1, 6, 0, 4, 7]",3.833333
459,531,"{'H1N1 A/Brisbane/2/2018': [2, 2, 4], 'H1N1 A/...","[2, 1, 3, 0, 2, 1]",1.500000


In [211]:
sum(avg_improvement)/len(avg_improvement)

0.5336225596529284

In [235]:
improvement_ct = 0
for i in range(len(result_2019)):
   if result_2019['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(result_2019)

0.6203904555314533

The average improvement in subjects in 2019 was 0.5336. 

In [213]:
study_2020 = pd.DataFrame(study_2020).reset_index()
result_2020 = study_2020.groupby('base_id').apply(create_virus_dict_cross).reset_index(name='Virus Measurements')
improvement_arr = []
for i in range(len(result_2020['Virus Measurements'])):
    temp_improvement_arr = []
    for virus in result_2020['Virus Measurements'][i]:
        delta = result_2020['Virus Measurements'][i][virus]
        temp_improvement_arr.append(delta[len(delta)-1]-delta[0])
    improvement_arr.append(temp_improvement_arr)

result_2020['Improvement'] = improvement_arr
avg_improvement = []
for i in range(len(result_2020['Improvement'])):
    avg_improvement.append(sum(result_2020['Improvement'][i]) / len(result_2020['Improvement'][i]))

result_2020['Average Improvement'] = avg_improvement

  result_2020 = study_2020.groupby('base_id').apply(create_virus_dict_cross).reset_index(name='Virus Measurements')


In [218]:
result_2020

Unnamed: 0,base_id,Virus Measurements,Improvement,Average Improvement
0,001,"{'H1N1 A/Brisbane/2/2018': [3, 3, 3], 'H1N1 A/...","[0, 0, -1, 0, 0]",-0.2
1,004C,"{'H1N1 A/Brisbane/2/2018': [6, 7], 'H1N1 A/Cal...","[1, 0, 1, 1, 2]",1.0
2,005,"{'H1N1 A/Brisbane/2/2018': [0, 1, 0], 'H1N1 A/...","[0, 0, 2, 1, -2]",0.2
3,008,"{'H1N1 A/Brisbane/2/2018': [2, 4, 3], 'H1N1 A/...","[1, 0, 0, 1, 1]",0.6
4,009C,"{'H1N1 A/Brisbane/2/2018': [1, 5, 5], 'H1N1 A/...","[4, 2, 4, 3, 2]",3.0
...,...,...,...,...
334,539,"{'H1N1 A/Brisbane/2/2018': [0, 5, 5], 'H1N1 A/...","[5, 1, 4, 2, 1]",2.6
335,540,"{'H1N1 A/Brisbane/2/2018': [3, 5, 4], 'H1N1 A/...","[1, 1, 2, 3, 0]",1.4
336,542,"{'H1N1 A/Brisbane/2/2018': [7, 7], 'H1N1 A/Cal...","[0, 0, 0, 1, 1]",0.4
337,549,"{'H1N1 A/Brisbane/2/2018': [5, 6], 'H1N1 A/Cal...","[1, 0, 0, 1, 1]",0.6


In [220]:
sum(avg_improvement)/len(avg_improvement)

0.3952802359882006

In [237]:
improvement_ct = 0
for i in range(len(result_2020)):
   if result_2020['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(result_2020)

0.6253687315634219

The average improvement in subjects in 2020 was 0.3952.

In [223]:
study_2021 = pd.DataFrame(study_2021).reset_index()
result_2021 = study_2021.groupby('base_id').apply(create_virus_dict_cross).reset_index(name='Virus Measurements')
improvement_arr = []
for i in range(len(result_2021['Virus Measurements'])):
    temp_improvement_arr = []
    for virus in result_2021['Virus Measurements'][i]:
        delta = result_2021['Virus Measurements'][i][virus]
        temp_improvement_arr.append(delta[len(delta)-1]-delta[0])
    improvement_arr.append(temp_improvement_arr)

result_2021['Improvement'] = improvement_arr
avg_improvement = []
for i in range(len(result_2021['Improvement'])):
    avg_improvement.append(sum(result_2021['Improvement'][i]) / len(result_2021['Improvement'][i]))

result_2021['Average Improvement'] = avg_improvement

  result_2021 = study_2021.groupby('base_id').apply(create_virus_dict_cross).reset_index(name='Virus Measurements')


In [224]:
result_2021

Unnamed: 0,base_id,Virus Measurements,Improvement,Average Improvement
0,001,"{'H1N1 A/Brisbane/2/2018': [3, 7], 'H1N1 A/Cal...","[4, 4, 4, 5]",4.25
1,005,"{'H1N1 A/Brisbane/2/2018': [0, 2], 'H1N1 A/Cal...","[2, 1, -1, 2]",1.00
2,008,"{'H1N1 A/Brisbane/2/2018': [3, 5, 4], 'H1N1 A/...","[1, 2, 0, 1]",1.00
3,009C,"{'H1N1 A/Brisbane/2/2018': [5, 5], 'H1N1 A/Cal...","[0, -1, -1, 0]",-0.50
4,011,"{'H1N1 A/Brisbane/2/2018': [6, 7], 'H1N1 A/Cal...","[1, 3, 2, 3]",2.25
...,...,...,...,...
332,560,"{'H1N1 A/Brisbane/2/2018': [4, 5], 'H1N1 A/Cal...","[1, 1, 1, 3]",1.50
333,561,"{'H1N1 A/Brisbane/2/2018': [6, 6, 6], 'H1N1 A/...","[0, 1, 0, 1]",0.50
334,562,"{'H1N1 A/Brisbane/2/2018': [4, 5], 'H1N1 A/Cal...","[1, 2, 1, 2]",1.50
335,563,"{'H1N1 A/Brisbane/2/2018': [3, 4], 'H1N1 A/Cal...","[1, 1, 1, 1]",1.00


In [227]:
sum(avg_improvement)/len(avg_improvement)

1.072700296735905

In [239]:
improvement_ct = 0
for i in range(len(result_2021)):
   if result_2021['Average Improvement'][i] > 0:
       improvement_ct += 1
improvement_ct/len(result_2021)

0.8100890207715133

In [241]:
improvement_arr = [0.2951858108108108, 0.21309963099630996, 0.09422222222222223, 0.5336225596529284, 0.3952802359882006, 1.072700296735905]
percentage_arr = [0.6351351351351351*100, 0.5018450184501845*100, 0.428*100, 0.6203904555314533*100, 0.6253687315634219*100, 0.8100890207715133*100]

In [243]:
study_table = pd.DataFrame(improvement_arr, columns=['Average Improvement'])
study_table['Percentage of Improvement(%)'] = percentage_arr
study_table.index = ['2016 UGA', '2017 UGA', '2018 UGA', '2019 UGA', '2020 UGA', '2021 UGA']
study_table

Unnamed: 0,Average Improvement,Percentage of Improvement(%)
2016 UGA,0.295186,63.513514
2017 UGA,0.2131,50.184502
2018 UGA,0.094222,42.8
2019 UGA,0.533623,62.039046
2020 UGA,0.39528,62.536873
2021 UGA,1.0727,81.008902


2021 vaccine has the best cross reactivity and 2018 has the worst.