In [1]:
import pandas as pd
import glob
import matplotlib.pyplot as plt 
import seaborn as sns
import re

In [2]:
# Load every Moderna export and stack the files into a single frame.
# The frames are collected in a list and concatenated once: calling pd.concat
# inside the loop re-copies all previously loaded rows on every iteration.
# glob returns matches in no guaranteed order, so they are sorted to keep the
# row order reproducible.
moderna_frames = []
for file_name in sorted(glob.glob('EU Files/EU Moderna' + '*.csv')):
    moderna_frames.append(pd.read_csv(file_name, low_memory = False))
    print(file_name)
# pd.concat raises on an empty list, so fall back to an empty frame when no file matches.
data_EU_moderna = pd.concat(moderna_frames, axis = 0, ignore_index = True) if moderna_frames else pd.DataFrame()
data_EU_moderna.head(2)

EU Files\EU Moderna 1 2021.csv
EU Files\EU Moderna 1 2022.csv
EU Files\EU Moderna 1 2023.csv
EU Files\EU Moderna 2 2023.csv
EU Files\EU Moderna 3 2022.csv
EU Files\EU Moderna 3 2023.csv
EU Files\EU Moderna 4 2022.csv
EU Files\EU Moderna 4 2023.csv


Unnamed: 0,EU Local Number,Report Type,EV Gateway Receipt Date,Primary Source Qualification,Primary Source Country for Regulatory Purposes,Literature Reference,Patient Age Group,Patient Age Group (as per reporter),Parent Child Report,Patient Sex,Reaction List PT (Duration – Outcome - Seriousness Criteria),Suspect/interacting Drug List (Drug Char - Indication PT - Action taken - [Duration - Dose - Route]),Concomitant/Not Administered Drug List (Drug Char - Indication PT - Action taken - [Duration - Dose - Route]),ICSR Form
0,EU-EC-10011128459,Spontaneous,2021-12-31 00:00:00,Non Healthcare Professional,Non European Economic Area,Not available,18-64 Years,Not Specified,No,Female,Eye pain (n/a - Not Recovered/Not Resolved - O...,SPIKEVAX [COVID-19 MRNA VACCINE MODERNA (CX-02...,COVID-19 VACCINE ASTRAZENECA (CHADOX1 NCOV-19)...,"<a target=""_blank"" href=""https://dap.ema.europ..."
1,EU-EC-10011128525,Spontaneous,2021-12-31 00:00:00,Non Healthcare Professional,European Economic Area,Not available,18-64 Years,Adult,No,Female,"Chest pain (n/a - Unknown - ),<BR><BR>Dizzines...",SPIKEVAX [COVID-19 MRNA VACCINE MODERNA (CX-02...,Not reported,"<a target=""_blank"" href=""https://dap.ema.europ..."


In [3]:
# Relabel the 14 raw export columns by position (the assignment raises if the
# column count differs); the numbered placeholders mark columns that are dropped.
data_EU_moderna.columns = ["EU_ID", "1", "2", "3", "4", "5", "AGE_GROUP", "6", "7", "SEX", "SYMPTOMS_LIST", "8", "9", "10"]
# Keep only the fields used downstream. .copy() yields an independent frame, so the
# column assignments in the following cells do not raise SettingWithCopyWarning.
data_EU_moderna = data_EU_moderna[["EU_ID", "AGE_GROUP", "SEX", "SYMPTOMS_LIST"]].copy()
data_EU_moderna.head(2)

Unnamed: 0,EU_ID,AGE_GROUP,SEX,SYMPTOMS_LIST
0,EU-EC-10011128459,18-64 Years,Female,Eye pain (n/a - Not Recovered/Not Resolved - O...
1,EU-EC-10011128525,18-64 Years,Female,"Chest pain (n/a - Unknown - ),<BR><BR>Dizzines..."


In [4]:
def extract_symptoms(symptom_list):
    """Return the reaction names contained in one raw reaction-list cell.

    Entries in the raw text are separated by ``<BR><BR>``. Each entry starts
    with the reaction name, followed by a parenthesised detail section; only
    the text before the first ``(`` is kept, with surrounding whitespace removed.

    Parameters
    ----------
    symptom_list : object
        Raw cell value. Anything that is not a ``str`` (for example ``NaN``
        for a missing value) yields an empty list.

    Returns
    -------
    list of str
        Reaction names in their original order. Entries whose name is empty
        (empty input, trailing delimiter) are skipped so that no blank
        "symptom" is counted later.
    """
    if not isinstance(symptom_list, str):
        return []
    names = []
    for entry in symptom_list.split('<BR><BR>'):
        # Everything from the first "(" onward is detail text, not part of the name.
        name = entry.partition('(')[0].strip()
        if name:
            names.append(name)
    return names

In [5]:
# Parse each raw reaction string into a list of reaction names.
# Cells that already hold a list are kept unchanged, so re-running this cell
# does not wipe the column (extract_symptoms returns [] for non-string input).
data_EU_moderna["SYMPTOMS_LIST"] = data_EU_moderna["SYMPTOMS_LIST"].apply(
    lambda cell: cell if isinstance(cell, list) else extract_symptoms(cell)
)
data_EU_moderna

Unnamed: 0,EU_ID,AGE_GROUP,SEX,SYMPTOMS_LIST
0,EU-EC-10011128459,18-64 Years,Female,"[Eye pain, Migraine, Vomiting]"
1,EU-EC-10011128525,18-64 Years,Female,"[Chest pain, Dizziness, Fatigue, Headache, Mya..."
2,EU-EC-10011128535,18-64 Years,Male,"[Bundle branch block right, C-reactive protein..."
3,EU-EC-10011128589,65-85 Years,Female,"[COVID-19, Flank pain, SARS-CoV-2 test positive]"
4,EU-EC-10011128599,Not Specified,Female,"[Ear pain, Nausea, Pain, Pyrexia, Tenderness]"
...,...,...,...,...
393819,EU-EC-10014357459,65-85 Years,Male,"[Atrial fibrillation, Cardiac failure congesti..."
393820,EU-EC-10014357555,Not Specified,Male,"[COVID-19, COVID-19 immunisation, Interchange ..."
393821,EU-EC-10014349445,Not Specified,Male,[Death]
393822,EU-EC-10014351587,Not Specified,Male,[Death]


In [6]:
# Drop the "Years" unit from the age-bracket labels, e.g. "18-64 Years" -> "18-64".
age_labels = data_EU_moderna["AGE_GROUP"].astype(str)
age_labels = age_labels.str.replace("Years", "").str.strip()
data_EU_moderna["AGE_GROUP"] = age_labels
data_EU_moderna

Unnamed: 0,EU_ID,AGE_GROUP,SEX,SYMPTOMS_LIST
0,EU-EC-10011128459,18-64,Female,"[Eye pain, Migraine, Vomiting]"
1,EU-EC-10011128525,18-64,Female,"[Chest pain, Dizziness, Fatigue, Headache, Mya..."
2,EU-EC-10011128535,18-64,Male,"[Bundle branch block right, C-reactive protein..."
3,EU-EC-10011128589,65-85,Female,"[COVID-19, Flank pain, SARS-CoV-2 test positive]"
4,EU-EC-10011128599,Not Specified,Female,"[Ear pain, Nausea, Pain, Pyrexia, Tenderness]"
...,...,...,...,...
393819,EU-EC-10014357459,65-85,Male,"[Atrial fibrillation, Cardiac failure congesti..."
393820,EU-EC-10014357555,Not Specified,Male,"[COVID-19, COVID-19 immunisation, Interchange ..."
393821,EU-EC-10014349445,Not Specified,Male,[Death]
393822,EU-EC-10014351587,Not Specified,Male,[Death]


In [7]:
# Collapse the sex labels to one-letter codes with an exact-match lookup.
# Chained substring replacements ("emale", then "ale") depend on their order and
# would also rewrite any other value containing those letters; with the lookup,
# values that are not listed are left unchanged.
sex_codes = {"Female": "F", "Male": "M", "Not Specified": "U"}
data_EU_moderna["SEX"] = data_EU_moderna["SEX"].astype(str).str.strip().replace(sex_codes)
data_EU_moderna

Unnamed: 0,EU_ID,AGE_GROUP,SEX,SYMPTOMS_LIST
0,EU-EC-10011128459,18-64,F,"[Eye pain, Migraine, Vomiting]"
1,EU-EC-10011128525,18-64,F,"[Chest pain, Dizziness, Fatigue, Headache, Mya..."
2,EU-EC-10011128535,18-64,M,"[Bundle branch block right, C-reactive protein..."
3,EU-EC-10011128589,65-85,F,"[COVID-19, Flank pain, SARS-CoV-2 test positive]"
4,EU-EC-10011128599,Not Specified,F,"[Ear pain, Nausea, Pain, Pyrexia, Tenderness]"
...,...,...,...,...
393819,EU-EC-10014357459,65-85,M,"[Atrial fibrillation, Cardiac failure congesti..."
393820,EU-EC-10014357555,Not Specified,M,"[COVID-19, COVID-19 immunisation, Interchange ..."
393821,EU-EC-10014349445,Not Specified,M,[Death]
393822,EU-EC-10014351587,Not Specified,M,[Death]


In [8]:
# Sanity check: list the distinct sex codes present after recoding.
data_EU_moderna.loc[:, "SEX"].unique()

array(['F', 'M', 'U'], dtype=object)

In [9]:
# Peek at the parsed reaction list of the report with index label 0.
data_EU_moderna.loc[0, "SYMPTOMS_LIST"]

['Eye pain', 'Migraine', 'Vomiting']

In [10]:
# Column labels SYMPTOM1 .. SYMPTOM5.
symptom_columns = [f"SYMPTOM{position}" for position in range(1, 6)]

In [11]:
# NOTE(review): commented-out code, not executed. It refers to a "Symptoms List"
# column, whereas the frame's column is named "SYMPTOMS_LIST" - adjust before re-enabling.
# symptom_df = data_EU_moderna["Symptoms List"].str.split(",", expand=True)
# symptom_df.columns = symptom_df
# x = pd.concat([data_EU_moderna, symptom_df], axis=1)
# x

In [12]:
# NOTE(review): commented-out code, not executed. It refers to a "Symptoms List"
# column, whereas the frame's column is named "SYMPTOMS_LIST" - adjust before re-enabling.
# pd.DataFrame(data_EU_moderna["Symptoms List"].to_list(), columns = ["SYMPTOM1", "SYMPTOM2", "SYMPTOM3", "SYMPTOM4", "SYMPTOM5"])

In [13]:
# Label every row with the vaccine manufacturer.
data_EU_moderna.loc[:, 'VAX_MANU'] = "MODERNA"
data_EU_moderna

Unnamed: 0,EU_ID,AGE_GROUP,SEX,SYMPTOMS_LIST,VAX_MANU
0,EU-EC-10011128459,18-64,F,"[Eye pain, Migraine, Vomiting]",MODERNA
1,EU-EC-10011128525,18-64,F,"[Chest pain, Dizziness, Fatigue, Headache, Mya...",MODERNA
2,EU-EC-10011128535,18-64,M,"[Bundle branch block right, C-reactive protein...",MODERNA
3,EU-EC-10011128589,65-85,F,"[COVID-19, Flank pain, SARS-CoV-2 test positive]",MODERNA
4,EU-EC-10011128599,Not Specified,F,"[Ear pain, Nausea, Pain, Pyrexia, Tenderness]",MODERNA
...,...,...,...,...,...
393819,EU-EC-10014357459,65-85,M,"[Atrial fibrillation, Cardiac failure congesti...",MODERNA
393820,EU-EC-10014357555,Not Specified,M,"[COVID-19, COVID-19 immunisation, Interchange ...",MODERNA
393821,EU-EC-10014349445,Not Specified,M,[Death],MODERNA
393822,EU-EC-10014351587,Not Specified,M,[Death],MODERNA


In [14]:
# One row per (report, reaction): unpack the reaction lists, repeating the
# other columns and the index label for each list element.
x = data_EU_moderna.explode("SYMPTOMS_LIST")
x

Unnamed: 0,EU_ID,AGE_GROUP,SEX,SYMPTOMS_LIST,VAX_MANU
0,EU-EC-10011128459,18-64,F,Eye pain,MODERNA
0,EU-EC-10011128459,18-64,F,Migraine,MODERNA
0,EU-EC-10011128459,18-64,F,Vomiting,MODERNA
1,EU-EC-10011128525,18-64,F,Chest pain,MODERNA
1,EU-EC-10011128525,18-64,F,Dizziness,MODERNA
...,...,...,...,...,...
393820,EU-EC-10014357555,Not Specified,M,COVID-19 immunisation,MODERNA
393820,EU-EC-10014357555,Not Specified,M,Interchange of vaccine products,MODERNA
393821,EU-EC-10014349445,Not Specified,M,Death,MODERNA
393822,EU-EC-10014351587,Not Specified,M,Death,MODERNA


In [15]:
# The twenty most frequently reported reactions.
symptom_counts = x["SYMPTOMS_LIST"].value_counts()
symptom_counts.head(20)

SYMPTOMS_LIST
Headache                   93583
Pyrexia                    89640
Fatigue                    79991
Myalgia                    63877
Chills                     61459
Malaise                    51228
Nausea                     46470
Injection site pain        40115
Arthralgia                 39163
Dizziness                  27891
Pain in extremity          24153
Vaccination site pain      22165
Lymphadenopathy            21368
Dyspnoea                   18660
Pain                       18479
Injection site erythema    17769
Injection site swelling    17464
COVID-19                   16496
Asthenia                   15119
Vomiting                   14050
Name: count, dtype: int64

In [16]:
# Load every Pfizer export. Frames are collected in a list and concatenated once
# (pd.concat inside the loop re-copies all loaded rows each iteration), matches are
# sorted for a reproducible row order, and the loop does not rebind the name `x`.
pfizer_frames = []
for file_name in sorted(glob.glob('EU Files/EU Pfizer' + '*.csv')):
    pfizer_frames.append(pd.read_csv(file_name, low_memory = False))
    print(file_name)
# pd.concat raises on an empty list, so fall back to an empty frame when no file matches.
data_EU_pfizer = pd.concat(pfizer_frames, axis = 0, ignore_index = True) if pfizer_frames else pd.DataFrame()
# Relabel the 14 raw columns by position; numbered placeholders mark dropped columns.
data_EU_pfizer.columns = ["EU_ID", "1", "2", "3", "4", "5", "AGE_GROUP", "6", "7", "SEX", "SYMPTOMS_LIST", "8", "9", "10"]
# .copy() makes the subset independent, so the assignments below do not raise SettingWithCopyWarning.
data_EU_pfizer = data_EU_pfizer[["EU_ID", "AGE_GROUP", "SEX", "SYMPTOMS_LIST"]].copy()
# Raw reaction text -> list of reaction names.
data_EU_pfizer["SYMPTOMS_LIST"] = data_EU_pfizer["SYMPTOMS_LIST"].apply(extract_symptoms)
# "18-64 Years" -> "18-64".
data_EU_pfizer["AGE_GROUP"] = data_EU_pfizer["AGE_GROUP"].astype(str).str.replace("Years", "").str.strip()
# Exact-match recoding of the sex labels; values that are not listed are left unchanged.
sex_codes = {"Female": "F", "Male": "M", "Not Specified": "U"}
data_EU_pfizer["SEX"] = data_EU_pfizer["SEX"].astype(str).str.strip().replace(sex_codes)
data_EU_pfizer['VAX_MANU'] = "PFIZER"
data_EU_pfizer

EU Files\EU Pfizer 1 2021.csv
EU Files\EU Pfizer 1 2022.csv
EU Files\EU Pfizer 1 2023.csv
EU Files\EU Pfizer 2 2023.csv
EU Files\EU Pfizer 3 2022.csv
EU Files\EU Pfizer 3 2023.csv
EU Files\EU Pfizer 4 2022.csv
EU Files\EU Pfizer 4 2023.csv


Unnamed: 0,EU_ID,AGE_GROUP,SEX,SYMPTOMS_LIST,VAX_MANU
0,EU-EC-10011128486,18-64,M,[Alopecia areata],PFIZER
1,EU-EC-10011128558,65-85,F,[Neuralgia],PFIZER
2,EU-EC-10011128590,18-64,M,"[Myocardial infarction, Resuscitation, Ventric...",PFIZER
3,EU-EC-10011128595,18-64,M,[Vaccination failure],PFIZER
4,EU-EC-10011128635,65-85,F,"[Arthralgia, Confusional state, Disturbance in...",PFIZER
...,...,...,...,...,...
602907,EU-EC-10014349651,65-85,M,"[Herpes zoster, Ophthalmic herpes zoster, Prur...",PFIZER
602908,EU-EC-10014349759,18-64,F,"[COVID-19, Drug ineffective]",PFIZER
602909,EU-EC-10014350772,65-85,F,"[Atrial fibrillation, Diarrhoea]",PFIZER
602910,EU-EC-10014351070,18-64,M,"[Disturbance in attention, Impaired quality of...",PFIZER


In [17]:
# Load every AstraZeneca export. Frames are collected in a list and concatenated once
# (pd.concat inside the loop re-copies all loaded rows each iteration), matches are
# sorted for a reproducible row order, and the loop does not rebind the name `x`.
astrazeneca_frames = []
for file_name in sorted(glob.glob('EU Files/EU Astrazeneca' + '*.csv')):
    astrazeneca_frames.append(pd.read_csv(file_name, low_memory = False))
    print(file_name)
# pd.concat raises on an empty list, so fall back to an empty frame when no file matches.
data_EU_astrazeneca = pd.concat(astrazeneca_frames, axis = 0, ignore_index = True) if astrazeneca_frames else pd.DataFrame()
# Relabel the 14 raw columns by position; numbered placeholders mark dropped columns.
data_EU_astrazeneca.columns = ["EU_ID", "1", "2", "3", "4", "5", "AGE_GROUP", "6", "7", "SEX", "SYMPTOMS_LIST", "8", "9", "10"]
# .copy() makes the subset independent, so the assignments below do not raise SettingWithCopyWarning.
data_EU_astrazeneca = data_EU_astrazeneca[["EU_ID", "AGE_GROUP", "SEX", "SYMPTOMS_LIST"]].copy()
# Raw reaction text -> list of reaction names.
data_EU_astrazeneca["SYMPTOMS_LIST"] = data_EU_astrazeneca["SYMPTOMS_LIST"].apply(extract_symptoms)
# "18-64 Years" -> "18-64".
data_EU_astrazeneca["AGE_GROUP"] = data_EU_astrazeneca["AGE_GROUP"].astype(str).str.replace("Years", "").str.strip()
# Exact-match recoding of the sex labels; values that are not listed are left unchanged.
sex_codes = {"Female": "F", "Male": "M", "Not Specified": "U"}
data_EU_astrazeneca["SEX"] = data_EU_astrazeneca["SEX"].astype(str).str.strip().replace(sex_codes)
data_EU_astrazeneca['VAX_MANU'] = "ASTRAZENECA"
data_EU_astrazeneca

EU Files\EU Astrazeneca 1 2021.csv
EU Files\EU Astrazeneca 1 2022.csv
EU Files\EU Astrazeneca 1 2023.csv


Unnamed: 0,EU_ID,AGE_GROUP,SEX,SYMPTOMS_LIST,VAX_MANU
0,EU-EC-10011128636,18-64,M,"[Dysaesthesia, Muscle contractions involuntary...",ASTRAZENECA
1,EU-EC-10011128637,65-85,F,"[Abdominal pain upper, Blood pressure increase...",ASTRAZENECA
2,EU-EC-10011128695,18-64,M,[Tinnitus],ASTRAZENECA
3,EU-EC-10011128705,65-85,F,[Urticaria],ASTRAZENECA
4,EU-EC-10011128724,65-85,M,[Cardiac failure],ASTRAZENECA
...,...,...,...,...,...
427815,EU-EC-10014351569,Not Specified,M,"[Arthralgia, Chills, Colitis, Colitis ulcerati...",ASTRAZENECA
427816,EU-EC-10014351631,65-85,M,"[Cardiovascular disorder, Interchange of vacci...",ASTRAZENECA
427817,EU-EC-10014352064,More than 85,F,"[Glaucoma, Macular degeneration, Neck pain, Ox...",ASTRAZENECA
427818,EU-EC-10014352150,18-64,F,"[Abdominal pain, Alopecia, Asthenia, Decreased...",ASTRAZENECA


In [18]:
# Load every Janssen export. Frames are collected in a list and concatenated once
# (pd.concat inside the loop re-copies all loaded rows each iteration), matches are
# sorted for a reproducible row order, and the loop does not rebind the name `x`.
janssen_frames = []
for file_name in sorted(glob.glob('EU Files/EU Janssen' + '*.csv')):
    janssen_frames.append(pd.read_csv(file_name, low_memory = False))
    print(file_name)
# pd.concat raises on an empty list, so fall back to an empty frame when no file matches.
data_EU_janssen = pd.concat(janssen_frames, axis = 0, ignore_index = True) if janssen_frames else pd.DataFrame()
# Relabel the 14 raw columns by position; numbered placeholders mark dropped columns.
data_EU_janssen.columns = ["EU_ID", "1", "2", "3", "4", "5", "AGE_GROUP", "6", "7", "SEX", "SYMPTOMS_LIST", "8", "9", "10"]
# .copy() makes the subset independent, so the assignments below do not raise SettingWithCopyWarning.
data_EU_janssen = data_EU_janssen[["EU_ID", "AGE_GROUP", "SEX", "SYMPTOMS_LIST"]].copy()
# Raw reaction text -> list of reaction names.
data_EU_janssen["SYMPTOMS_LIST"] = data_EU_janssen["SYMPTOMS_LIST"].apply(extract_symptoms)
# "18-64 Years" -> "18-64".
data_EU_janssen["AGE_GROUP"] = data_EU_janssen["AGE_GROUP"].astype(str).str.replace("Years", "").str.strip()
# Exact-match recoding of the sex labels; values that are not listed are left unchanged.
sex_codes = {"Female": "F", "Male": "M", "Not Specified": "U"}
data_EU_janssen["SEX"] = data_EU_janssen["SEX"].astype(str).str.strip().replace(sex_codes)
data_EU_janssen['VAX_MANU'] = "JANSSEN"
data_EU_janssen

EU Files\EU Janssen 1 2021.csv
EU Files\EU Janssen 1 2022.csv
EU Files\EU Janssen 1 2023.csv


Unnamed: 0,EU_ID,AGE_GROUP,SEX,SYMPTOMS_LIST,VAX_MANU
0,EU-EC-10011128588,Not Specified,M,[Vaccination failure],JANSSEN
1,EU-EC-10011128704,18-64,M,"[Asthenia, Myalgia, Vertigo]",JANSSEN
2,EU-EC-10011129053,18-64,M,"[Arthralgia, Bone marrow oedema]",JANSSEN
3,EU-EC-10011129123,18-64,F,"[Hypertension, Tachycardia]",JANSSEN
4,EU-EC-10011129244,18-64,M,"[COVID-19, Vaccination failure]",JANSSEN
...,...,...,...,...,...
71560,EU-EC-10014347423,Not Specified,M,"[Back pain, Dizziness, Pain in extremity, Pyre...",JANSSEN
71561,EU-EC-10014347424,18-64,M,"[Arthritis, Pyrexia]",JANSSEN
71562,EU-EC-10014347425,18-64,F,"[Asthenia, Impaired work ability, Pyrexia, Vac...",JANSSEN
71563,EU-EC-10014347426,18-64,M,"[Arthralgia, Headache, Pyrexia]",JANSSEN


In [19]:
# Load every Novavax export. Frames are collected in a list and concatenated once
# (pd.concat inside the loop re-copies all loaded rows each iteration), matches are
# sorted for a reproducible row order, and the loop does not rebind the name `x`.
novavax_frames = []
for file_name in sorted(glob.glob('EU Files/EU Novavax' + '*.csv')):
    novavax_frames.append(pd.read_csv(file_name, low_memory = False))
    print(file_name)
# pd.concat raises on an empty list, so fall back to an empty frame when no file matches.
data_EU_novavax = pd.concat(novavax_frames, axis = 0, ignore_index = True) if novavax_frames else pd.DataFrame()
# Relabel the 14 raw columns by position; numbered placeholders mark dropped columns.
data_EU_novavax.columns = ["EU_ID", "1", "2", "3", "4", "5", "AGE_GROUP", "6", "7", "SEX", "SYMPTOMS_LIST", "8", "9", "10"]
# .copy() makes the subset independent, so the assignments below do not raise SettingWithCopyWarning.
data_EU_novavax = data_EU_novavax[["EU_ID", "AGE_GROUP", "SEX", "SYMPTOMS_LIST"]].copy()
# Raw reaction text -> list of reaction names.
data_EU_novavax["SYMPTOMS_LIST"] = data_EU_novavax["SYMPTOMS_LIST"].apply(extract_symptoms)
# "18-64 Years" -> "18-64".
data_EU_novavax["AGE_GROUP"] = data_EU_novavax["AGE_GROUP"].astype(str).str.replace("Years", "").str.strip()
# Exact-match recoding of the sex labels; values that are not listed are left unchanged.
sex_codes = {"Female": "F", "Male": "M", "Not Specified": "U"}
data_EU_novavax["SEX"] = data_EU_novavax["SEX"].astype(str).str.strip().replace(sex_codes)
data_EU_novavax['VAX_MANU'] = "NOVAVAX"
data_EU_novavax

EU Files\EU Novavax 1 2022.csv
EU Files\EU Novavax 1 2023.csv
EU Files\EU Novavax 2 2023.csv


Unnamed: 0,EU_ID,AGE_GROUP,SEX,SYMPTOMS_LIST,VAX_MANU
0,EU-EC-10014341832,65-85,M,"[Myalgia, Pain, Peripheral swelling]",NOVAVAX
1,EU-EC-10014334441,18-64,F,"[Fatigue, Headache, Tension headache]",NOVAVAX
2,EU-EC-10014335705,18-64,M,"[Herpes zoster, Muscular weakness, Neuralgia, ...",NOVAVAX
3,EU-EC-10014337174,18-64,M,[Adverse event],NOVAVAX
4,EU-EC-10014337505,18-64,M,"[Chest discomfort, Hypoaesthesia, Mobility dec...",NOVAVAX
...,...,...,...,...,...
1679,EU-EC-10014351380,18-64,M,"[Pericardial effusion, Pericarditis]",NOVAVAX
1680,EU-EC-10016516063,18-64,F,"[Pain in extremity, Vaccination site erythema,...",NOVAVAX
1681,EU-EC-10016508453,18-64,F,"[Chills, Headache, Insomnia, Neuropathy periph...",NOVAVAX
1682,EU-EC-10016392820,65-85,F,"[Body temperature increased, Chest pain, Dizzi...",NOVAVAX


In [20]:
# Load every Valneva export. Frames are collected in a list and concatenated once
# (pd.concat inside the loop re-copies all loaded rows each iteration), matches are
# sorted for a reproducible row order, and the loop does not rebind the name `x`.
valneva_frames = []
for file_name in sorted(glob.glob('EU Files/EU Valneva' + '*.csv')):
    valneva_frames.append(pd.read_csv(file_name, low_memory = False))
    print(file_name)
# pd.concat raises on an empty list, so fall back to an empty frame when no file matches.
data_EU_valneva = pd.concat(valneva_frames, axis = 0, ignore_index = True) if valneva_frames else pd.DataFrame()
# Relabel the 14 raw columns by position; numbered placeholders mark dropped columns.
data_EU_valneva.columns = ["EU_ID", "1", "2", "3", "4", "5", "AGE_GROUP", "6", "7", "SEX", "SYMPTOMS_LIST", "8", "9", "10"]
# .copy() makes the subset independent, so the assignments below do not raise SettingWithCopyWarning.
data_EU_valneva = data_EU_valneva[["EU_ID", "AGE_GROUP", "SEX", "SYMPTOMS_LIST"]].copy()
# Raw reaction text -> list of reaction names.
data_EU_valneva["SYMPTOMS_LIST"] = data_EU_valneva["SYMPTOMS_LIST"].apply(extract_symptoms)
# "18-64 Years" -> "18-64".
data_EU_valneva["AGE_GROUP"] = data_EU_valneva["AGE_GROUP"].astype(str).str.replace("Years", "").str.strip()
# Exact-match recoding of the sex labels; values that are not listed are left unchanged.
sex_codes = {"Female": "F", "Male": "M", "Not Specified": "U"}
data_EU_valneva["SEX"] = data_EU_valneva["SEX"].astype(str).str.strip().replace(sex_codes)
data_EU_valneva['VAX_MANU'] = "VALNEVA"
data_EU_valneva

EU Files\EU Valneva 1 2022.csv
EU Files\EU Valneva 1 2023.csv


Unnamed: 0,EU_ID,AGE_GROUP,SEX,SYMPTOMS_LIST,VAX_MANU
0,EU-EC-10014179682,18-64,F,"[Headache, Off label use, Taste disorder]",VALNEVA
1,EU-EC-10014166642,18-64,M,"[Anaphylactic reaction, Dyspnoea, Off label use]",VALNEVA
2,EU-EC-10014171307,Not Specified,F,"[Interchange of vaccine products, Off label use]",VALNEVA
3,EU-EC-10014161079,18-64,F,"[Headache, Taste disorder]",VALNEVA
4,EU-EC-10014092652,65-85,F,"[Limb immobilisation, Paraesthesia]",VALNEVA
5,EU-EC-10014037360,Not Specified,M,"[Interchange of vaccine products, No adverse e...",VALNEVA
6,EU-EC-10013924383,65-85,M,"[Off label use, Vaccination site warmth]",VALNEVA
7,EU-EC-10013806943,65-85,F,"[Interchange of vaccine products, Off label us...",VALNEVA
8,EU-EC-10013806979,Not Specified,M,"[Interchange of vaccine products, Off label use]",VALNEVA
9,EU-EC-10013807664,65-85,M,"[Body temperature increased, Interchange of va...",VALNEVA


In [21]:
# Load every Vidprevtyn export. Frames are collected in a list and concatenated once
# (pd.concat inside the loop re-copies all loaded rows each iteration), matches are
# sorted for a reproducible row order, and the loop does not rebind the name `x`.
vidprevtyn_frames = []
for file_name in sorted(glob.glob('EU Files/EU Vidprevtyn' + '*.csv')):
    vidprevtyn_frames.append(pd.read_csv(file_name, low_memory = False))
    print(file_name)
# pd.concat raises on an empty list, so fall back to an empty frame when no file matches.
data_EU_vidprevtyn = pd.concat(vidprevtyn_frames, axis = 0, ignore_index = True) if vidprevtyn_frames else pd.DataFrame()
# Relabel the 14 raw columns by position; numbered placeholders mark dropped columns.
data_EU_vidprevtyn.columns = ["EU_ID", "1", "2", "3", "4", "5", "AGE_GROUP", "6", "7", "SEX", "SYMPTOMS_LIST", "8", "9", "10"]
# .copy() makes the subset independent, so the assignments below do not raise SettingWithCopyWarning.
data_EU_vidprevtyn = data_EU_vidprevtyn[["EU_ID", "AGE_GROUP", "SEX", "SYMPTOMS_LIST"]].copy()
# Raw reaction text -> list of reaction names.
data_EU_vidprevtyn["SYMPTOMS_LIST"] = data_EU_vidprevtyn["SYMPTOMS_LIST"].apply(extract_symptoms)
# "18-64 Years" -> "18-64".
data_EU_vidprevtyn["AGE_GROUP"] = data_EU_vidprevtyn["AGE_GROUP"].astype(str).str.replace("Years", "").str.strip()
# Exact-match recoding of the sex labels; values that are not listed are left unchanged.
sex_codes = {"Female": "F", "Male": "M", "Not Specified": "U"}
data_EU_vidprevtyn["SEX"] = data_EU_vidprevtyn["SEX"].astype(str).str.strip().replace(sex_codes)
data_EU_vidprevtyn['VAX_MANU'] = "VIDPREVTYN"
data_EU_vidprevtyn

EU Files\EU Vidprevtyn 1 2023.csv


Unnamed: 0,EU_ID,AGE_GROUP,SEX,SYMPTOMS_LIST,VAX_MANU
0,EU-EC-10016550757,18-64,F,"[Fatigue, Pain in extremity]",VIDPREVTYN
1,EU-EC-10016535561,18-64,F,"[Asthenia, Cyanosis, Diarrhoea, Fatigue, Heada...",VIDPREVTYN
2,EU-EC-10016488047,Not Specified,F,[Dizziness],VIDPREVTYN
3,EU-EC-10016442411,65-85,F,[Peripheral swelling],VIDPREVTYN
4,EU-EC-10016443315,65-85,F,[Dizziness],VIDPREVTYN
...,...,...,...,...,...
411,EU-EC-10015031220,18-64,F,"[Asthenia, Musculoskeletal stiffness, Myalgia,...",VIDPREVTYN
412,EU-EC-10014987895,18-64,M,"[Eye pruritus, Ocular discomfort]",VIDPREVTYN
413,EU-EC-10014940221,65-85,U,"[Feeling cold, Influenza like illness, Pain in...",VIDPREVTYN
414,EU-EC-10014652497,18-64,M,"[Gastrointestinal disorder, Immunisation react...",VIDPREVTYN


In [22]:
# Stack the per-manufacturer frames into one table with a fresh running index.
manufacturer_frames = [
    data_EU_astrazeneca,
    data_EU_janssen,
    data_EU_moderna,
    data_EU_novavax,
    data_EU_pfizer,
    data_EU_valneva,
    data_EU_vidprevtyn,
]
data_EU_full = pd.concat(manufacturer_frames, axis = 0, ignore_index = True)
data_EU_full

Unnamed: 0,EU_ID,AGE_GROUP,SEX,SYMPTOMS_LIST,VAX_MANU
0,EU-EC-10011128636,18-64,M,"[Dysaesthesia, Muscle contractions involuntary...",ASTRAZENECA
1,EU-EC-10011128637,65-85,F,"[Abdominal pain upper, Blood pressure increase...",ASTRAZENECA
2,EU-EC-10011128695,18-64,M,[Tinnitus],ASTRAZENECA
3,EU-EC-10011128705,65-85,F,[Urticaria],ASTRAZENECA
4,EU-EC-10011128724,65-85,M,[Cardiac failure],ASTRAZENECA
...,...,...,...,...,...
1498250,EU-EC-10015031220,18-64,F,"[Asthenia, Musculoskeletal stiffness, Myalgia,...",VIDPREVTYN
1498251,EU-EC-10014987895,18-64,M,"[Eye pruritus, Ocular discomfort]",VIDPREVTYN
1498252,EU-EC-10014940221,65-85,U,"[Feeling cold, Influenza like illness, Pain in...",VIDPREVTYN
1498253,EU-EC-10014652497,18-64,M,"[Gastrointestinal disorder, Immunisation react...",VIDPREVTYN


In [23]:
# The CSV export of the combined table is commented out; uncomment the next line to write the file.
# data_EU_full.to_csv("Final Files/EU Full Data.csv", index = False)