# Depression Adverse Event Table Notebook 

## Importing Libraries

In [27]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

import os
import time

In [28]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Read in Tables

In [29]:
unique_ae = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/distinct_reported_events.csv')

In [30]:
smq_mapped = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/smq_edited.csv')

In [31]:
smq_mapped.head()

Unnamed: 0,Row ID,LLT/PT Term,LLT/PT Code,PT
0,0,V,,
1,1,K,10023271.0,Blood potassium
2,2,PE,10034191.0,Pulmonary embolism
3,3,GI,,
4,4,ER,,


In [32]:
trial_by_ae = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/subset_adverse_events files/depression_subset_adverse_events.csv', index_col=0)

In [33]:
trial_by_ae.head()

Unnamed: 0,nct_id,id,adverse_events
1,NCT03315208,121187683.0,{relapsetosubstancesresultingininpatienttreatm...
2,NCT03315208,121187684.0,{relapsetosubstancesresultingininpatienttreatm...
3,NCT03315208,121187685.0,{relapsetosubstancesresultingininpatienttreatm...
4,NCT03315208,121187686.0,{relapsetosubstancesresultinginemergencydepart...
5,NCT03315208,121187687.0,{relapsetosubstancesresultinginemergencydepart...


In [34]:
unique_id = pd.DataFrame(trial_by_ae['id'].unique(), columns = ['id'])

In [35]:
unique_id.head()

Unnamed: 0,id
0,121187683.0
1,121187684.0
2,121187685.0
3,121187686.0
4,121187687.0


In [36]:
unique_ae_case = pd.DataFrame(trial_by_ae['adverse_events'].unique(), columns = ['lower'])
unique_ae_case['lower'] = unique_ae_case['lower'].str.slice(1,-1,1)

In [37]:
unique_ae_case.head()

Unnamed: 0,lower
0,relapsetosubstancesresultingininpatienttreatment
1,relapsetosubstancesresultinginemergencydepartm...
2,activesuicidalideationwithplanresultingininpat...
3,activesuicidalideationwithplanwithoutinpatient...
4,increaseinpsychoticsymptomsresultingininpatien...


In [38]:
unique_ae = unique_ae_case.merge(unique_ae, how='inner', on='lower')

In [39]:
unique_ae.shape, unique_id.shape, smq_mapped.shape, trial_by_ae.shape

((8392, 2), (28056, 1), (101145, 4), (28156, 3))

## Data Cleaning

## Sort for analysis

In [40]:
unique_ae.sort_values(by='adverse_event_term', key=lambda x: x.str.len(), inplace=True)

## Drop duplicates and NaN values

In [41]:
unique_ae.drop_duplicates('lower', inplace=True)
unique_ae.dropna(axis=0, inplace=True)
unique_ae.reset_index(drop=True, inplace=True)
smq_mapped.drop('Row ID', axis=1, inplace=True)

In [42]:
trial_by_ae.dropna(axis=0, inplace=True)
trial_by_ae.reset_index(drop=True, inplace=True)

In [43]:
term_hits = pd.DataFrame(smq_mapped.dropna(axis=0))

In [44]:
unique_ae.shape, smq_mapped.shape, term_hits.shape, trial_by_ae.shape, unique_id.shape

((2432, 2), (101145, 3), (24767, 3), (28055, 3), (28056, 1))

In [45]:
unique_ae.head()

Unnamed: 0,lower,adverse_event_term
0,flu,FLU
1,tic,Tic
2,eye,Eye
3,ear,ear
4,mass,mass


In [46]:
ae_lookup_df = unique_ae.merge(term_hits, how='inner', left_on='adverse_event_term', right_on='LLT/PT Term')

In [47]:
ae_lookup_df.head()

Unnamed: 0,lower,adverse_event_term,LLT/PT Term,LLT/PT Code,PT
0,flu,FLU,FLU,10016790.0,Influenza
1,tic,Tic,Tic,10043833.0,Tic
2,rash,rash,rash,10037844.0,Rash
3,pain,pain,pain,10033371.0,Pain
4,manic,Manic,Manic,10026752.0,Mania


In [48]:
ae_lookup_df.shape

(547, 5)

## Create Final Table

In [49]:
final_ae = pd.DataFrame(columns=ae_lookup_df['PT'].unique(), index=trial_by_ae['id'])

In [50]:
final_ae.shape

(28055, 479)

In [51]:
final_ae.head()

Unnamed: 0_level_0,Influenza,Tic,Rash,Pain,Mania,Crime,Anger,Wound,Dizziness,Muscle spasms,...,Blood pressure diastolic decreased,Human chorionic gonadotropin,Electrocardiogram ST segment abnormal,Tuberculosis of peripheral lymph nodes,Eye complication associated with device,Blood thyroid stimulating hormone decreased,Pancreatic neuroendocrine tumour metastatic,Insomnia,Adjustment disorder with disturbance of conduct,Adjustment disorder with mixed anxiety and depressed mood
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
121187683.0,,,,,,,,,,,...,,,,,,,,,,
121187684.0,,,,,,,,,,,...,,,,,,,,,,
121187685.0,,,,,,,,,,,...,,,,,,,,,,
121187686.0,,,,,,,,,,,...,,,,,,,,,,
121187687.0,,,,,,,,,,,...,,,,,,,,,,


In [None]:
for i in range(trial_by_ae.shape[0]):
  check = 0
  ind = trial_by_ae['id'][i]
  term = trial_by_ae['adverse_events'][i][1:-1]
  if (ae_lookup_df['lower'] == term).any():
    adverse_event = ae_lookup_df.loc[ae_lookup_df['lower'] == term]
    adverse_event = adverse_event['PT'].values[0]
    final_ae.loc[ind, adverse_event] = 1
    check = 1
  if check == 0:
    final_ae.drop(ind)

In [None]:
final_ae.shape

In [None]:
final_ae.dropna(axis=0, how='all', inplace=True)
final_ae.fillna(0, inplace=True)
final_ae.shape

In [None]:
depression_ae_table = final_ae

In [None]:
depression_ae_table.to_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/Final AE Tables/depression_ae_table.csv')

# Anxiety Adverse Event Table Notebook 

## Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

import os
import time

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Read in Tables

In [None]:
unique_ae = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/distinct_reported_events.csv')

In [None]:
smq_mapped = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/smq_edited.csv')

In [None]:
smq_mapped.head()

Unnamed: 0,Row ID,LLT/PT Term,LLT/PT Code,PT
0,0,V,,
1,1,K,10023271.0,Blood potassium
2,2,PE,10034191.0,Pulmonary embolism
3,3,GI,,
4,4,ER,,


In [None]:
trial_by_ae = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/subset_adverse_events files/anxiety_subset_adverse_events.csv', index_col=0)

In [None]:
trial_by_ae.head()

Unnamed: 0,nct_id,id,adverse_events
1,NCT03315208,121187683.0,{relapsetosubstancesresultingininpatienttreatm...
2,NCT03315208,121187684.0,{relapsetosubstancesresultingininpatienttreatm...
3,NCT03315208,121187685.0,{relapsetosubstancesresultingininpatienttreatm...
4,NCT03315208,121187686.0,{relapsetosubstancesresultinginemergencydepart...
5,NCT03315208,121187687.0,{relapsetosubstancesresultinginemergencydepart...


In [None]:
unique_id = pd.DataFrame(trial_by_ae['id'].unique(), columns = ['id'])

In [None]:
unique_id.head()

Unnamed: 0,id
0,121187683.0
1,121187684.0
2,121187685.0
3,121187686.0
4,121187687.0


In [None]:
unique_ae_case = pd.DataFrame(trial_by_ae['adverse_events'].unique(), columns = ['lower'])
unique_ae_case['lower'] = unique_ae_case['lower'].str.slice(1,-1,1)

In [None]:
unique_ae_case.head()

Unnamed: 0,lower
0,relapsetosubstancesresultingininpatienttreatment
1,relapsetosubstancesresultinginemergencydepartm...
2,activesuicidalideationwithplanresultingininpat...
3,activesuicidalideationwithplanwithoutinpatient...
4,increaseinpsychoticsymptomsresultingininpatien...


In [None]:
unique_ae = unique_ae_case.merge(unique_ae, how='inner', on='lower')

In [None]:
unique_ae.shape, unique_id.shape, smq_mapped.shape, trial_by_ae.shape

((2663, 2), (3863, 1), (101145, 4), (3930, 3))

## Data Cleaning

## Sort for analysis

In [None]:
unique_ae.sort_values(by='adverse_event_term', key=lambda x: x.str.len(), inplace=True)

## Drop duplicates and NaN values

In [None]:
unique_ae.drop_duplicates('lower', inplace=True)
unique_ae.dropna(axis=0, inplace=True)
unique_ae.reset_index(drop=True, inplace=True)
smq_mapped.drop('Row ID', axis=1, inplace=True)

In [None]:
trial_by_ae.dropna(axis=0, inplace=True)
trial_by_ae.reset_index(drop=True, inplace=True)

In [None]:
term_hits = pd.DataFrame(smq_mapped.dropna(axis=0))

In [None]:
unique_ae.shape, smq_mapped.shape, term_hits.shape, trial_by_ae.shape, unique_id.shape

((677, 2), (101145, 3), (24767, 3), (3862, 3), (3863, 1))

In [None]:
unique_ae.head()

Unnamed: 0,lower,adverse_event_term
0,gi,GI
1,ear,ear
2,uri,Uri
3,gas,GAS
4,eye,eye


In [None]:
ae_lookup_df = unique_ae.merge(term_hits, how='inner', left_on='adverse_event_term', right_on='LLT/PT Term')

In [None]:
ae_lookup_df.head()

Unnamed: 0,lower,adverse_event_term,LLT/PT Term,LLT/PT Code,PT
0,gas,GAS,GAS,10017735.0,Flatulence
1,acne,ACNE,ACNE,10000496.0,Acne
2,rash,rash,rash,10037844.0,Rash
3,pica,Pica,Pica,10035001.0,Pica
4,death,DEATH,DEATH,10011906.0,Death


In [None]:
ae_lookup_df.shape

(127, 5)

## Create Final Table

In [None]:
final_ae = pd.DataFrame(columns=ae_lookup_df['PT'].unique(), index=trial_by_ae['id'])

In [None]:
final_ae.shape

(3862, 113)

In [None]:
final_ae.head()

Unnamed: 0_level_0,Flatulence,Acne,Rash,Pica,Death,Pyrexia,Anger,Oedema,Neoplasm malignant,Somnolence,...,Hyperemesis gravidarum,Self-injurious ideation,Blood uric acid increased,Blood bilirubin increased,Mean cell volume decreased,Musculoskeletal chest pain,Blood cholesterol increased,Generalised anxiety disorder,White blood cells urine positive,Glycosylated haemoglobin increased
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
121187683.0,,,,,,,,,,,...,,,,,,,,,,
121187684.0,,,,,,,,,,,...,,,,,,,,,,
121187685.0,,,,,,,,,,,...,,,,,,,,,,
121187686.0,,,,,,,,,,,...,,,,,,,,,,
121187687.0,,,,,,,,,,,...,,,,,,,,,,


In [None]:
for i in range(trial_by_ae.shape[0]):
  check = 0
  ind = trial_by_ae['id'][i]
  term = trial_by_ae['adverse_events'][i][1:-1]
  if (ae_lookup_df['lower'] == term).any():
    adverse_event = ae_lookup_df.loc[ae_lookup_df['lower'] == term]
    adverse_event = adverse_event['PT'].values[0]
    final_ae.loc[ind, adverse_event] = 1
    check = 1
  if check == 0:
    final_ae.drop(ind)

In [None]:
final_ae.shape

(3862, 113)

In [None]:
final_ae.dropna(axis=0, how='all', inplace=True)
final_ae.fillna(0, inplace=True)
final_ae.shape

(901, 113)

In [None]:
anxiety_ae_table = final_ae

In [None]:
anxiety_ae_table.to_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/Final AE Tables/anxiety_ae_table.csv')

# Bipolar Adverse Event Table Notebook 

## Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

import os
import time

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Read in Tables

In [None]:
unique_ae = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/distinct_reported_events.csv')

In [None]:
smq_mapped = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/smq_edited.csv')

In [None]:
smq_mapped.head()

Unnamed: 0,Row ID,LLT/PT Term,LLT/PT Code,PT
0,0,V,,
1,1,K,10023271.0,Blood potassium
2,2,PE,10034191.0,Pulmonary embolism
3,3,GI,,
4,4,ER,,


In [None]:
trial_by_ae = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/subset_adverse_events files/bipolar_subset_adverse_events.csv', index_col=0)

In [None]:
trial_by_ae.head()

Unnamed: 0,nct_id,id,adverse_events
1,NCT00811473,124611912.0,{agitation}
2,NCT00811473,124611913.0,{agitation}
3,NCT00811473,124611914.0,{socialstayhospitalisation}
4,NCT00811473,124611915.0,{socialstayhospitalisation}
5,NCT00811473,124611916.0,{bipolardisorder}


In [None]:
unique_id = pd.DataFrame(trial_by_ae['id'].unique(), columns = ['id'])

In [None]:
unique_id.head()

Unnamed: 0,id
0,124611912.0
1,124611913.0
2,124611914.0
3,124611915.0
4,124611916.0


In [None]:
unique_ae_case = pd.DataFrame(trial_by_ae['adverse_events'].unique(), columns = ['lower'])
unique_ae_case['lower'] = unique_ae_case['lower'].str.slice(1,-1,1)

In [None]:
unique_ae_case.head()

Unnamed: 0,lower
0,agitation
1,socialstayhospitalisation
2,bipolardisorder
3,aggression
4,depressivesymptoms


In [None]:
unique_ae = unique_ae_case.merge(unique_ae, how='inner', on='lower')

In [None]:
unique_ae.shape, unique_id.shape, smq_mapped.shape, trial_by_ae.shape

((4152, 2), (8226, 1), (101145, 4), (8242, 3))

## Data Cleaning

## Sort for analysis

In [None]:
unique_ae.sort_values(by='adverse_event_term', key=lambda x: x.str.len(), inplace=True)

## Drop duplicates and NaN values

In [None]:
unique_ae.drop_duplicates('lower', inplace=True)
unique_ae.dropna(axis=0, inplace=True)
unique_ae.reset_index(drop=True, inplace=True)
smq_mapped.drop('Row ID', axis=1, inplace=True)

In [None]:
trial_by_ae.dropna(axis=0, inplace=True)
trial_by_ae.reset_index(drop=True, inplace=True)

In [None]:
term_hits = pd.DataFrame(smq_mapped.dropna(axis=0))

In [None]:
unique_ae.shape, smq_mapped.shape, term_hits.shape, trial_by_ae.shape, unique_id.shape

((1087, 2), (101145, 3), (24767, 3), (8225, 3), (8226, 1))

In [None]:
unique_ae.head()

Unnamed: 0,lower,adverse_event_term
0,flu,FLU
1,uri,Uri
2,pain,Pain
3,coma,Coma
4,burn,burn


In [None]:
ae_lookup_df = unique_ae.merge(term_hits, how='inner', left_on='adverse_event_term', right_on='LLT/PT Term')

In [None]:
ae_lookup_df.head()

Unnamed: 0,lower,adverse_event_term,LLT/PT Term,LLT/PT Code,PT
0,flu,FLU,FLU,10016790.0,Influenza
1,coma,Coma,Coma,10010071.0,Coma
2,fever,fever,fever,10016558.0,Pyrexia
3,wound,Wound,Wound,10052428.0,Wound
4,cough,COUGH,COUGH,10011224.0,Cough


In [None]:
ae_lookup_df.shape

(275, 5)

## Create Final Table

In [None]:
final_ae = pd.DataFrame(columns=ae_lookup_df['PT'].unique(), index=trial_by_ae['id'])

In [None]:
final_ae.shape

(8225, 236)

In [None]:
final_ae.head()

Unnamed: 0_level_0,Influenza,Coma,Pyrexia,Wound,Cough,Anaemia,Bezoar,Hunger,Seizure,Empyema,...,Altered state of consciousness,Transposition of the great vessels,Wolff-Parkinson-White syndrome,Disturbance in social behaviour,Arteriosclerosis coronary artery,Intervertebral disc degeneration,Lower respiratory tract infection,Blood pressure systolic decreased,Autonomic nervous system imbalance,Complication associated with device
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
124611912.0,,,,,,,,,,,...,,,,,,,,,,
124611913.0,,,,,,,,,,,...,,,,,,,,,,
124611914.0,,,,,,,,,,,...,,,,,,,,,,
124611915.0,,,,,,,,,,,...,,,,,,,,,,
124611916.0,,,,,,,,,,,...,,,,,,,,,,


In [None]:
for i in range(trial_by_ae.shape[0]):
  check = 0
  ind = trial_by_ae['id'][i]
  term = trial_by_ae['adverse_events'][i][1:-1]
  if (ae_lookup_df['lower'] == term).any():
    adverse_event = ae_lookup_df.loc[ae_lookup_df['lower'] == term]
    adverse_event = adverse_event['PT'].values[0]
    final_ae.loc[ind, adverse_event] = 1
    check = 1
  if check == 0:
    final_ae.drop(ind)

In [None]:
final_ae.shape

(8225, 236)

In [None]:
final_ae.dropna(axis=0, how='all', inplace=True)
final_ae.fillna(0, inplace=True)
final_ae.shape

(2048, 236)

In [None]:
bipolar_ae_table = final_ae

In [None]:
bipolar_ae_table.to_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/Final AE Tables/bipolar_ae_table.csv')

# Alzheimers Adverse Event Table Notebook 

## Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

import os
import time

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### Read in Tables

In [None]:
trial_by_ae = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/subset_adverse_events files/alzheimers_subset_adverse_events.csv', index_col=0)

In [None]:
trial_by_ae

Unnamed: 0,nct_id,id,adverse_events
1,NCT02585232,121987318.0,{hospitalizationsandemergencyroomvisits}
2,NCT02585232,121987319.0,{hospitalizationsandemergencyroomvisits}
3,NCT02585232,121987320.0,{housefire}
4,NCT02585232,121987321.0,{housefire}
5,NCT02585232,121987322.0,{duplicatemeasurescollected}
...,...,...,...
60571,NCT01699503,119237906.0,{incidentseveredepression}
60572,NCT01699503,119237907.0,{incidentseveredepression}
60573,NCT01699503,119237908.0,{incidentsevereanxiety}
60574,NCT01699503,119237909.0,{incidentsevereanxiety}


In [None]:
trial_by_ae_1 = trial_by_ae.iloc[:30287]
trial_by_ae_2 = trial_by_ae.iloc[30287:]

### complete for first half of AD ae table

In [None]:
unique_ae = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/distinct_reported_events.csv')

In [None]:
smq_mapped = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/smq_edited.csv')

In [None]:
smq_mapped.head()

Unnamed: 0,Row ID,LLT/PT Term,LLT/PT Code,PT
0,0,V,,
1,1,K,10023271.0,Blood potassium
2,2,PE,10034191.0,Pulmonary embolism
3,3,GI,,
4,4,ER,,


In [None]:
trial_by_ae_1.head()

Unnamed: 0,nct_id,id,adverse_events
1,NCT02585232,121987318.0,{hospitalizationsandemergencyroomvisits}
2,NCT02585232,121987319.0,{hospitalizationsandemergencyroomvisits}
3,NCT02585232,121987320.0,{housefire}
4,NCT02585232,121987321.0,{housefire}
5,NCT02585232,121987322.0,{duplicatemeasurescollected}


In [None]:
unique_id = pd.DataFrame(trial_by_ae_1['id'].unique(), columns = ['id'])

In [None]:
unique_id.head()

Unnamed: 0,id
0,121987318.0
1,121987319.0
2,121987320.0
3,121987321.0
4,121987322.0


In [None]:
unique_ae_case = pd.DataFrame(trial_by_ae_1['adverse_events'].unique(), columns = ['lower'])
unique_ae_case['lower'] = unique_ae_case['lower'].str.slice(1,-1,1)

In [None]:
unique_ae_case.head()

Unnamed: 0,lower
0,hospitalizationsandemergencyroomvisits
1,housefire
2,duplicatemeasurescollected
3,anginaunstable
4,atrialfribrillation


In [None]:
unique_ae = unique_ae_case.merge(unique_ae, how='inner', on='lower')

In [None]:
unique_ae.shape, unique_id.shape, smq_mapped.shape, trial_by_ae_1.shape

((8346, 2), (30280, 1), (101145, 4), (30287, 3))

#### Data Cleaning

#### Sort for analysis

In [None]:
unique_ae.sort_values(by='adverse_event_term', key=lambda x: x.str.len(), inplace=True)

#### Drop duplicates and NaN values

In [None]:
unique_ae.drop_duplicates('lower', inplace=True)
unique_ae.dropna(axis=0, inplace=True)
unique_ae.reset_index(drop=True, inplace=True)
smq_mapped.drop('Row ID', axis=1, inplace=True)

In [None]:
trial_by_ae_1.dropna(axis=0, inplace=True)
trial_by_ae_1.reset_index(drop=True, inplace=True)

In [None]:
term_hits = pd.DataFrame(smq_mapped.dropna(axis=0))

In [None]:
unique_ae.shape, smq_mapped.shape, term_hits.shape, trial_by_ae_1.shape, unique_id.shape

((2129, 2), (101145, 3), (24767, 3), (30279, 3), (30280, 1))

In [None]:
unique_ae.head()

Unnamed: 0,lower,adverse_event_term
0,fall,fall
1,gout,Gout
2,rash,Rash
3,mass,MASS
4,cold,Cold


In [None]:
ae_lookup_df = unique_ae.merge(term_hits, how='inner', left_on='adverse_event_term', right_on='LLT/PT Term')

In [None]:
ae_lookup_df.head()

Unnamed: 0,lower,adverse_event_term,LLT/PT Term,LLT/PT Code,PT
0,flu,FLU,FLU,10016790.0,Influenza
1,dvt,dvt,dvt,10013877.0,Deep vein thrombosis
2,cyst,cyst,cyst,10011732.0,Cyst
3,itch,Itch,Itch,10023082.0,Pruritus
4,hives,HIVES,HIVES,10020197.0,Urticaria


In [None]:
ae_lookup_df.shape

(774, 5)

#### Create Final Table

In [None]:
final_ae_1 = pd.DataFrame(columns=ae_lookup_df['PT'].unique(), index=trial_by_ae_1['id'])

In [None]:
final_ae_1.shape

(30279, 606)

In [None]:
final_ae_1.head()

Unnamed: 0_level_0,Influenza,Deep vein thrombosis,Cyst,Pruritus,Urticaria,Shock,Eczema,Ataxia,Gait inability,Sepsis,...,Nasal cavity cancer,Lower respiratory tract infection bacterial,Tongue neoplasm malignant stage unspecified,Oesophageal squamous cell carcinoma stage IV,Neovascular age-related macular degeneration,Documented hypersensitivity to administered product,Neuropsychiatric symptoms,Amyloid related imaging abnormality-oedema/effusion,Drug reaction with eosinophilia and systemic symptoms,Amyloid related imaging abnormality-microhaemorrhages and haemosiderin deposits
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
121987318.0,,,,,,,,,,,...,,,,,,,,,,
121987319.0,,,,,,,,,,,...,,,,,,,,,,
121987320.0,,,,,,,,,,,...,,,,,,,,,,
121987321.0,,,,,,,,,,,...,,,,,,,,,,
121987322.0,,,,,,,,,,,...,,,,,,,,,,


In [None]:
for i in range(trial_by_ae_1.shape[0]):
  check = 0
  ind = trial_by_ae_1['id'][i]
  term = trial_by_ae_1['adverse_events'][i][1:-1]
  if (ae_lookup_df['lower'] == term).any():
    adverse_event = ae_lookup_df.loc[ae_lookup_df['lower'] == term]
    adverse_event = adverse_event['PT'].values[0]
    final_ae_1.loc[ind, adverse_event] = 1
    check = 1
  if check == 0:
    final_ae_1.drop(ind)

In [None]:
final_ae_1.shape

(30279, 606)

In [None]:
final_ae_1.dropna(axis=0, how='all', inplace=True)
final_ae_1.fillna(0, inplace=True)
final_ae_1.shape

(9073, 606)

In [None]:
alzheimers_ae_table_1 = final_ae_1

In [None]:
unique_ae = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/distinct_reported_events.csv')

In [None]:
smq_mapped = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/smq_edited.csv')

In [None]:
smq_mapped.head()

Unnamed: 0,Row ID,LLT/PT Term,LLT/PT Code,PT
0,0,V,,
1,1,K,10023271.0,Blood potassium
2,2,PE,10034191.0,Pulmonary embolism
3,3,GI,,
4,4,ER,,


In [None]:
trial_by_ae_2.head()

Unnamed: 0,nct_id,id,adverse_events
30288,NCT01369225,118572540.0,{fall}
30289,NCT01369225,118572541.0,{ligamentsprain}
30290,NCT01369225,118572542.0,{ligamentsprain}
30291,NCT01369225,118572543.0,{ligamentsprain}
30292,NCT01369225,118572544.0,{ligamentsprain}


In [None]:
unique_id = pd.DataFrame(trial_by_ae_2['id'].unique(), columns = ['id'])

In [None]:
unique_id.head()

Unnamed: 0,id
0,118572540.0
1,118572541.0
2,118572542.0
3,118572543.0
4,118572544.0


In [None]:
unique_ae_case = pd.DataFrame(trial_by_ae_2['adverse_events'].unique(), columns = ['lower'])
unique_ae_case['lower'] = unique_ae_case['lower'].str.slice(1,-1,1)

In [None]:
unique_ae_case.head()

Unnamed: 0,lower
0,fall
1,ligamentsprain
2,spinalcompressionfracture
3,wristfracture
4,abdominalbruit


In [None]:
unique_ae = unique_ae_case.merge(unique_ae, how='inner', on='lower')

In [None]:
unique_ae.shape, unique_id.shape, smq_mapped.shape, trial_by_ae_2.shape

((8065, 2), (30282, 1), (101145, 4), (30288, 3))

## complete for second half of AD ae table

#### Data Cleaning

#### Sort for analysis

In [None]:
unique_ae.sort_values(by='adverse_event_term', key=lambda x: x.str.len(), inplace=True)

#### Drop duplicates and NaN values

In [None]:
unique_ae.drop_duplicates('lower', inplace=True)
unique_ae.dropna(axis=0, inplace=True)
unique_ae.reset_index(drop=True, inplace=True)
smq_mapped.drop('Row ID', axis=1, inplace=True)

KeyError: ignored

In [None]:
trial_by_ae_2.dropna(axis=0, inplace=True)
trial_by_ae_2.reset_index(drop=True, inplace=True)

In [None]:
term_hits = pd.DataFrame(smq_mapped.dropna(axis=0))

In [None]:
unique_ae.shape, smq_mapped.shape, term_hits.shape, trial_by_ae_2.shape, unique_id.shape

((2031, 2), (101145, 3), (24767, 3), (30281, 3), (30282, 1))

In [None]:
unique_ae.head()

Unnamed: 0,lower,adverse_event_term
0,fall,fall
1,gout,Gout
2,rash,Rash
3,mass,MASS
4,cold,Cold


In [None]:
ae_lookup_df = unique_ae.merge(term_hits, how='inner', left_on='adverse_event_term', right_on='LLT/PT Term')

In [None]:
ae_lookup_df.head()

Unnamed: 0,lower,adverse_event_term,LLT/PT Term,LLT/PT Code,PT
0,pain,pain,pain,10033371.0,Pain
1,coma,Coma,Coma,10010071.0,Coma
2,wound,Wound,Wound,10052428.0,Wound
3,rigor,rigor,rigor,10039175.0,Chills
4,goitre,goitre,goitre,10018498.0,Goitre


In [None]:
ae_lookup_df.shape

(711, 5)

#### Create Final Table

In [None]:
final_ae_2 = pd.DataFrame(columns=ae_lookup_df['PT'].unique(), index=trial_by_ae_2['id'])

In [None]:
final_ae_2.shape

(30281, 593)

In [None]:
final_ae_2.head()

Unnamed: 0_level_0,Pain,Coma,Wound,Chills,Goitre,Abulia,Tremor,Nausea,Anaemia,Amyloid related imaging abnormality-oedema/effusion,...,Lip neoplasm malignant stage unspecified,Gastrointestinal arteriovenous malformation,Anticoagulation drug level above therapeutic,Hyperglycaemic hyperosmolar nonketotic syndrome,Superficial siderosis of central nervous system,"Dementia of the Alzheimer's type, with delusions",Brief psychotic disorder without marked stressors,Magnetic resonance imaging head abnormal,Mucoid degeneration of the anterior cruciate ligament,Amyloid related imaging abnormality-microhaemorrhages and haemosiderin deposits
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
118572540.0,,,,,,,,,,,...,,,,,,,,,,
118572541.0,,,,,,,,,,,...,,,,,,,,,,
118572542.0,,,,,,,,,,,...,,,,,,,,,,
118572543.0,,,,,,,,,,,...,,,,,,,,,,
118572544.0,,,,,,,,,,,...,,,,,,,,,,


In [None]:
for i in range(trial_by_ae_2.shape[0]):
  check = 0
  ind = trial_by_ae_2['id'][i]
  term = trial_by_ae_2['adverse_events'][i][1:-1]
  if (ae_lookup_df['lower'] == term).any():
    adverse_event = ae_lookup_df.loc[ae_lookup_df['lower'] == term]
    adverse_event = adverse_event['PT'].values[0]
    final_ae_2.loc[ind, adverse_event] = 1
    check = 1
  if check == 0:
    final_ae_2.drop(ind)

In [None]:
final_ae_2.shape

(30281, 593)

In [None]:
final_ae_2.dropna(axis=0, how='all', inplace=True)
final_ae_2.fillna(0, inplace=True)
final_ae_2.shape

(8724, 593)

In [None]:
alzheimers_ae_table_2 = final_ae_2

In [None]:
frames = [alzheimers_ae_table_1, alzheimers_ae_table_2]
  
alzheimers_ae_table = pd.concat(frames)

In [None]:
alzheimers_ae_table.to_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/Final AE Tables/alzheimers_ae_table.csv')

# Parkinson's Adverse Event Table Notebook 

## Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

import os
import time

In [None]:
from google.colab import drive
drive.mount('/content/drive')

## Read in Tables

In [None]:
unique_ae = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/distinct_reported_events.csv')

In [None]:
smq_mapped = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/smq_edited.csv')

In [None]:
smq_mapped.head()

In [None]:
trial_by_ae = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/subset_adverse_events files/parkinsons_subset_adverse_events.csv', index_col=0)

In [None]:
trial_by_ae.head()

In [None]:
unique_id = pd.DataFrame(trial_by_ae['id'].unique(), columns = ['id'])

In [None]:
unique_id.head()

In [None]:
unique_ae_case = pd.DataFrame(trial_by_ae['adverse_events'].unique(), columns = ['lower'])
unique_ae_case['lower'] = unique_ae_case['lower'].str.slice(1,-1,1)

In [None]:
unique_ae_case.head()

In [None]:
unique_ae = unique_ae_case.merge(unique_ae, how='inner', on='lower')

In [None]:
unique_ae.shape, unique_id.shape, smq_mapped.shape, trial_by_ae.shape

## Data Cleaning

## Sort for analysis

In [None]:
unique_ae.sort_values(by='adverse_event_term', key=lambda x: x.str.len(), inplace=True)

## Drop duplicates and NaN values

In [None]:
unique_ae.drop_duplicates('lower', inplace=True)
unique_ae.dropna(axis=0, inplace=True)
unique_ae.reset_index(drop=True, inplace=True)
smq_mapped.drop('Row ID', axis=1, inplace=True)

In [None]:
trial_by_ae.dropna(axis=0, inplace=True)
trial_by_ae.reset_index(drop=True, inplace=True)

In [None]:
term_hits = pd.DataFrame(smq_mapped.dropna(axis=0))

In [None]:
unique_ae.shape, smq_mapped.shape, term_hits.shape, trial_by_ae.shape, unique_id.shape

In [None]:
unique_ae.head()

In [None]:
ae_lookup_df = unique_ae.merge(term_hits, how='inner', left_on='adverse_event_term', right_on='LLT/PT Term')

In [None]:
ae_lookup_df.head()

In [None]:
ae_lookup_df.shape

## Create Final Table

In [None]:
final_ae = pd.DataFrame(columns=ae_lookup_df['PT'].unique(), index=trial_by_ae['id'])

In [None]:
final_ae.shape

In [None]:
final_ae.head()

In [None]:
for i in range(trial_by_ae.shape[0]):
  check = 0
  ind = trial_by_ae['id'][i]
  term = trial_by_ae['adverse_events'][i][1:-1]
  if (ae_lookup_df['lower'] == term).any():
    adverse_event = ae_lookup_df.loc[ae_lookup_df['lower'] == term]
    adverse_event = adverse_event['PT'].values[0]
    final_ae.loc[ind, adverse_event] = 1
    check = 1
  if check == 0:
    final_ae.drop(ind)

In [None]:
final_ae.shape

(26591, 616)

In [None]:
final_ae.dropna(axis=0, how='all', inplace=True)
final_ae.fillna(0, inplace=True)
final_ae.shape

(6625, 616)

In [None]:
parkinsons_ae_table = final_ae

In [None]:
parkinsons_ae_table.to_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/Final AE Tables/parkinsons_ae_table.csv')

# ALS Adverse Event Table Notebook 

## Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

import os
import time

In [None]:
from google.colab import drive
drive.mount('/content/drive')

## Read in Tables

In [None]:
unique_ae = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/distinct_reported_events.csv')

In [None]:
smq_mapped = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/smq_edited.csv')

In [None]:
smq_mapped.head()

In [None]:
trial_by_ae = pd.read_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/subset_adverse_events files/als_subset_adverse_events.csv', index_col=0)

In [None]:
trial_by_ae.head()

In [None]:
unique_id = pd.DataFrame(trial_by_ae['id'].unique(), columns = ['id'])

In [None]:
unique_id.head()

In [None]:
unique_ae_case = pd.DataFrame(trial_by_ae['adverse_events'].unique(), columns = ['lower'])
unique_ae_case['lower'] = unique_ae_case['lower'].str.slice(1,-1,1)

In [None]:
unique_ae_case.head()

In [None]:
unique_ae = unique_ae_case.merge(unique_ae, how='inner', on='lower')

In [None]:
unique_ae.shape, unique_id.shape, smq_mapped.shape, trial_by_ae.shape

## Data Cleaning

## Sort for analysis

In [None]:
unique_ae.sort_values(by='adverse_event_term', key=lambda x: x.str.len(), inplace=True)

## Drop duplicates and NaN values

In [None]:
unique_ae.drop_duplicates('lower', inplace=True)
unique_ae.dropna(axis=0, inplace=True)
unique_ae.reset_index(drop=True, inplace=True)
smq_mapped.drop('Row ID', axis=1, inplace=True)

In [None]:
trial_by_ae.dropna(axis=0, inplace=True)
trial_by_ae.reset_index(drop=True, inplace=True)

In [None]:
term_hits = pd.DataFrame(smq_mapped.dropna(axis=0))

In [None]:
unique_ae.shape, smq_mapped.shape, term_hits.shape, trial_by_ae.shape, unique_id.shape

In [None]:
unique_ae.head()

In [None]:
ae_lookup_df = unique_ae.merge(term_hits, how='inner', left_on='adverse_event_term', right_on='LLT/PT Term')

In [None]:
ae_lookup_df.head()

In [None]:
ae_lookup_df.shape

## Create Final Table

In [None]:
final_ae = pd.DataFrame(columns=ae_lookup_df['PT'].unique(), index=trial_by_ae['id'])

In [None]:
final_ae.shape

In [None]:
final_ae.head()

In [None]:
for i in range(trial_by_ae.shape[0]):
  check = 0
  ind = trial_by_ae['id'][i]
  term = trial_by_ae['adverse_events'][i][1:-1]
  if (ae_lookup_df['lower'] == term).any():
    adverse_event = ae_lookup_df.loc[ae_lookup_df['lower'] == term]
    adverse_event = adverse_event['PT'].values[0]
    final_ae.loc[ind, adverse_event] = 1
    check = 1
  if check == 0:
    final_ae.drop(ind)

In [None]:
final_ae.shape

In [None]:
final_ae.dropna(axis=0, how='all', inplace=True)
final_ae.fillna(0, inplace=True)
final_ae.shape

In [None]:
als_ae_table = final_ae

In [None]:
als_ae_table.to_csv('/content/drive/MyDrive/HIDS510/Collab Notebooks/Adverse Events Table/Final AE Tables/als_ae_table.csv')