# Python test

## 1. Import and clean the data

In [1]:
import pandas as pd
import numpy as np
import re

### 1.1. Drugs file

In [2]:
drugs_df = pd.read_csv("drugs.csv")

In [3]:
drugs_df

Unnamed: 0,atccode,drug
0,A04AD,DIPHENHYDRAMINE
1,S03AA,TETRACYCLINE
2,V03AB,ETHANOL
3,A03BA,ATROPINE
4,A01AD,EPINEPHRINE
5,6302001,ISOPRENALINE
6,R01AD,BETAMETHASONE


### 1.2. Pubmed files

In [4]:
pubmed_df = pd.read_csv("pubmed.csv")

In [5]:
pubmed_df

Unnamed: 0,id,title,date,journal
0,1,A 44-year-old man with erythema of the face di...,01/01/2019,Journal of emergency nursing
1,2,"An evaluation of benadryl, pyribenzamine, and ...",01/01/2019,Journal of emergency nursing
2,3,Diphenhydramine hydrochloride helps symptoms o...,02/01/2019,The Journal of pediatrics
3,4,Tetracycline Resistance Patterns of Lactobacil...,01/01/2020,Journal of food protection
4,5,Appositional Tetracycline bone formation rates...,02/01/2020,American journal of veterinary research
5,6,Rapid reacquisition of contextual fear followi...,2020-01-01,Psychopharmacology
6,7,The High Cost of Epinephrine Autoinjectors and...,01/02/2020,The journal of allergy and clinical immunology...
7,8,Time to epinephrine treatment is associated wi...,01/03/2020,The journal of allergy and clinical immunology...


In [6]:
pubmed_df['date'] = pd.to_datetime(pubmed_df['date'])

In [7]:
pubmed_df

Unnamed: 0,id,title,date,journal
0,1,A 44-year-old man with erythema of the face di...,2019-01-01,Journal of emergency nursing
1,2,"An evaluation of benadryl, pyribenzamine, and ...",2019-01-01,Journal of emergency nursing
2,3,Diphenhydramine hydrochloride helps symptoms o...,2019-02-01,The Journal of pediatrics
3,4,Tetracycline Resistance Patterns of Lactobacil...,2020-01-01,Journal of food protection
4,5,Appositional Tetracycline bone formation rates...,2020-02-01,American journal of veterinary research
5,6,Rapid reacquisition of contextual fear followi...,2020-01-01,Psychopharmacology
6,7,The High Cost of Epinephrine Autoinjectors and...,2020-01-02,The journal of allergy and clinical immunology...
7,8,Time to epinephrine treatment is associated wi...,2020-01-03,The journal of allergy and clinical immunology...


In [8]:
pubmed_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   id       8 non-null      int64         
 1   title    8 non-null      object        
 2   date     8 non-null      datetime64[ns]
 3   journal  8 non-null      object        
dtypes: datetime64[ns](1), int64(1), object(2)
memory usage: 384.0+ bytes


---

In [9]:
pubmed_json = pd.read_json("pubmed.json", orient='records')

In [10]:
pubmed_json

Unnamed: 0,id,title,date,journal
0,9,Gold nanoparticles synthesized from Euphorbia ...,2020-01-01,"Journal of photochemistry and photobiology. B,..."
1,10,Clinical implications of umbilical artery Dopp...,2020-01-01,The journal of maternal-fetal & neonatal medicine
2,11,Effects of Topical Application of Betamethason...,2020-01-01,Journal of back and musculoskeletal rehabilita...
3,12,"Comparison of pressure release, phonophoresis ...",2020-01-03,Journal of back and musculoskeletal rehabilita...
4,13,"Comparison of pressure BETAMETHASONE release, ...",2020-01-03,The journal of maternal-fetal & neonatal medicine


In [11]:
pubmed_json['date'] = pd.to_datetime(pubmed_json['date'])

In [12]:
pubmed_json.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   id       5 non-null      int64         
 1   title    5 non-null      object        
 2   date     5 non-null      datetime64[ns]
 3   journal  5 non-null      object        
dtypes: datetime64[ns](1), int64(1), object(2)
memory usage: 288.0+ bytes


---

In [13]:
pubmed_final_df = pubmed_df.append(pubmed_json).reset_index(drop=True)

In [14]:
pubmed_final_df

Unnamed: 0,id,title,date,journal
0,1,A 44-year-old man with erythema of the face di...,2019-01-01,Journal of emergency nursing
1,2,"An evaluation of benadryl, pyribenzamine, and ...",2019-01-01,Journal of emergency nursing
2,3,Diphenhydramine hydrochloride helps symptoms o...,2019-02-01,The Journal of pediatrics
3,4,Tetracycline Resistance Patterns of Lactobacil...,2020-01-01,Journal of food protection
4,5,Appositional Tetracycline bone formation rates...,2020-02-01,American journal of veterinary research
5,6,Rapid reacquisition of contextual fear followi...,2020-01-01,Psychopharmacology
6,7,The High Cost of Epinephrine Autoinjectors and...,2020-01-02,The journal of allergy and clinical immunology...
7,8,Time to epinephrine treatment is associated wi...,2020-01-03,The journal of allergy and clinical immunology...
8,9,Gold nanoparticles synthesized from Euphorbia ...,2020-01-01,"Journal of photochemistry and photobiology. B,..."
9,10,Clinical implications of umbilical artery Dopp...,2020-01-01,The journal of maternal-fetal & neonatal medicine


In [15]:
# retour vers des dates en string pour une meilleure lisibilité
pubmed_final_df['date'] = pubmed_final_df['date'].dt.strftime('%Y-%m-%d')

### 1.3. Clinical trials file

In [16]:
clinical_trials_df = pd.read_csv("clinical_trials.csv")

In [17]:
clinical_trials_df['date'] = pd.to_datetime(clinical_trials_df['date'])

In [18]:
clinical_trials_df

Unnamed: 0,id,scientific_title,date,journal
0,NCT01967433,Use of Diphenhydramine as an Adjunctive Sedati...,2020-01-01,Journal of emergency nursing
1,NCT04189588,Phase 2 Study IV QUZYTTIR™ (Cetirizine Hydroch...,2020-01-01,Journal of emergency nursing
2,NCT04237090,,2020-01-01,Journal of emergency nursing
3,NCT04237091,Feasibility of a Randomized Controlled Clinica...,2020-01-01,Journal of emergency nursing
4,NCT04153396,Preemptive Infiltration With Betamethasone and...,2020-01-01,Hôpitaux Universitaires de Genève
5,NCT03490942,Glucagon Infusion in T1D Patients With Recurre...,2020-05-25,
6,,Glucagon Infusion in T1D Patients With Recurre...,2020-05-25,Journal of emergency nursing
7,NCT04188184,Tranexamic Acid Versus Epinephrine During Expl...,2020-04-27,Journal of emergency nursing\xc3\x28


In [19]:
clinical_trials_final_df = clinical_trials_df.fillna('')\
    .groupby('scientific_title', as_index=False)\
    .max().replace(r'^\s*$', np.nan, regex=True)\
    .dropna()

# on replace les NaN par un str vide pour regrouper les titres en double
# on replace les string contenant des espaces pour enlever les lignes dont le titre est vide

In [20]:
clinical_trials_final_df

Unnamed: 0,scientific_title,id,date,journal
1,Feasibility of a Randomized Controlled Clinica...,NCT04237091,2020-01-01,Journal of emergency nursing
2,Glucagon Infusion in T1D Patients With Recurre...,NCT03490942,2020-05-25,Journal of emergency nursing
3,Phase 2 Study IV QUZYTTIR™ (Cetirizine Hydroch...,NCT04189588,2020-01-01,Journal of emergency nursing
4,Preemptive Infiltration With Betamethasone and...,NCT04153396,2020-01-01,Hôpitaux Universitaires de Genève
5,Tranexamic Acid Versus Epinephrine During Expl...,NCT04188184,2020-04-27,Journal of emergency nursing\xc3\x28
6,Use of Diphenhydramine as an Adjunctive Sedati...,NCT01967433,2020-01-01,Journal of emergency nursing


In [21]:
clinical_trials_final_df['date'] = clinical_trials_final_df['date'].dt.strftime('%Y-%m-%d')

---

### 2. Construction du graphe de liaison

In [22]:
drugs_df

Unnamed: 0,atccode,drug
0,A04AD,DIPHENHYDRAMINE
1,S03AA,TETRACYCLINE
2,V03AB,ETHANOL
3,A03BA,ATROPINE
4,A01AD,EPINEPHRINE
5,6302001,ISOPRENALINE
6,R01AD,BETAMETHASONE


In [23]:
pubmed_final_df

Unnamed: 0,id,title,date,journal
0,1,A 44-year-old man with erythema of the face di...,2019-01-01,Journal of emergency nursing
1,2,"An evaluation of benadryl, pyribenzamine, and ...",2019-01-01,Journal of emergency nursing
2,3,Diphenhydramine hydrochloride helps symptoms o...,2019-02-01,The Journal of pediatrics
3,4,Tetracycline Resistance Patterns of Lactobacil...,2020-01-01,Journal of food protection
4,5,Appositional Tetracycline bone formation rates...,2020-02-01,American journal of veterinary research
5,6,Rapid reacquisition of contextual fear followi...,2020-01-01,Psychopharmacology
6,7,The High Cost of Epinephrine Autoinjectors and...,2020-01-02,The journal of allergy and clinical immunology...
7,8,Time to epinephrine treatment is associated wi...,2020-01-03,The journal of allergy and clinical immunology...
8,9,Gold nanoparticles synthesized from Euphorbia ...,2020-01-01,"Journal of photochemistry and photobiology. B,..."
9,10,Clinical implications of umbilical artery Dopp...,2020-01-01,The journal of maternal-fetal & neonatal medicine


In [24]:
list_pubmed = []

for index, value in drugs_df['drug'].items():
    temp_list = []
    for index2, value2 in pubmed_final_df['title'].items():
        if drugs_df['drug'][index].lower() in value2.lower():
            temp_list.append(pubmed_final_df.iloc[index2].to_dict())
    list_pubmed.append(temp_list)

list_pubmed

[[{'id': 1,
   'title': 'A 44-year-old man with erythema of the face diphenhydramine, neck, and chest, weakness, and palpitations',
   'date': '2019-01-01',
   'journal': 'Journal of emergency nursing'},
  {'id': 2,
   'title': 'An evaluation of benadryl, pyribenzamine, and other so-called diphenhydramine antihistaminic drugs in the treatment of allergy.',
   'date': '2019-01-01',
   'journal': 'Journal of emergency nursing'},
  {'id': 3,
   'title': 'Diphenhydramine hydrochloride helps symptoms of ciguatera fish poisoning.',
   'date': '2019-02-01',
   'journal': 'The Journal of pediatrics'}],
 [{'id': 4,
   'title': 'Tetracycline Resistance Patterns of Lactobacillus buchneri Group Strains.',
   'date': '2020-01-01',
   'journal': 'Journal of food protection'},
  {'id': 5,
   'title': 'Appositional Tetracycline bone formation rates in the Beagle.',
   'date': '2020-02-01',
   'journal': 'American journal of veterinary research'},
  {'id': 6,
   'title': 'Rapid reacquisition of context

In [25]:
drugs_df['pubmeds'] = list_pubmed

In [26]:
drugs_df

Unnamed: 0,atccode,drug,pubmeds
0,A04AD,DIPHENHYDRAMINE,"[{'id': 1, 'title': 'A 44-year-old man with er..."
1,S03AA,TETRACYCLINE,"[{'id': 4, 'title': 'Tetracycline Resistance P..."
2,V03AB,ETHANOL,"[{'id': 6, 'title': 'Rapid reacquisition of co..."
3,A03BA,ATROPINE,"[{'id': 13, 'title': 'Comparison of pressure B..."
4,A01AD,EPINEPHRINE,"[{'id': 7, 'title': 'The High Cost of Epinephr..."
5,6302001,ISOPRENALINE,"[{'id': 9, 'title': 'Gold nanoparticles synthe..."
6,R01AD,BETAMETHASONE,"[{'id': 10, 'title': 'Clinical implications of..."


In [27]:
clinical_trials_final_df

Unnamed: 0,scientific_title,id,date,journal
1,Feasibility of a Randomized Controlled Clinica...,NCT04237091,2020-01-01,Journal of emergency nursing
2,Glucagon Infusion in T1D Patients With Recurre...,NCT03490942,2020-05-25,Journal of emergency nursing
3,Phase 2 Study IV QUZYTTIR™ (Cetirizine Hydroch...,NCT04189588,2020-01-01,Journal of emergency nursing
4,Preemptive Infiltration With Betamethasone and...,NCT04153396,2020-01-01,Hôpitaux Universitaires de Genève
5,Tranexamic Acid Versus Epinephrine During Expl...,NCT04188184,2020-04-27,Journal of emergency nursing\xc3\x28
6,Use of Diphenhydramine as an Adjunctive Sedati...,NCT01967433,2020-01-01,Journal of emergency nursing


---

In [28]:
list_ctrials = []

for index, value in drugs_df['drug'].items():
    temp_list = []
    for index2, value2 in clinical_trials_final_df['scientific_title'].items():
        if drugs_df['drug'][index].lower() in value2.lower():
            temp_list.append(pubmed_final_df.iloc[index2].to_dict())
    list_ctrials.append(temp_list)

list_ctrials

[[{'id': 2,
   'title': 'An evaluation of benadryl, pyribenzamine, and other so-called diphenhydramine antihistaminic drugs in the treatment of allergy.',
   'date': '2019-01-01',
   'journal': 'Journal of emergency nursing'},
  {'id': 4,
   'title': 'Tetracycline Resistance Patterns of Lactobacillus buchneri Group Strains.',
   'date': '2020-01-01',
   'journal': 'Journal of food protection'},
  {'id': 7,
   'title': 'The High Cost of Epinephrine Autoinjectors and Possible Alternatives.',
   'date': '2020-01-02',
   'journal': 'The journal of allergy and clinical immunology. In practice'}],
 [],
 [],
 [],
 [{'id': 6,
   'title': 'Rapid reacquisition of contextual fear following extinction in mice: effects of amount of extinction, tetracycline acute ethanol withdrawal, and ethanol intoxication.',
   'date': '2020-01-01',
   'journal': 'Psychopharmacology'}],
 [],
 [{'id': 5,
   'title': 'Appositional Tetracycline bone formation rates in the Beagle.',
   'date': '2020-02-01',
   'journa

In [29]:
drugs_df['clinical_trials'] = list_ctrials

In [30]:
drugs_df

Unnamed: 0,atccode,drug,pubmeds,clinical_trials
0,A04AD,DIPHENHYDRAMINE,"[{'id': 1, 'title': 'A 44-year-old man with er...","[{'id': 2, 'title': 'An evaluation of benadryl..."
1,S03AA,TETRACYCLINE,"[{'id': 4, 'title': 'Tetracycline Resistance P...",[]
2,V03AB,ETHANOL,"[{'id': 6, 'title': 'Rapid reacquisition of co...",[]
3,A03BA,ATROPINE,"[{'id': 13, 'title': 'Comparison of pressure B...",[]
4,A01AD,EPINEPHRINE,"[{'id': 7, 'title': 'The High Cost of Epinephr...","[{'id': 6, 'title': 'Rapid reacquisition of co..."
5,6302001,ISOPRENALINE,"[{'id': 9, 'title': 'Gold nanoparticles synthe...",[]
6,R01AD,BETAMETHASONE,"[{'id': 10, 'title': 'Clinical implications of...","[{'id': 5, 'title': 'Appositional Tetracycline..."


In [31]:
drugs_df.to_json('final.json', orient='records')