In [1]:
import xml.etree.ElementTree as ET
import csv


In [8]:
class Database:
    """One DrugBank XML file, queried by drug name.

    Usage: create the object with the path of the XML file, call ``parse()``
    once to load it, then use the ``get_*`` methods. The query methods read
    the root element stored by ``parse()`` and raise ``AttributeError`` if
    ``parse()`` has not been called first.
    """

    # Namespace, in ElementTree's ``{uri}`` form, that qualifies every element
    # name looked up by this class.
    _NS = "{http://www.drugbank.ca}"

    def __init__(self, filename:str):
        """Remember the path of the XML file; nothing is read until ``parse()``."""
        self.__filename = filename

    def parse(self):
        """Load the XML file and keep its root element for later queries."""
        tree = ET.parse(self.__filename)
        self.__xml_root = tree.getroot()

    def _drugs_named(self, drug_name:str)->list:
        """Return the ``drug`` children of the root whose ``name`` child equals ``drug_name``.

        The name is compared in Python instead of being formatted into an
        XPath predicate: ElementTree's XPath subset has no way to escape
        quotes, so a name containing ``'`` made the formatted path invalid.
        """
        ns = self._NS
        name_tag = ns + "name"
        matches = []
        for drug in self.__xml_root.findall("./%sdrug" % ns):
            # Same rule as ElementTree's ``[tag='text']`` predicate: a drug
            # matches when any direct ``name`` child has exactly this text.
            if any("".join(name.itertext()) == drug_name for name in drug.findall(name_tag)):
                matches.append(drug)
        return matches

    def get_number_of_interactions(self,drug_name:str)->int:
        """Count the ``drug-interactions/drug-interaction/name`` elements of the named drug.

        Returns 0 when no drug with that name exists in the file.
        """
        ns = self._NS
        path = "%sdrug-interactions/%sdrug-interaction/%sname" % (ns, ns, ns)
        return sum(len(drug.findall(path)) for drug in self._drugs_named(drug_name))

    def get_group(self,drug_name:str)->list:
        """Return the ``groups/group`` elements of the named drug.

        The result is a list of ``Element`` objects (empty when nothing
        matches), not a string; read ``.text`` on each element to get the
        group label.
        """
        ns = self._NS
        path = "%sgroups/%sgroup" % (ns, ns)
        return [group for drug in self._drugs_named(drug_name) for group in drug.findall(path)]

In [9]:
# DrugBank releases to process, in order. Each entry is
# (index, label, path of the XML file, year). The label is printed while
# parsing and becomes a column header in the output CSV, so it must not carry
# stray whitespace. The year is presumably the release year; entries marked
# below are unconfirmed.
DRUGBANK_DATABASES:list = [
    (0,"DB 3.0","database/3.0/drugbank.xml",2013),
    (1,"DB 4.1","database/4.1/drugbank.xml",2014), # year not confirmed
    (2,"DB 4.2","database/4.2/drugbank.xml",2015), # year not confirmed
    (3,"DB 4.3","database/4.3/drugbank.xml",2015),
    (4,"DB 4.5","database/4.5.0/drugbank.xml",2016),
    (5,"DB 5.0","database/5.0.0/drugbank.xml",2016),
    (6,"DB 5.0.1","database/5.0.1/drugbank.xml",2016),
    (7,"DB 5.0.2","database/5.0.2/drugbank.xml",2016),
    (8,"DB 5.0.3","database/5.0.3/drugbank.xml",2016),
    (9,"DB 5.0.4","database/5.0.4/full_database.xml",2017),
    (10,"DB 5.0.5","database/5.0.5/full_database.xml",2017),
    (11,"DB 5.0.6","database/5.0.6/full_database.xml",2017),
    (12,"DB 5.0.7","database/5.0.7/full_database.xml",2017),
    (13,"DB 5.0.8","database/5.0.8/full_database.xml",2017),
    (14,"DB 5.0.9","database/5.0.9/full_database.xml",2017),
    (15,"DB 5.0.10","database/5.0.10/full_database.xml",2017),
    (16,"DB 5.0.11","database/5.0.11/full_database.xml",2017),
    (17,"DB 5.1.0","database/5.1.0/full_database.xml",2018),
    (18,"DB 5.1.1","database/5.1.1/full_database.xml",2018),
    (19,"DB 5.1.2","database/5.1.2/full_database.xml",2018),
    (20,"DB 5.1.3","database/5.1.3/full_database.xml",2019),
    (21,"DB 5.1.4","database/5.1.4/full_database.xml",2019),
    (22,"DB 5.1.5","database/5.1.5/full_database.xml",2020),
    (23,"DB 5.1.6","database/5.1.6/full_database.xml",2020),
    (24,"DB 5.1.7","database/5.1.7/full_database.xml",2020),
    (25,"DB 5.1.8","database/5.1.8/full_database.xml",2021)
]

In [10]:
# Path of the CSV file the notebook reads its drug records from.
CSV_DRUG_AGE_FILE_INPUT: str = "Drugs_age_bibliography.csv"
# Path of the CSV file the notebook writes its results to.
CSV_DRUG_AGE_FILE_OUTPUT: str = "drugs_age_interactions.csv"

In [12]:
def read_csv(file:str)->list:
    """Read the drug CSV and return one tuple per data row.

    Args:
        file: Path of a comma-separated file whose header row contains the
            columns ``id``, ``label``, ``approval date (year, first country)``
            and ``age``. It is decoded as ``utf-8-sig``, so a leading byte
            order mark is dropped.

    Returns:
        A list of ``(id, label, approval date, age)`` tuples of strings, in
        file order.

    Raises:
        KeyError: If one of the four columns is missing from the header.
    """
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires; otherwise line breaks inside quoted fields
    # are altered before the reader sees them.
    with open(file, encoding='utf-8-sig', newline='') as csv_file:
        reader = csv.DictReader(csv_file, delimiter=',')
        return [
            (row['id'], row['label'], row["approval date (year, first country)"], row["age"])
            for row in reader
        ]

In [13]:
def write_csv(file, data, databases=None):
    """Write the per-drug interaction counts to a CSV file.

    Args:
        file: Path of the file to create or overwrite.
        data: Iterable of records ``(id, label, approval date, age, counts)``
            where ``counts`` is an iterable with one value per database
            column.
        databases: Sequence of database descriptors whose element ``[1]`` is
            used as a column header. Defaults to ``DRUGBANK_DATABASES``.

    The header row is ``id``, ``label``, ``approval date (year, first
    country)``, ``age``, followed by one column per database.
    """
    if databases is None:
        databases = DRUGBANK_DATABASES

    # newline='' stops the text layer from translating the writer's own
    # line terminator (which would add a blank line after every row on
    # Windows); the explicit encoding keeps non-ASCII labels writable on any
    # platform, matching the UTF-8 input file.
    with open(file, mode='w', newline='', encoding='utf-8') as output_file:
        output_file_write = csv.writer(output_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        csv_labels = ['id','label','approval date (year, first country)','age']
        csv_labels.extend(db_entry[1] for db_entry in databases)
        output_file_write.writerow(csv_labels)

        for item in data:
            # item is (id, label, approval date, age, counts)
            output_file_write.writerow([item[0], item[1], item[2], item[3], *item[4]])

In [14]:
# One record per drug from the input CSV: the four CSV fields followed by an
# empty list that is filled later with one interaction count per database.
output_data = []
drugs_list = read_csv(CSV_DRUG_AGE_FILE_INPUT)
for drug in drugs_list:
    output_data.append((drug[0], drug[1], drug[2], drug[3], []))
    

In [15]:
# Show the records assembled so far (last expression of the cell, so the
# notebook renders it).
output_data

[('7761', 'Levamisole', '1971', '50', []),
 ('7764', 'Nicotine', '1978', '43', []),
 ('7765', 'Cevimeline', '2000', '21', []),
 ('7767', 'Succinylcholine', '1952', '69', []),
 ('7774', 'Carbachol', '1951', '70', []),
 ('7795', 'Bethanechol', '1948', '73', []),
 ('7798', 'Pilocarpine', '1877', '144', []),
 ('7801', 'Ambenonium', '1950', '71', []),
 ('7807', 'Decamethonium', '1949', '72', []),
 ('7808', 'Varenicline', '2006', '15', []),
 ('7824', 'Methacholine', '1986', '35', []),
 ('7829', 'Cytisine', '1964', '57', []),
 ('7830', 'Tacrine', '1993', '28', []),
 ('7831', 'Pyridostigmine', '1955', '66', []),
 ('7832', 'Galantamine', '2001', '20', []),
 ('7833', 'Isoflurophate', '1949', '72', []),
 ('7834', 'Malathion', '1982', '39', []),
 ('7836', 'Donepezil', '1996', '25', []),
 ('7838', 'Hexafluronium', '1955', '66', []),
 ('7839', 'Demecarium', '1959', '62', []),
 ('7840', 'Physostigmine', '1875', '146', []),
 ('7841', 'Rivastigmine', '1997', '24', []),
 ('7842', 'Edrophonium', '1951', 

In [16]:
# Start from empty count lists so that re-running this cell does not append a
# second set of counts to the lists created earlier, which would misalign the
# values with the database columns of the output CSV.
for item in output_data:
    item[4].clear()

# For every database release, parse its XML file once and append each drug's
# interaction count to that drug's record, in release order.
for db_version in DRUGBANK_DATABASES:
    print("Parsing Database {0}".format(db_version[1]))
    db = Database(db_version[2])
    db.parse()
    for item in output_data:
        # item[1] is the drug label, item[4] the list of counts.
        item[4].append(db.get_number_of_interactions(item[1]))
    # Drop the reference to this release's parsed tree before the next one
    # is loaded.
    del db


Parsing Database DB 3.0
Parsing Database DB 4.1
Parsing Database DB 4.2
Parsing Database DB 4.3
Parsing Database DB 4.5
Parsing Database  DB 5.0
Parsing Database DB 5.0.1
Parsing Database DB 5.0.2
Parsing Database DB 5.0.3
Parsing Database DB 5.0.4
Parsing Database DB 5.0.5
Parsing Database DB 5.0.6
Parsing Database DB 5.0.7
Parsing Database DB 5.0.8
Parsing Database DB 5.0.9
Parsing Database DB 5.0.10
Parsing Database DB 5.0.11
Parsing Database DB 5.1.0
Parsing Database DB 5.1.1
Parsing Database DB 5.1.2
Parsing Database DB 5.1.3
Parsing Database DB 5.1.4
Parsing Database DB 5.1.5
Parsing Database DB 5.1.6
Parsing Database DB 5.1.7
Parsing Database DB 5.1.8


In [17]:
# Save the collected records to the output CSV file.
write_csv(file=CSV_DRUG_AGE_FILE_OUTPUT, data=output_data)