In [1]:
import pandas as pd
from pharmatools.clinical_trials import get_trial_data

In [7]:
def trial_data(df):
    # convert to datetime
    df['earliest date'] = pd.to_datetime(df['earliest date'])

    # prepare dataframe
    df['n_trials'] = 0
    df['status_not_yet_recruiting'] = 0
    df['status_recruiting'] = 0
    df['status_enrolling_by_invitation'] = 0
    df['status_active_not_recruiting'] = 0
    df['status_suspended'] = 0
    df['status_terminated'] = 0
    df['status_completed'] = 0
    df['status_withdrawn'] = 0
    df['status_unknown'] = 0

    df['org_fed'] = 0
    df['org_indiv'] = 0
    df['org_industry'] = 0
    df['org_network'] = 0
    df['org_nih'] = 0
    df['org_other'] = 0
    df['org_other_gov'] = 0

    df['phase_early_1'] = 0
    df['phase_not_applicable'] = 0
    df['phase_1'] = 0
    df['phase_2'] = 0
    df['phase_3'] = 0
    df['phase_4'] = 0

    # pull data from API into dataframe
    for index, row in df.iterrows():

        print(f'fetching trial data for {index}, {row["Medicine name"]}')

        # call ClinicalTrials API
        try:
            data = get_trial_data(row['INN or active substance'], row['Therapeutic area'], row['earliest date'])

            # update dataframe
            df['n_trials'][index] = data['n_trials']

            df['status_not_yet_recruiting'][index] = data['status']['Not yet recruiting']
            df['status_recruiting'][index] = data['status']['Recruiting']
            df['status_enrolling_by_invitation'][index] = data['status']['Enrolling by invitation']
            df['status_active_not_recruiting'][index] = data['status']['Active, not recruiting']
            df['status_suspended'][index] = data['status']['Suspended']
            df['status_terminated'][index] = data['status']['Terminated']
            df['status_completed'][index] = data['status']['Completed']
            df['status_withdrawn'][index] = data['status']['Withdrawn']
            df['status_unknown'][index] = data['status']['Unknown status']

            df['org_fed'][index] = data['organizers']['FED']
            df['org_indiv'][index] = data['organizers']['INDIV']
            df['org_industry'][index] = data['organizers']['INDUSTRY']
            df['org_network'][index] = data['organizers']['NETWORK']
            df['org_nih'][index] = data['organizers']['NIH']
            df['org_other'][index] = data['organizers']['OTHER']
            df['org_other_gov'][index] = data['organizers']['OTHER_GOV']

            df['phase_early_1'][index] = data['phases']['Early Phase 1']
            df['phase_not_applicable'][index] = data['phases']['Not Applicable']
            df['phase_1'][index] = data['phases']['Phase 1']
            df['phase_2'][index] = data['phases']['Phase 2']
            df['phase_3'][index] = data['phases']['Phase 3']
            df['phase_4'][index] = data['phases']['Phase 4']
        except:
            print(f"{index} not successful")

    return df

In [3]:
df = pd.read_csv('../raw_data/arw.csv', index_col=0)

In [4]:
df.isna().sum()

Category                                       0
Medicine name                                  0
Therapeutic area                               0
INN or active substance                        1
Authorisation status                           0
Orphan medicine                                0
Marketing authorisation holder/company name    8
earliest date                                  0
dtype: int64

In [5]:
df.dropna(axis='index', subset=['INN or active substance'], inplace=True)

In [21]:
df['INN or active substance'] = df['INN or active substance'].str.replace("[", "").str.replace("]", "")
df['Therapeutic area'] = df['Therapeutic area'].str.replace("[", "").str.replace("]", "")

In [23]:
df = trial_data(df)

fetching trial data for 0, Ranexa (previously Latixa)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a

fetching trial data for 1, Signifor
fetching trial data for 2, Clopidogrel / Acetylsalicylic acid Mylan
fetching trial data for 3, Arikayce liposomal
fetching trial data for 4, DuoPlavin
fetching trial data for 5, Irbesartan Zentiva (previously Irbesartan Winthrop)
fetching trial data for 6, Viramune
fetching trial data for 7, Bortezomib Sun
fetching trial data for 8, Ofev
fetching trial data for 9, Clopidogrel/Acetylsalicylic acid Zentiva (previously DuoCover)
fetching trial data for 10, Clopidogrel Teva (hydrogen sulphate)
fetching trial data for 11, Efient
fetching trial data for 12, Kengrexal
fetching trial data for 13, Rydapt
fetching trial data for 14, Procysbi
fetching trial data for 15, Colobreathe
fetching trial data for 16, Lyxumia 
fetching trial data for 17, Glyxambi
fetching trial data for 18, Rivastigmine 1 A Pharma
fetching trial data for 19, Prometax
fetching trial data for 20, Besponsa
fetching trial data for 21, Ivabradine Anpharm
fetching trial data for 22, Isentress

fetching trial data for 196, Abilify Maintena
fetching trial data for 197, Kovaltry
fetching trial data for 198, Aybintio
fetching trial data for 199, Cystadrops
fetching trial data for 200, Praxbind
fetching trial data for 201, Equidacent
fetching trial data for 202, Grastofil
fetching trial data for 203, Tegsedi
fetching trial data for 204, Genvoya
fetching trial data for 205, Fulvestrant Mylan
fetching trial data for 206, Bavencio
fetching trial data for 207, Ondexxya
fetching trial data for 208, Methylthioninium chloride Cosmo
fetching trial data for 209, Actraphane
fetching trial data for 210, Odomzo
fetching trial data for 211, Zinforo
fetching trial data for 212, Onivyde pegylated liposomal (previously known as Onivyde)
fetching trial data for 213, Invanz
fetching trial data for 214, Rezolsta
fetching trial data for 215, Aldara
fetching trial data for 216, Orkambi
fetching trial data for 217, Mepsevii
fetching trial data for 218, Prezista
fetching trial data for 219, Exviera
fet

fetching trial data for 397, Zeffix
fetching trial data for 398, Hepsera
fetching trial data for 399, Entecavir Accord
fetching trial data for 400, Sebivo
fetching trial data for 401, Hyrimoz
fetching trial data for 402, Synagis
fetching trial data for 403, Imraldi
fetching trial data for 404, Kaletra
fetching trial data for 405, Efavirenz/Emtricitabine/Tenofovir disoproxil Mylan
fetching trial data for 406, Hulio
fetching trial data for 407, Aptivus
fetching trial data for 408, HBVaxPro
fetching trial data for 409, Fendrix
fetching trial data for 410, Mekinist
fetching trial data for 411, Vargatef
fetching trial data for 412, Rxulti
fetching trial data for 413, Fasenra
fetching trial data for 414, Eucreas
fetching trial data for 415, Halimatoz
fetching trial data for 416, CellCept
fetching trial data for 417, Carmustine Obvius
fetching trial data for 418, Pramipexole Teva
fetching trial data for 419, Vokanamet
fetching trial data for 420, Zepatier
fetching trial data for 421, Ucedane


fetching trial data for 596, Icandra (previously Vildagliptin / metformin hydrochloride Novartis)
fetching trial data for 597, Xelevia
fetching trial data for 598, Miglustat Gen.Orph
fetching trial data for 599, Mulpleo (previously Lusutrombopag Shionogi)
fetching trial data for 600, Karvea
fetching trial data for 601, Descovy
fetching trial data for 602, Farydak
fetching trial data for 603, Vaxelis
fetching trial data for 604, Isturisa
fetching trial data for 605, Alimta
fetching trial data for 606, Zevalin
fetching trial data for 607, Ikervis
fetching trial data for 608, Vimpat
fetching trial data for 609, Janumet
fetching trial data for 610, Flebogamma DIF (previously Flebogammadif)
fetching trial data for 611, Mozobil
fetching trial data for 612, Oncaspar
fetching trial data for 613, Folotyn
fetching trial data for 614, Efavirenz/Emtricitabine/Tenofovir disoproxil Zentiva
fetching trial data for 615, Targretin
fetching trial data for 616, Mysildecard
fetching trial data for 617, Re

fetching trial data for 790, Hycamtin
fetching trial data for 791, Topotecan Teva
fetching trial data for 792, Efavirenz/Emtricitabine/Tenofovir disoproxil Krka
fetching trial data for 793, Cerezyme
fetching trial data for 794, Rixubis
fetching trial data for 795, Luveris
fetching trial data for 796, Levetiracetam Hospira
fetching trial data for 797, Effentora
fetching trial data for 798, Instanyl
fetching trial data for 799, Clopidogrel Krka d.d. (previously Zopya)
fetching trial data for 800, PecFent
fetching trial data for 801, Quadramet
fetching trial data for 802, Zalviso
fetching trial data for 803, Dificlir
fetching trial data for 804, Zynrelef
fetching trial data for 805, Xtandi
fetching trial data for 806, Dynastat
fetching trial data for 807, Cholestagel
fetching trial data for 808, Nevanac
fetching trial data for 809, Yargesa
fetching trial data for 810, Envarsus
fetching trial data for 811, Methylthioninium chloride Proveblue
fetching trial data for 812, Yellox
fetching tri

fetching trial data for 982, Blincyto
fetching trial data for 983, Azomyr
fetching trial data for 984, Modigraf
fetching trial data for 985, Xadago
fetching trial data for 986, Controloc Control
fetching trial data for 987, Aerius
fetching trial data for 988, Neoclarityn
fetching trial data for 989, Incruse Ellipta (previously Incruse)
fetching trial data for 990, Lonquex
fetching trial data for 991, Edurant
fetching trial data for 992, Efavirenz Teva
fetching trial data for 993, Desloratadine Actavis
fetching trial data for 994, Uptravi
fetching trial data for 995, Stivarga
fetching trial data for 996, Tyverb
fetching trial data for 997, M-M-RVaxPro
fetching trial data for 998, Aerinaze
fetching trial data for 999, Somac Control
fetching trial data for 1000, Levetiracetam Sun
fetching trial data for 1001, Panretin
fetching trial data for 1002, Caelyx pegylated liposomal
fetching trial data for 1003, Aripiprazole Accord
fetching trial data for 1004, Ventavis
fetching trial data for 100

fetching trial data for 1175, Apixaban Accord
fetching trial data for 1176, Ivemend
fetching trial data for 1177, Sancuso
fetching trial data for 1178, Human IGG1 monoclonal antibody specific for human interleukin-1 alpha XBiotech
fetching trial data for 1179, NovoNorm
fetching trial data for 1180, Busilvex
fetching trial data for 1181, Aloxi
fetching trial data for 1182, Oxervate
fetching trial data for 1183, Rekovelle
fetching trial data for 1184, Portrazza
fetching trial data for 1185, Ribavirin Teva
fetching trial data for 1186, Arixtra
fetching trial data for 1187, Repaglinide Teva
fetching trial data for 1188, Enyglid
fetching trial data for 1189, Akynzeo
fetching trial data for 1190, Actos
fetching trial data for 1191, Palonosetron Accord
fetching trial data for 1192, Emend
fetching trial data for 1193, Solumarv
fetching trial data for 1194, Lympreva
fetching trial data for 1195, Ytracis
fetching trial data for 1196, Eylea
fetching trial data for 1197, Masiviera
fetching trial d

fetching trial data for 1369, Vitragan 
fetching trial data for 1370, Arxxant 
fetching trial data for 1371, Synordia 
fetching trial data for 1372, Riquent 
fetching trial data for 1373, Multaq 
fetching trial data for 1374, Surfaxin 
fetching trial data for 1375, Scintimun 
fetching trial data for 1376, Ximelagatran 36 mg film coated tablets 
fetching trial data for 1377, Orathecin 
fetching trial data for 1378, Veraseal 


In [29]:
df.to_csv('arw_CT.csv')

In [25]:
df

Unnamed: 0,Category,Medicine name,Therapeutic area,INN or active substance,Authorisation status,Orphan medicine,Marketing authorisation holder/company name,earliest date,n_trials,status_not_yet_recruiting,...,org_network,org_nih,org_other,org_other_gov,phase_early_1,phase_not_applicable,phase_1,phase_2,phase_3,phase_4
0,Human,Ranexa (previously Latixa),Angina Pectoris,ranolazine,0,0,Menarini International Operations Luxembourg S...,2008-07-08 00:00:00,43,1,...,0,0,32,1,0,7,1,11,7,18
1,Human,Signifor,"Acromegaly, Pituitary ACTH Hypersecretion",pasireotide,0,1,Recordati Rare Diseases,2012-04-24 00:00:00,24,0,...,0,0,7,0,0,0,1,11,6,4
2,Human,Clopidogrel / Acetylsalicylic acid Mylan,"Acute Coronary Syndrome, Myocardial Infarction","clopidogrel, acetylsalicylic acid",0,0,Mylan S.A.S.,2019-11-14 01:00:00,343,14,...,4,0,263,10,1,44,10,43,63,145
3,Human,Arikayce liposomal,Respiratory Tract Infections,amikacin,0,1,Insmed Netherlands B.V.,2020-07-23 00:00:00,34,0,...,0,1,15,1,0,8,3,14,8,2
4,Human,DuoPlavin,"Acute Coronary Syndrome, Myocardial Infarction","clopidogrel, acetylsalicylic acid",0,0,sanofi-aventis groupe,2009-12-17 01:00:00,343,14,...,4,0,263,10,1,44,10,43,63,145
5,Human,Irbesartan Zentiva (previously Irbesartan Wint...,Hypertension,irbesartan,0,0,Zentiva k.s.,2007-01-19 01:00:00,72,0,...,0,0,26,1,0,9,1,6,17,35
6,Human,Viramune,HIV Infections,nevirapine,0,0,Boehringer Ingelheim International GmbH,1997-10-22 00:00:00,273,0,...,10,61,104,20,1,34,42,52,68,57
7,Human,Bortezomib Sun,Multiple Myeloma,bortezomib,0,0,SUN Pharmaceutical Industries (Europe) B.V.,2016-03-26 01:00:00,562,6,...,21,28,326,7,5,11,156,313,94,14
8,Human,Ofev,Idiopathic Pulmonary Fibrosis,nintedanib,0,0,Boehringer Ingelheim International GmbH,2014-11-20 01:00:00,44,2,...,0,0,7,0,0,2,2,11,7,6
9,Human,Clopidogrel/Acetylsalicylic acid Zentiva (prev...,"Acute Coronary Syndrome, Myocardial Infarction","clopidogrel, acetylsalicylic acid",0,0,Sanofi-Aventis Groupe,2010-03-14 01:00:00,343,14,...,4,0,263,10,1,44,10,43,63,145


In [28]:
df.isna().sum()

Category                                       0
Medicine name                                  0
Therapeutic area                               0
INN or active substance                        0
Authorisation status                           0
Orphan medicine                                0
Marketing authorisation holder/company name    7
earliest date                                  0
n_trials                                       0
status_not_yet_recruiting                      0
status_recruiting                              0
status_enrolling_by_invitation                 0
status_active_not_recruiting                   0
status_suspended                               0
status_terminated                              0
status_completed                               0
status_withdrawn                               0
status_unknown                                 0
org_fed                                        0
org_indiv                                      0
org_industry        