In [41]:
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import numpy as np
from pharmatools.pubmed import get_abstracts, get_pubmed_ids, get_titles_abstracts_batch
from pharmatools.clinical_trials import get_trial_data
import matplotlib.pyplot as plt
from os.path import join
from dotenv import load_dotenv

env_path = join('..','.env')
load_dotenv(dotenv_path=env_path)

True

## Load

In [6]:
df = pd.read_excel('../raw_data/withdrawn_filtered.xlsx')

In [7]:
df.head()

Unnamed: 0,Category,Medicine name,International non-proprietary name (INN) / common name,Therapeutic area,Orphan medicine,Type of withdrawal,Date of withdrawal,commercial reason,earliest date
0,Human,Puldysa,idebenone,Duchenne Muscular Dystrophy,yes,Initial authorisation,2020-10-28 15:12:00,n,2020-10-28 15:12:00
1,Human,Roctavian,Valoctocogene roxaparvovec,Hemophilia A,yes,Initial authorisation,2020-11-04 15:46:00,n,2020-11-04 15:46:00
2,Human,Tibsovo,ivosidenib,Acute Myeloid Leukemia,yes,Initial authorisation,2020-10-13 14:37:00,n,2020-10-13 14:37:00
3,Human,Abilify MyCite,aripiprazole,Schizophrenia Bipolar Disorder,no,Initial authorisation,2020-07-17 09:15:00,n,2020-07-17 09:15:00
4,Human,Rayoqta,abicipar pegol,Macular Degeneration,no,Initial authorisation,2020-07-17 16:16:00,n,2020-07-17 16:16:00


## Check duplicates

In [13]:
df[df.duplicated(subset='International non-proprietary name (INN) / common name', keep=False)].sort_values('International non-proprietary name (INN) / common name')

Unnamed: 0,Category,Medicine name,International non-proprietary name (INN) / common name,Therapeutic area,Orphan medicine,Type of withdrawal,Date of withdrawal,commercial reason,earliest date
3,Human,Abilify MyCite,aripiprazole,Schizophrenia Bipolar Disorder,no,Initial authorisation,2020-07-17 09:15:00,n,2020-07-17 09:15:00
46,Human,Aripiprazole Mylan,aripiprazole,Schizophrenia Bipolar Disorder,no,Initial authorisation,2015-05-07 02:00:00,n,2015-05-07 02:00:00
22,Human,Restaysis,ciclosporin,Dry Eye Syndromes,no,Initial authorisation,2018-04-25 02:00:00,n,2018-04-25 02:00:00
115,Human,Vekacia,ciclosporin,Allergic Conjunctivitis,yes,Initial authorisation,2008-11-14 02:00:00,n,2008-11-14 02:00:00
128,Human,DuoCover,clopidogrel acetylsalicylic acid,Vascular Surgical Procedures Peripheral Vascul...,no,Initial authorisation,2008-05-23 02:00:00,n,2008-05-23 02:00:00
127,Human,DuoPlavin,clopidogrel acetylsalicylic acid,Vascular Surgical Procedures Peripheral Vascul...,no,Initial authorisation,2008-05-23 02:00:00,n,2008-05-23 02:00:00
131,Human,Pristiqs,desvenlafaxine,treatment of vasomotor symptoms associated wit...,no,Initial authorisation,2008-03-10 02:00:00,n,2008-03-10 02:00:00
117,Human,Ellefore,desvenlafaxine,Major Depressive Disorder,no,Initial authorisation,2008-10-13 02:00:00,n,2008-10-13 02:00:00
43,Human,Docetaxel SUN,docetaxel,Prostatic Neoplasms Non-Small-Cell Lung Carcin...,no,Initial authorisation,2016-06-06 02:00:00,n,2016-06-06 02:00:00
96,Human,Docetaxel Mylan,docetaxel,Non-Small-Cell Lung Carcinoma Breast Neoplasms...,no,Initial authorisation,2010-03-08 02:00:00,n,2010-03-08 02:00:00


## drop duplicates with same earliest date

In [15]:
df.drop(index=[128, 132, 133, 66, 68, 32, 17, 73, 74, 54], inplace=True)

In [16]:
df.shape

(141, 9)

## drop columns

In [18]:
df.drop(columns=['Category', 'Type of withdrawal', 'Date of withdrawal', 'commercial reason'], inplace=True)

In [19]:
df.head()

Unnamed: 0,Medicine name,International non-proprietary name (INN) / common name,Therapeutic area,Orphan medicine,earliest date
0,Puldysa,idebenone,Duchenne Muscular Dystrophy,yes,2020-10-28 15:12:00
1,Roctavian,Valoctocogene roxaparvovec,Hemophilia A,yes,2020-11-04 15:46:00
2,Tibsovo,ivosidenib,Acute Myeloid Leukemia,yes,2020-10-13 14:37:00
3,Abilify MyCite,aripiprazole,Schizophrenia Bipolar Disorder,no,2020-07-17 09:15:00
4,Rayoqta,abicipar pegol,Macular Degeneration,no,2020-07-17 16:16:00


In [20]:
df.columns

Index(['Medicine name',
       'International non-proprietary name (INN) / common name',
       'Therapeutic area', 'Orphan medicine', 'earliest date'],
      dtype='object')

In [22]:
df.rename(axis='columns', mapper={'International non-proprietary name (INN) / common name': 'INN',
                                 'earliest date': 'First published'}, inplace=True)

In [23]:
df.head()

Unnamed: 0,Medicine name,INN,Therapeutic area,Orphan medicine,First published
0,Puldysa,idebenone,Duchenne Muscular Dystrophy,yes,2020-10-28 15:12:00
1,Roctavian,Valoctocogene roxaparvovec,Hemophilia A,yes,2020-11-04 15:46:00
2,Tibsovo,ivosidenib,Acute Myeloid Leukemia,yes,2020-10-13 14:37:00
3,Abilify MyCite,aripiprazole,Schizophrenia Bipolar Disorder,no,2020-07-17 09:15:00
4,Rayoqta,abicipar pegol,Macular Degeneration,no,2020-07-17 16:16:00


In [27]:
df['Orphan medicine'] = df['Orphan medicine'].map({'yes': 1, 'no': 0})

In [28]:
df.head()

Unnamed: 0,Medicine name,INN,Therapeutic area,Orphan medicine,First published
0,Puldysa,idebenone,Duchenne Muscular Dystrophy,1,2020-10-28 15:12:00
1,Roctavian,Valoctocogene roxaparvovec,Hemophilia A,1,2020-11-04 15:46:00
2,Tibsovo,ivosidenib,Acute Myeloid Leukemia,1,2020-10-13 14:37:00
3,Abilify MyCite,aripiprazole,Schizophrenia Bipolar Disorder,0,2020-07-17 09:15:00
4,Rayoqta,abicipar pegol,Macular Degeneration,0,2020-07-17 16:16:00


## get clinical trials data

In [30]:
# convert to datetime
df['First published'] = pd.to_datetime(df['First published'])

In [31]:
# prepare dataframe
df['n_trials'] = 0
df['status_not_yet_recruiting'] = 0
df['status_recruiting'] = 0
df['status_enrolling_by_invitation'] = 0
df['status_active_not_recruiting'] = 0
df['status_suspended'] = 0
df['status_terminated'] = 0
df['status_completed'] = 0
df['status_withdrawn'] = 0
df['status_unknown'] = 0

df['org_fed'] = 0
df['org_indiv'] = 0
df['org_industry'] = 0
df['org_network'] = 0
df['org_nih'] = 0
df['org_other'] = 0
df['org_other_gov'] = 0

df['phase_early_1'] = 0
df['phase_not_applicable'] = 0
df['phase_1'] = 0
df['phase_2'] = 0
df['phase_3'] = 0
df['phase_4'] = 0

In [32]:
# pull data from API into dataframe
for index, row in df.iterrows():
    
    if index >= 0:
        print(f'fetching trial data for {index}, {row["Medicine name"]}')

        # call ClinicalTrials API
        data = get_trial_data(row['INN'], row['Therapeutic area'], row['First published'])
    
        #print(data)
        
        # update dataframe
        df['n_trials'][index] = data['n_trials']
        
        df['status_not_yet_recruiting'][index] = data['status']['Not yet recruiting']
        df['status_recruiting'][index] = data['status']['Recruiting']
        df['status_enrolling_by_invitation'][index] = data['status']['Enrolling by invitation']
        df['status_active_not_recruiting'][index] = data['status']['Active, not recruiting']
        df['status_suspended'][index] = data['status']['Suspended']
        df['status_terminated'][index] = data['status']['Terminated']
        df['status_completed'][index] = data['status']['Completed']
        df['status_withdrawn'][index] = data['status']['Withdrawn']
        df['status_unknown'][index] = data['status']['Unknown status']
        
        df['org_fed'][index] = data['organizers']['FED']
        df['org_indiv'][index] = data['organizers']['INDIV']
        df['org_industry'][index] = data['organizers']['INDUSTRY']
        df['org_network'][index] = data['organizers']['NETWORK']
        df['org_nih'][index] = data['organizers']['NIH']
        df['org_other'][index] = data['organizers']['OTHER']
        df['org_other_gov'][index] = data['organizers']['OTHER_GOV']
        
        df['phase_early_1'][index] = data['phases']['Early Phase 1']
        df['phase_not_applicable'][index] = data['phases']['Not Applicable']
        df['phase_1'][index] = data['phases']['Phase 1']
        df['phase_2'][index] = data['phases']['Phase 2']
        df['phase_3'][index] = data['phases']['Phase 3']
        df['phase_4'][index] = data['phases']['Phase 4']
        

fetching trial data for 0, Puldysa 


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing

fetching trial data for 1, Roctavian 
fetching trial data for 2, Tibsovo 
fetching trial data for 3, Abilify MyCite 
fetching trial data for 4, Rayoqta 
fetching trial data for 5, Sondelbay 
fetching trial data for 6, Xiidra 
fetching trial data for 7, Doxorubicin Hydrochloride Tillomed 
fetching trial data for 8, Fingolimod Mylan 
fetching trial data for 9, Erlotinib Accord 
fetching trial data for 10, Ekesivy 
fetching trial data for 11, Idhifa 
fetching trial data for 12, Linhaliq 
fetching trial data for 13, Radicava 
fetching trial data for 14, Ambrisentan Zentiva 
fetching trial data for 15, Canakinumab Novartis 
fetching trial data for 16, Efgratin 
fetching trial data for 18, Epjevy 
fetching trial data for 19, Graspa 
fetching trial data for 20, Entolimod TMC 
fetching trial data for 21, Treprostinil SciPharm Sàrl  
fetching trial data for 22, Restaysis 
fetching trial data for 23, Raligize 
fetching trial data for 24, Rotigotine Mylan 
fetching trial data for 25, Prohippur 
f

In [33]:
df

Unnamed: 0,Medicine name,INN,Therapeutic area,Orphan medicine,First published,n_trials,status_not_yet_recruiting,status_recruiting,status_enrolling_by_invitation,status_active_not_recruiting,...,org_network,org_nih,org_other,org_other_gov,phase_early_1,phase_not_applicable,phase_1,phase_2,phase_3,phase_4
0,Puldysa,idebenone,Duchenne Muscular Dystrophy,1,2020-10-28 15:12:00,6,0,1,0,1,...,0,0,0,0,0,0,0,2,3,0
1,Roctavian,Valoctocogene roxaparvovec,Hemophilia A,1,2020-11-04 15:46:00,5,1,0,1,3,...,0,0,0,0,0,0,2,2,3,0
2,Tibsovo,ivosidenib,Acute Myeloid Leukemia,1,2020-10-13 14:37:00,15,2,9,0,2,...,0,0,7,0,0,0,8,6,3,0
3,Abilify MyCite,aripiprazole,Schizophrenia Bipolar Disorder,0,2020-07-17 09:15:00,25,0,1,0,1,...,0,1,7,0,0,2,4,2,5,5
4,Rayoqta,abicipar pegol,Macular Degeneration,0,2020-07-17 16:16:00,8,0,0,0,0,...,0,0,0,0,0,0,2,4,2,0
5,Sondelbay,teriparatide,Osteoporosis,0,2020-06-19 15:44:00,132,1,9,0,11,...,0,1,69,2,0,13,11,34,27,34
6,Xiidra,Lifitegrast,Dry Eye Syndromes,0,2020-06-18 08:45:00,19,2,4,1,0,...,1,0,10,0,0,2,0,2,4,10
7,Doxorubicin Hydrochloride Tillomed,doxorubicin,Breast Neoplasms Ovarian Neoplasms Multiple My...,0,2020-03-02 12:06:00,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Fingolimod Mylan,fingolimod,Relapsing-Remitting Multiple Sclerosis,0,2020-05-08 16:36:00,48,1,3,0,2,...,0,0,17,0,0,2,2,2,9,25
9,Erlotinib Accord,erlotinib,Non-Small-Cell Lung Carcinoma Pancreatic Neopl...,0,2020-05-28 16:53:00,11,0,0,0,1,...,0,3,4,0,0,0,9,4,0,0


In [34]:
df.to_csv('../raw_data/withdrawn_CT.csv', index=False)

## get pubmed data

In [36]:
df = pd.read_csv('../raw_data/withdrawn_CT.csv')

# convert to datetime
df['First published'] = pd.to_datetime(df['First published'])

df.head()

Unnamed: 0,Medicine name,INN,Therapeutic area,Orphan medicine,First published,n_trials,status_not_yet_recruiting,status_recruiting,status_enrolling_by_invitation,status_active_not_recruiting,...,org_network,org_nih,org_other,org_other_gov,phase_early_1,phase_not_applicable,phase_1,phase_2,phase_3,phase_4
0,Puldysa,idebenone,Duchenne Muscular Dystrophy,1,2020-10-28 15:12:00,6,0,1,0,1,...,0,0,0,0,0,0,0,2,3,0
1,Roctavian,Valoctocogene roxaparvovec,Hemophilia A,1,2020-11-04 15:46:00,5,1,0,1,3,...,0,0,0,0,0,0,2,2,3,0
2,Tibsovo,ivosidenib,Acute Myeloid Leukemia,1,2020-10-13 14:37:00,15,2,9,0,2,...,0,0,7,0,0,0,8,6,3,0
3,Abilify MyCite,aripiprazole,Schizophrenia Bipolar Disorder,0,2020-07-17 09:15:00,25,0,1,0,1,...,0,1,7,0,0,2,4,2,5,5
4,Rayoqta,abicipar pegol,Macular Degeneration,0,2020-07-17 16:16:00,8,0,0,0,0,...,0,0,0,0,0,0,2,4,2,0


In [39]:
# adjust index for easy merging with existing dataframe
df.index = df.index + 1222

In [37]:
df_abstracts = pd.DataFrame(columns=['id', 'abstract'])

In [42]:
df['pm_results'] = 0

In [43]:
df.head()

Unnamed: 0,Medicine name,INN,Therapeutic area,Orphan medicine,First published,n_trials,status_not_yet_recruiting,status_recruiting,status_enrolling_by_invitation,status_active_not_recruiting,...,org_nih,org_other,org_other_gov,phase_early_1,phase_not_applicable,phase_1,phase_2,phase_3,phase_4,pm_results
1222,Puldysa,idebenone,Duchenne Muscular Dystrophy,1,2020-10-28 15:12:00,6,0,1,0,1,...,0,0,0,0,0,0,2,3,0,0
1223,Roctavian,Valoctocogene roxaparvovec,Hemophilia A,1,2020-11-04 15:46:00,5,1,0,1,3,...,0,0,0,0,0,2,2,3,0,0
1224,Tibsovo,ivosidenib,Acute Myeloid Leukemia,1,2020-10-13 14:37:00,15,2,9,0,2,...,0,7,0,0,0,8,6,3,0,0
1225,Abilify MyCite,aripiprazole,Schizophrenia Bipolar Disorder,0,2020-07-17 09:15:00,25,0,1,0,1,...,1,7,0,0,2,4,2,5,5,0
1226,Rayoqta,abicipar pegol,Macular Degeneration,0,2020-07-17 16:16:00,8,0,0,0,0,...,0,0,0,0,0,2,4,2,0,0


In [46]:
for index, row in df.iterrows():
    if index >= 0:
        print(index)
        print(row['Medicine name'])
        ids = get_pubmed_ids(row['INN'], row['Therapeutic area'], row['First published'])
        n_ids = len(ids)
        print(f'results: {n_ids}')
        df['pm_results'][index] = n_ids
        
        # fetch titles and abstracts of first 200 results
        titles, abstracts = get_titles_abstracts_batch(ids[:200])
        print(f'titles: {len(titles)}')
        print(f'abstracts: {len(abstracts)}')
        for abstract in abstracts:
            df_abstracts = df_abstracts.append(pd.DataFrame.from_dict({'id': [index], 'abstract': [abstract]}), ignore_index=True)
        print('')
        
df_abstracts.to_csv('abstracts_withdrawn.csv')      

1222
Puldysa 
results: 17


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


titles: 17
abstracts: 15

1223
Roctavian 
results: 5
no abstract
titles: 5
abstracts: 5

1224
Tibsovo 
results: 74
no title
no abstract
titles: 74
abstracts: 70

1225
Abilify MyCite 
results: 224
no title
no abstract
no abstract
no abstract
titles: 195
abstracts: 185

1226
Rayoqta 
results: 14
no abstract
no abstract
titles: 14
abstracts: 14

1227
Sondelbay 
results: 1696
no abstract
no abstract
no abstract
titles: 200
abstracts: 191

1228
Xiidra 
results: 27
titles: 27
abstracts: 21

1229
Doxorubicin Hydrochloride Tillomed 
results: 5
titles: 5
abstracts: 5

1230
Fingolimod Mylan 
results: 778
no abstract
titles: 200
abstracts: 195

1231
Erlotinib Accord 
results: 39
titles: 39
abstracts: 39

1232
Ekesivy 
results: 14
titles: 14
abstracts: 9

1233
Idhifa 
results: 85
no title
no abstract
no abstract
titles: 85
abstracts: 75

1234
Linhaliq 
results: 8
titles: 8
abstracts: 8

1235
Radicava 
results: 89
no title
no abstract
no abstract
titles: 86
abstracts: 74

1236
Ambrisentan Zentiva 


In [47]:
df_abstracts

Unnamed: 0,id,abstract
0,1222,"In Duchenne muscular dystrophy (DMD), progress..."
1,1222,Cardiorespiratory failure is the leading cause...
2,1222,Assessment of dynamic inspiratory function may...
3,1222,Respiratory failure is the principal source of...
4,1222,In Duchenne muscular dystrophy (DMD) progressi...
5,1222,Patients with DMD experience progressive restr...
6,1222,Decline in respiratory function in patients wi...
7,1222,Duchenne muscular dystrophy (DMD) is a progres...
8,1222,"In this review, we discuss the therapies used ..."
9,1222,The dystrophinopathies include Duchenne muscul...


In [50]:
from nltk.tokenize import sent_tokenize

def n_last_senteces(text, n):
    return ' '.join(sent_tokenize(text)[-n:])

df["conclusions"] = ""

df_abstr = pd.read_csv('abstracts_withdrawn.csv', index_col=0)

In [51]:
df_abstr

Unnamed: 0,id,abstract
0,1222,"In Duchenne muscular dystrophy (DMD), progress..."
1,1222,Cardiorespiratory failure is the leading cause...
2,1222,Assessment of dynamic inspiratory function may...
3,1222,Respiratory failure is the principal source of...
4,1222,In Duchenne muscular dystrophy (DMD) progressi...
5,1222,Patients with DMD experience progressive restr...
6,1222,Decline in respiratory function in patients wi...
7,1222,Duchenne muscular dystrophy (DMD) is a progres...
8,1222,"In this review, we discuss the therapies used ..."
9,1222,The dystrophinopathies include Duchenne muscul...


In [56]:
df_abstr.abstract.fillna("", inplace=True)

In [57]:
for index, row in df.iterrows():
    print(index)
    conclusions = ""
    for _, row_abstr in df_abstr.loc[df_abstr['id'] == index].iterrows():
        conclusion = n_last_senteces(row_abstr['abstract'], 2)
        conclusions += " " + conclusion
    df['conclusions'][index] = conclusions    

1222
1223


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362


In [58]:
df

Unnamed: 0,Medicine name,INN,Therapeutic area,Orphan medicine,First published,n_trials,status_not_yet_recruiting,status_recruiting,status_enrolling_by_invitation,status_active_not_recruiting,...,org_other,org_other_gov,phase_early_1,phase_not_applicable,phase_1,phase_2,phase_3,phase_4,pm_results,conclusions
1222,Puldysa,idebenone,Duchenne Muscular Dystrophy,1,2020-10-28 15:12:00,6,0,1,0,1,...,0,0,0,0,0,2,3,0,17,"Furthermore, patients in the placebo group us..."
1223,Roctavian,Valoctocogene roxaparvovec,Hemophilia A,1,2020-11-04 15:46:00,5,1,0,1,3,...,0,0,0,0,2,2,3,0,5,In a phase 1/2 clinical study of AAV5-hFVIII-...
1224,Tibsovo,ivosidenib,Acute Myeloid Leukemia,1,2020-10-13 14:37:00,15,2,9,0,2,...,7,0,0,0,8,6,3,0,74,The U.S. Food and Drug Administration (FDA) a...
1225,Abilify MyCite,aripiprazole,Schizophrenia Bipolar Disorder,0,2020-07-17 09:15:00,25,0,1,0,1,...,7,0,0,2,4,2,5,5,224,Both monohydrate and lauroxil formulations re...
1226,Rayoqta,abicipar pegol,Macular Degeneration,0,2020-07-17 16:16:00,8,0,0,0,0,...,0,0,0,0,2,4,2,0,14,These two latter phase III trials revealed t...
1227,Sondelbay,teriparatide,Osteoporosis,0,2020-06-19 15:44:00,132,1,9,0,11,...,69,2,0,13,11,34,27,34,1696,There was no significant difference between t...
1228,Xiidra,Lifitegrast,Dry Eye Syndromes,0,2020-06-18 08:45:00,19,2,4,1,0,...,10,0,0,2,0,2,4,10,27,Drop comfort scores with lifitegrast improved...
1229,Doxorubicin Hydrochloride Tillomed,doxorubicin,Breast Neoplasms Ovarian Neoplasms Multiple My...,0,2020-03-02 12:06:00,0,0,0,0,0,...,0,0,0,0,0,0,0,0,5,The most common treatment-related adverse eve...
1230,Fingolimod Mylan,fingolimod,Relapsing-Remitting Multiple Sclerosis,0,2020-05-08 16:36:00,48,1,3,0,2,...,17,0,0,2,2,2,9,25,778,The correlation analysis revealed that the ch...
1231,Erlotinib Accord,erlotinib,Non-Small-Cell Lung Carcinoma Pancreatic Neopl...,0,2020-05-28 16:53:00,11,0,0,0,1,...,4,0,0,0,9,4,0,0,39,Drug-metabolising activity assessed by the er...


In [60]:
df['Authorisation status'] = 1

In [61]:
df

Unnamed: 0,Medicine name,INN,Therapeutic area,Orphan medicine,First published,n_trials,status_not_yet_recruiting,status_recruiting,status_enrolling_by_invitation,status_active_not_recruiting,...,org_other_gov,phase_early_1,phase_not_applicable,phase_1,phase_2,phase_3,phase_4,pm_results,conclusions,Authorisation status
1222,Puldysa,idebenone,Duchenne Muscular Dystrophy,1,2020-10-28 15:12:00,6,0,1,0,1,...,0,0,0,0,2,3,0,17,"Furthermore, patients in the placebo group us...",1
1223,Roctavian,Valoctocogene roxaparvovec,Hemophilia A,1,2020-11-04 15:46:00,5,1,0,1,3,...,0,0,0,2,2,3,0,5,In a phase 1/2 clinical study of AAV5-hFVIII-...,1
1224,Tibsovo,ivosidenib,Acute Myeloid Leukemia,1,2020-10-13 14:37:00,15,2,9,0,2,...,0,0,0,8,6,3,0,74,The U.S. Food and Drug Administration (FDA) a...,1
1225,Abilify MyCite,aripiprazole,Schizophrenia Bipolar Disorder,0,2020-07-17 09:15:00,25,0,1,0,1,...,0,0,2,4,2,5,5,224,Both monohydrate and lauroxil formulations re...,1
1226,Rayoqta,abicipar pegol,Macular Degeneration,0,2020-07-17 16:16:00,8,0,0,0,0,...,0,0,0,2,4,2,0,14,These two latter phase III trials revealed t...,1
1227,Sondelbay,teriparatide,Osteoporosis,0,2020-06-19 15:44:00,132,1,9,0,11,...,2,0,13,11,34,27,34,1696,There was no significant difference between t...,1
1228,Xiidra,Lifitegrast,Dry Eye Syndromes,0,2020-06-18 08:45:00,19,2,4,1,0,...,0,0,2,0,2,4,10,27,Drop comfort scores with lifitegrast improved...,1
1229,Doxorubicin Hydrochloride Tillomed,doxorubicin,Breast Neoplasms Ovarian Neoplasms Multiple My...,0,2020-03-02 12:06:00,0,0,0,0,0,...,0,0,0,0,0,0,0,5,The most common treatment-related adverse eve...,1
1230,Fingolimod Mylan,fingolimod,Relapsing-Remitting Multiple Sclerosis,0,2020-05-08 16:36:00,48,1,3,0,2,...,0,0,2,2,2,9,25,778,The correlation analysis revealed that the ch...,1
1231,Erlotinib Accord,erlotinib,Non-Small-Cell Lung Carcinoma Pancreatic Neopl...,0,2020-05-28 16:53:00,11,0,0,0,1,...,0,0,0,9,4,0,0,39,Drug-metabolising activity assessed by the er...,1


In [62]:
df.to_csv('../raw_data/withdrawn_CT_PM_conclusions.csv')