**Notes:**
* This code merges various AACT tables (Intervention, Conditions, Studies, etc.)  with Cortellis Drug database
* After the merge, the final dataset is created which has only trials with Phase 2 status as completed, and  Phase 4 status as well completed
* We label all trials with Phase 2 and Phase 4 completed as "success", and remaining Phase 2 trials with no Phase 4 as "failure"

In [83]:
import numpy as np
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)

In [102]:
# interventions table from AACT with drug name and NCT mapping
df_interventions = pd.read_csv('b3t0zvq5n6oyhvv9h6v8qsqg4j57/interventions.txt', sep='|')

In [108]:
df_interventions.head()

Unnamed: 0,id,nct_id,intervention_type,name,description,trial_drug_cleaned
1,12864317,NCT00998946,DRUG,Pralatrexate,Pralatrexate injection administered as intrave...,pralatrexate
4,12864320,NCT00636311,DRUG,"Ifosfamide, Gemcitabine, Vinorelbine","Ifosfamide 2000 mg/sqm, day 1-4 (plus MESNA); ...",ifosfamidegemcitabinevinorelbine
5,12864321,NCT00636311,DRUG,Bortezomib + IGEV,"Bortezomib 1,3 mg/sqm, day 1, 4, 8; Ifosfamide...",bortezomibigev
7,12864323,NCT00552708,DRUG,COREG CR,,coregcr
8,12864324,NCT00552708,DRUG,Lisinopril,,lisinopril


In [104]:
# replaces all non-word characters in drug name with an empty string
df_interventions['trial_drug_cleaned'] = df_interventions['name'].str.replace(r'[^\w]', '', regex=True)

In [105]:
# filter only on drugs (remove medical devices, etc.)
df_interventions = df_interventions[df_interventions['intervention_type'].str.contains('DRUG')]

In [106]:
# lower case cleaned drug name
df_interventions['trial_drug_cleaned'] = df_interventions['trial_drug_cleaned'].str.lower()
df_interventions.head()

Unnamed: 0,id,nct_id,intervention_type,name,description,trial_drug_cleaned
1,12864317,NCT00998946,DRUG,Pralatrexate,Pralatrexate injection administered as intrave...,pralatrexate
4,12864320,NCT00636311,DRUG,"Ifosfamide, Gemcitabine, Vinorelbine","Ifosfamide 2000 mg/sqm, day 1-4 (plus MESNA); ...",ifosfamidegemcitabinevinorelbine
5,12864321,NCT00636311,DRUG,Bortezomib + IGEV,"Bortezomib 1,3 mg/sqm, day 1, 4, 8; Ifosfamide...",bortezomibigev
7,12864323,NCT00552708,DRUG,COREG CR,,coregcr
8,12864324,NCT00552708,DRUG,Lisinopril,,lisinopril


In [109]:
df_interventions.shape

(364595, 6)

In [110]:
# import conditions table from AACT database
df_conditions = pd.read_csv('b3t0zvq5n6oyhvv9h6v8qsqg4j57/conditions.txt', sep='|')
df_conditions.head()

Unnamed: 0,id,nct_id,name,downcase_name
0,12952355,NCT05084716,HIV,hiv
1,13078623,NCT00353353,Healthy,healthy
2,13079064,NCT00105651,Cancer,cancer
3,13079170,NCT02050477,Obesity,obesity
4,13079608,NCT05279053,Sex,sex


In [111]:
df_conditions.shape

(879477, 4)

In [None]:
# export unique conditions to categorize in disease types and therapy areas
unique_aact_conditions = pd.DataFrame(df_conditions['name'].unique(), columns=['Unique_AACT_Conditions'])
unique_aact_conditions.to_csv('unique_aact_conditions.txt', sep='|', index=True)

In [112]:
# merge interventions with conditions
merged_df_interv_cond=pd.merge(df_interventions, df_conditions, left_on="nct_id", right_on="nct_id", how="inner")

In [113]:
# replaces all non-word characters in conditions with an empty string
# lower case cleaned condition
merged_df_interv_cond['trial_cleaned_conditions'] = merged_df_interv_cond['downcase_mesh_term'].str.replace(r'[^\w]', '', regex=True)
merged_df_interv_cond['trial_cleaned_conditions'] = merged_df_interv_cond['trial_cleaned_conditions'].str.lower()
merged_df_interv_cond.head()

Unnamed: 0,id_x,nct_id,intervention_type,name_x,description,trial_drug_cleaned,id_y,name_y,downcase_name
0,12864317,NCT00998946,DRUG,Pralatrexate,Pralatrexate injection administered as intrave...,pralatrexate,13208641,"Lymphoma, B-Cell","lymphoma, b-cell"
1,12864320,NCT00636311,DRUG,"Ifosfamide, Gemcitabine, Vinorelbine","Ifosfamide 2000 mg/sqm, day 1-4 (plus MESNA); ...",ifosfamidegemcitabinevinorelbine,13208642,Hodgkin Disease,hodgkin disease
2,12864321,NCT00636311,DRUG,Bortezomib + IGEV,"Bortezomib 1,3 mg/sqm, day 1, 4, 8; Ifosfamide...",bortezomibigev,13208642,Hodgkin Disease,hodgkin disease
3,12864323,NCT00552708,DRUG,COREG CR,,coregcr,13208652,Hypertension,hypertension
4,12864324,NCT00552708,DRUG,Lisinopril,,lisinopril,13208652,Hypertension,hypertension


In [114]:
# drop unwanted columns and clean column names
merged_df_interv_cond = merged_df_interv_cond.drop(columns=['id_x','name_x','name_y','id_y'])
merged_df_interv_cond = merged_df_interv_cond.rename(columns={'downcase_name': 'conditions'})

In [115]:
merged_df_interv_cond.shape

(629524, 5)

In [116]:
merged_df_interv_cond.head()

Unnamed: 0,nct_id,intervention_type,description,trial_drug_cleaned,conditions
0,NCT00998946,DRUG,Pralatrexate injection administered as intrave...,pralatrexate,"lymphoma, b-cell"
1,NCT00636311,DRUG,"Ifosfamide 2000 mg/sqm, day 1-4 (plus MESNA); ...",ifosfamidegemcitabinevinorelbine,hodgkin disease
2,NCT00636311,DRUG,"Bortezomib 1,3 mg/sqm, day 1, 4, 8; Ifosfamide...",bortezomibigev,hodgkin disease
3,NCT00552708,DRUG,,coregcr,hypertension
4,NCT00552708,DRUG,,lisinopril,hypertension


In [117]:
# read cortellis drug data
df_cortellis_drug = pd.read_csv('cortellis/Drugs_Results_FULL.csv', sep=',')

In [118]:
# replaces all non-word characters in drug name with an empty string
# lower case cleaned drug name
df_cortellis_drug['cortellis_cleaned_drug'] = df_cortellis_drug['Drug Name'].str.replace(r'[^\w]', '', regex=True)
df_cortellis_drug['cortellis_cleaned_drug'] = df_cortellis_drug['cortellis_cleaned_drug'].str.lower()

In [119]:
df_cortellis_drug.head()

Unnamed: 0.1,Unnamed: 0,Drug Name,Highest Status,Other Drug Names,Originator Company,Originator Company HQ,Active Companies,Active Companies HQ,Therapy Area,Active Indications,Action,Technologies,Regulatory Designations,Inactive Indications,Inactive Companies,Has Deals,Last Change Date,Added Date,First Launched Date,Extract,Drug Id,cortellis_cleaned_drug
0,0,GRI-004,No Development Reported,GRI-004; natural killer cell type II modulator...,GRI Bio Inc,GRI Bio Inc (US),,,Gastrointestinal; Cardiovascular; Immune,,Anti-inflammatory; Immunomodulator; Natural ki...,Small molecule therapeutic,,Autoimmune disease; Liver disease; Reperfusion...,GRI Bio Inc,No,2019-01-15,2012-05-30,,As no development has been reported for some t...,78327,gri004
1,1,"recombinant bacterial strain vaccine (oral, au...",No Development Reported,"recombinant bacterial strain vaccine (oral, au...",Universidade Federal de Minas Gerais,Universidade Federal de Minas Gerais (Brazil),,,Immune,,Recombinant bacterial vector vaccine; Therapeu...,Bacteria recombinant; Biological therapeutic; ...,,Autoimmune disease,Universidade Federal de Minas Gerais,No,2013-05-01,2012-05-29,,As no development has been reported for some t...,78325,recombinantbacterialstrainvaccineoralautoimmun...
2,2,"orexin-1/2 antagonists (insomnia), Evotec",No Development Reported,EP-009-0049; EP-009-0237; EP-009-0466; EP-009-...,Evotec SE,Evotec SE (Germany),,,Neurology/Psychiatric,,Hypnotic; Orexin 1 receptor antagonist; Orexin...,Small molecule therapeutic,,Insomnia,Evotec SE,No,2014-10-20,2012-05-29,,As no development has been reported for some t...,78324,orexin12antagonistsinsomniaevotec
3,3,"FAAH inhibitors (pain/anxiety), Evotec",No Development Reported,"FAAH inhibitors (pain/anxiety), Evotec",Evotec SE,Evotec SE (Germany),,,Neurology/Psychiatric,,Analgesic; Anxiolytic; Fatty acid amide hydrol...,Small molecule therapeutic,,Anxiety disorders; Pain,Evotec SE,No,2015-03-13,2012-05-29,,As no development has been reported for some t...,78321,faahinhibitorspainanxietyevotec
4,4,HA-Navelbine,No Development Reported,HA-Navelbine; navelbine + hyaluronic acid (can...,Monash University,Monash University (Australia),,,Cancer,,Cell cycle inhibitor; Hyaluronic acid receptor...,Small molecule therapeutic,,Cancer,Monash University,No,2014-10-08,2012-05-31,,As no development has been reported for some t...,78320,hanavelbine


In [120]:
df_cortellis_drug.shape

(90649, 22)

In [121]:
# merge cortellis drug database with AACT intervention_condition table
merged_df_1020 = pd.merge(merged_df_interv_cond, df_cortellis_drug,left_on=['trial_drug_cleaned'], right_on=['cortellis_cleaned_drug'], how="inner")
merged_df_1020.head()

Unnamed: 0.1,nct_id,intervention_type,description,trial_drug_cleaned,conditions,Unnamed: 0,Drug Name,Highest Status,Other Drug Names,Originator Company,Originator Company HQ,Active Companies,Active Companies HQ,Therapy Area,Active Indications,Action,Technologies,Regulatory Designations,Inactive Indications,Inactive Companies,Has Deals,Last Change Date,Added Date,First Launched Date,Extract,Drug Id,cortellis_cleaned_drug
0,NCT00998946,DRUG,Pralatrexate injection administered as intrave...,pralatrexate,"lymphoma, b-cell",2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate
1,NCT01129206,DRUG,IVP(intravenous push)over 3-5 minutes on day 1...,pralatrexate,adenocarcinoma of the esophagus,2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate
2,NCT01129206,DRUG,IVP(intravenous push)over 3-5 minutes on day 1...,pralatrexate,adenocarcinomas of the gastroesophageal junction,2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate
3,NCT01129206,DRUG,IVP(intravenous push)over 3-5 minutes on day 1...,pralatrexate,recurrent esophageal cancer,2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate
4,NCT01129206,DRUG,IVP(intravenous push)over 3-5 minutes on day 1...,pralatrexate,squamous cell carcinoma of the esophagus,2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate


In [32]:
merged_df_1020.shape

(201134, 27)

In [33]:
merged_df_1020['cortellis_cleaned_drug'].value_counts()

methotrexate    7863
carboplatin     6006
tacrolimus      5728
paclitaxel      5461
cisplatin       5072
                ... 
agt2               1
agt4               1
azd8165            1
mtc01              1
exn407             1
Name: cortellis_cleaned_drug, Length: 7380, dtype: int64

In [34]:
# list all drug statuses
merged_df_1020['Highest Status'].unique()

array(['Launched', 'Phase 1 Clinical', 'Discontinued', 'Phase 2 Clinical',
       'No Development Reported', 'Phase 3 Clinical', 'Registered',
       'Withdrawn', 'Preclinical', 'Discovery', 'Pre-registration',
       'Suspended', 'Clinical'], dtype=object)

## Read AACT files

In [122]:
# read studies table from AACT database
df_studies = pd.read_csv('b3t0zvq5n6oyhvv9h6v8qsqg4j57/studies.txt', sep='|')
df_studies.head()

  df_studies = pd.read_csv('b3t0zvq5n6oyhvv9h6v8qsqg4j57/studies.txt', sep='|')


Unnamed: 0,nct_id,nlm_download_date_description,study_first_submitted_date,results_first_submitted_date,disposition_first_submitted_date,last_update_submitted_date,study_first_submitted_qc_date,study_first_posted_date,study_first_posted_date_type,results_first_submitted_qc_date,results_first_posted_date,results_first_posted_date_type,disposition_first_submitted_qc_date,disposition_first_posted_date,disposition_first_posted_date_type,last_update_submitted_qc_date,last_update_posted_date,last_update_posted_date_type,start_month_year,start_date_type,start_date,verification_month_year,verification_date,completion_month_year,completion_date_type,completion_date,primary_completion_month_year,primary_completion_date_type,primary_completion_date,target_duration,study_type,acronym,baseline_population,brief_title,official_title,overall_status,last_known_status,phase,enrollment,enrollment_type,source,limitations_and_caveats,number_of_arms,number_of_groups,why_stopped,has_expanded_access,expanded_access_type_individual,expanded_access_type_intermediate,expanded_access_type_treatment,has_dmc,is_fda_regulated_drug,is_fda_regulated_device,is_unapproved_device,is_ppsd,is_us_export,biospec_retention,biospec_description,ipd_time_frame,ipd_access_criteria,ipd_url,plan_to_share_ipd,plan_to_share_ipd_description,created_at,updated_at,source_class,delayed_posting,expanded_access_nctid,expanded_access_status_for_nctid,fdaaa801_violation,baseline_type_units_analyzed,patient_registry
0,NCT02063451,,2014-02-11,,,2018-09-20,2014-02-12,2014-02-14,ESTIMATED,,,,,,,2018-09-20,2018-09-24,ACTUAL,2011-02,ACTUAL,2011-02-28,2018-09,2018-09-30,2013-01-15,ACTUAL,2013-01-15,2013-01-15,ACTUAL,2013-01-15,,INTERVENTIONAL,,,Neurohormonal & Behavioral Correlates of Obesi...,,COMPLETED,,,16.0,ACTUAL,University of Michigan,,2.0,,,f,,,,f,,,,,,,,,,,,,2024-08-09 14:52:35.256038,2024-08-09 14:52:35.256038,OTHER,,,,,,
1,NCT03468751,,2018-03-11,,,2020-05-28,2018-03-11,2018-03-19,ACTUAL,,,,,,,2020-05-28,2020-06-01,ACTUAL,2018-02-14,ACTUAL,2018-02-14,2019-05,2019-05-31,2020-08-31,ESTIMATED,2020-08-31,2020-06-30,ESTIMATED,2020-06-30,,INTERVENTIONAL,,,"Phase 1 Study of HLX10, a Monoclonal Antibody ...",A Prospective Open-label Dose Escalation Phase...,UNKNOWN,RECRUITING,PHASE1,30.0,ESTIMATED,"Henlix, Inc",,2.0,,,f,,,,f,t,f,,,,,,,,,NO,,2024-08-09 14:52:35.256038,2024-08-09 14:52:35.256038,INDUSTRY,,,,,,
2,NCT01120951,,2010-05-09,,,2010-05-17,2010-05-09,2010-05-11,ESTIMATED,,,,,,,2010-05-17,2010-05-18,ESTIMATED,2010-01,,2010-01-31,2010-05,2010-05-31,2010-04,ACTUAL,2010-04-30,2010-04,ACTUAL,2010-04-30,,OBSERVATIONAL,,,Clinical Assessment of the Use of the R8 Respi...,Clinical Assessment of the Use of the R8 Respi...,COMPLETED,,,220.0,ACTUAL,Anaxsys Technology Ltd,,,,,f,,,,t,,,,,,,,,,,,,2024-08-09 14:52:35.256038,2024-08-09 14:52:35.256038,INDUSTRY,,,,,,
3,NCT06419725,,2024-05-14,,,2024-05-14,2024-05-14,2024-05-17,ACTUAL,,,,,,,2024-05-14,2024-05-17,ACTUAL,2016-01-01,ACTUAL,2016-01-01,2024-05,2024-05-31,2023-12-31,ACTUAL,2023-12-31,2023-12-31,ACTUAL,2023-12-31,,OBSERVATIONAL,,,Temporal Trends in Nonattendence Rate for Sche...,Temporal Trends in Nonattendence Rate for Sche...,COMPLETED,,,13823093.0,ACTUAL,Hospital Italiano de Buenos Aires,,,,,f,,,,,f,f,,,,,,,,,NO,,2024-08-04 00:38:17.671537,2024-08-04 00:38:17.671537,OTHER,,,,,,f
4,NCT06034951,,2023-08-22,,,2024-04-25,2023-09-05,2023-09-13,ACTUAL,,,,,,,2024-04-25,2024-04-26,ACTUAL,2023-05-24,ACTUAL,2023-05-24,2024-04,2024-04-30,2023-12-22,ACTUAL,2023-12-22,2023-08-21,ACTUAL,2023-08-21,,INTERVENTIONAL,,,Acceptability and Tolerance Study of a High En...,Acceptability (Including Gastrointestinal Tole...,COMPLETED,,,16.0,ACTUAL,Société des Produits Nestlé (SPN),,1.0,,,f,,,,,f,f,,,,,,,,,YES,Abstract and full publication to be made avail...,2024-08-09 14:52:35.256038,2024-08-09 14:52:35.256038,INDUSTRY,,,,,,


In [123]:
# merge studies table from AACT with previous cortellis-intervention-conditions dataframe
merged_df_1020 = pd.merge(merged_df_1020, df_studies, left_on="nct_id", right_on="nct_id", how="inner")

In [124]:
merged_df_1020.shape

(201134, 97)

In [125]:
merged_df_1020.head()

Unnamed: 0.1,nct_id,intervention_type,description,trial_drug_cleaned,conditions,Unnamed: 0,Drug Name,Highest Status,Other Drug Names,Originator Company,Originator Company HQ,Active Companies,Active Companies HQ,Therapy Area,Active Indications,Action,Technologies,Regulatory Designations,Inactive Indications,Inactive Companies,Has Deals,Last Change Date,Added Date,First Launched Date,Extract,Drug Id,cortellis_cleaned_drug,nlm_download_date_description,study_first_submitted_date,results_first_submitted_date,disposition_first_submitted_date,last_update_submitted_date,study_first_submitted_qc_date,study_first_posted_date,study_first_posted_date_type,results_first_submitted_qc_date,results_first_posted_date,results_first_posted_date_type,disposition_first_submitted_qc_date,disposition_first_posted_date,disposition_first_posted_date_type,last_update_submitted_qc_date,last_update_posted_date,last_update_posted_date_type,start_month_year,start_date_type,start_date,verification_month_year,verification_date,completion_month_year,completion_date_type,completion_date,primary_completion_month_year,primary_completion_date_type,primary_completion_date,target_duration,study_type,acronym,baseline_population,brief_title,official_title,overall_status,last_known_status,phase,enrollment,enrollment_type,source,limitations_and_caveats,number_of_arms,number_of_groups,why_stopped,has_expanded_access,expanded_access_type_individual,expanded_access_type_intermediate,expanded_access_type_treatment,has_dmc,is_fda_regulated_drug,is_fda_regulated_device,is_unapproved_device,is_ppsd,is_us_export,biospec_retention,biospec_description,ipd_time_frame,ipd_access_criteria,ipd_url,plan_to_share_ipd,plan_to_share_ipd_description,created_at,updated_at,source_class,delayed_posting,expanded_access_nctid,expanded_access_status_for_nctid,fdaaa801_violation,baseline_type_units_analyzed,patient_registry
0,NCT00998946,DRUG,Pralatrexate injection administered as intrave...,pralatrexate,"lymphoma, b-cell",2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate,,2009-10-19,2021-10-07,,2021-10-07,2009-10-19,2009-10-21,ESTIMATED,2021-10-07,2021-11-05,ACTUAL,,,,2021-10-07,2021-11-05,ACTUAL,2009-09,ACTUAL,2009-09-30,2021-10,2021-10-31,2012-08,ACTUAL,2012-08-31,2012-08,ACTUAL,2012-08-31,,INTERVENTIONAL,,Safety Population included all participants wh...,Study of Pralatrexate to Treat Participants Wi...,"A Phase 2, Single-arm, Open-label, Multi-cente...",COMPLETED,,PHASE2,29.0,ACTUAL,"Spectrum Pharmaceuticals, Inc",,1.0,,,f,,,,,t,f,,,,,,,,,NO,,2024-08-09 22:00:39.992088,2024-08-09 22:00:39.992088,INDUSTRY,,,,,,
1,NCT01129206,DRUG,IVP(intravenous push)over 3-5 minutes on day 1...,pralatrexate,adenocarcinoma of the esophagus,2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate,,2010-05-21,2015-09-15,,2016-04-25,2010-05-21,2010-05-24,ESTIMATED,2015-09-15,2015-10-16,ESTIMATED,,,,2016-04-25,2016-06-01,ESTIMATED,2010-07,,2010-07-31,2016-04,2016-04-30,,,,2012-09,ACTUAL,2012-09-30,,INTERVENTIONAL,,,Pralatrexate and Docetaxel in Treating Patient...,Phase II Study of Pralatrexate and Docetaxel i...,COMPLETED,,PHASE2,6.0,ACTUAL,Ohio State University Comprehensive Cancer Center,,1.0,,,f,,,,t,,,,,,,,,,,,,2024-08-03 23:37:43.166171,2024-08-03 23:37:43.166171,OTHER,,,,,,
2,NCT01129206,DRUG,IVP(intravenous push)over 3-5 minutes on day 1...,pralatrexate,adenocarcinomas of the gastroesophageal junction,2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate,,2010-05-21,2015-09-15,,2016-04-25,2010-05-21,2010-05-24,ESTIMATED,2015-09-15,2015-10-16,ESTIMATED,,,,2016-04-25,2016-06-01,ESTIMATED,2010-07,,2010-07-31,2016-04,2016-04-30,,,,2012-09,ACTUAL,2012-09-30,,INTERVENTIONAL,,,Pralatrexate and Docetaxel in Treating Patient...,Phase II Study of Pralatrexate and Docetaxel i...,COMPLETED,,PHASE2,6.0,ACTUAL,Ohio State University Comprehensive Cancer Center,,1.0,,,f,,,,t,,,,,,,,,,,,,2024-08-03 23:37:43.166171,2024-08-03 23:37:43.166171,OTHER,,,,,,
3,NCT01129206,DRUG,IVP(intravenous push)over 3-5 minutes on day 1...,pralatrexate,recurrent esophageal cancer,2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate,,2010-05-21,2015-09-15,,2016-04-25,2010-05-21,2010-05-24,ESTIMATED,2015-09-15,2015-10-16,ESTIMATED,,,,2016-04-25,2016-06-01,ESTIMATED,2010-07,,2010-07-31,2016-04,2016-04-30,,,,2012-09,ACTUAL,2012-09-30,,INTERVENTIONAL,,,Pralatrexate and Docetaxel in Treating Patient...,Phase II Study of Pralatrexate and Docetaxel i...,COMPLETED,,PHASE2,6.0,ACTUAL,Ohio State University Comprehensive Cancer Center,,1.0,,,f,,,,t,,,,,,,,,,,,,2024-08-03 23:37:43.166171,2024-08-03 23:37:43.166171,OTHER,,,,,,
4,NCT01129206,DRUG,IVP(intravenous push)over 3-5 minutes on day 1...,pralatrexate,squamous cell carcinoma of the esophagus,2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate,,2010-05-21,2015-09-15,,2016-04-25,2010-05-21,2010-05-24,ESTIMATED,2015-09-15,2015-10-16,ESTIMATED,,,,2016-04-25,2016-06-01,ESTIMATED,2010-07,,2010-07-31,2016-04,2016-04-30,,,,2012-09,ACTUAL,2012-09-30,,INTERVENTIONAL,,,Pralatrexate and Docetaxel in Treating Patient...,Phase II Study of Pralatrexate and Docetaxel i...,COMPLETED,,PHASE2,6.0,ACTUAL,Ohio State University Comprehensive Cancer Center,,1.0,,,f,,,,t,,,,,,,,,,,,,2024-08-03 23:37:43.166171,2024-08-03 23:37:43.166171,OTHER,,,,,,


In [39]:
# rename dataframe
merged_df_1021 = merged_df_1020

In [40]:
# list all drug statuses
merged_df_1021['Highest Status'].unique()

array(['Launched', 'Phase 2 Clinical', 'Discontinued', 'Phase 3 Clinical',
       'Phase 1 Clinical', 'No Development Reported', 'Registered',
       'Withdrawn', 'Preclinical', 'Suspended', 'Pre-registration',
       'Discovery', 'Clinical'], dtype=object)

In [41]:
# drop unwanted Highest Status
merged_df_1021 = merged_df_1021[~(merged_df_1021['Highest Status'].isin(['Phase 2 Clinical', 'Phase 3 Clinical',
                                                                         'Phase 1 Clinical','Preclinical',
                                                                          'Discovery','Clinical']))]

In [43]:
# create a trial outcome variable; label success and failure drugs
# Create a copy of the DataFrame to avoid the SettingWithCopyWarning
merged_df_1021_copy = merged_df_1021.copy()

# Define the list of values that indicate a successful outcome
value_list = ['Launched', 'Registered', 'Pre-registration']

# Create the 'drug_outcome' column in the copied DataFrame
merged_df_1021_copy['drug_outcome'] = merged_df_1021_copy['Highest Status'].isin(value_list).map({True: 'success', False: 'failure'})


In [70]:
merged_df_1021_copy.head()

Unnamed: 0.2,Unnamed: 0.1,nct_id,intervention_type,description,trial_drug_cleaned,conditions,Unnamed: 0,Drug Name,Highest Status,Other Drug Names,Originator Company,Originator Company HQ,Active Companies,Active Companies HQ,Therapy Area,Active Indications,Action,Technologies,Regulatory Designations,Inactive Indications,Inactive Companies,Has Deals,Last Change Date,Added Date,First Launched Date,Extract,Drug Id,cortellis_cleaned_drug,nlm_download_date_description,study_first_submitted_date,results_first_submitted_date,disposition_first_submitted_date,last_update_submitted_date,study_first_submitted_qc_date,study_first_posted_date,study_first_posted_date_type,results_first_submitted_qc_date,results_first_posted_date,results_first_posted_date_type,disposition_first_submitted_qc_date,disposition_first_posted_date,disposition_first_posted_date_type,last_update_submitted_qc_date,last_update_posted_date,last_update_posted_date_type,start_month_year,start_date_type,start_date,verification_month_year,verification_date,completion_month_year,completion_date_type,completion_date,primary_completion_month_year,primary_completion_date_type,primary_completion_date,target_duration,study_type,acronym,baseline_population,brief_title,official_title,overall_status,last_known_status,phase,enrollment,enrollment_type,source,limitations_and_caveats,number_of_arms,number_of_groups,why_stopped,has_expanded_access,expanded_access_type_individual,expanded_access_type_intermediate,expanded_access_type_treatment,has_dmc,is_fda_regulated_drug,is_fda_regulated_device,is_unapproved_device,is_ppsd,is_us_export,biospec_retention,biospec_description,ipd_time_frame,ipd_access_criteria,ipd_url,plan_to_share_ipd,plan_to_share_ipd_description,created_at,updated_at,source_class,delayed_posting,expanded_access_nctid,expanded_access_status_for_nctid,fdaaa801_violation,baseline_type_units_analyzed,patient_registry,drug_outcome
0,0,NCT00998946,DRUG,Pralatrexate injection administered as intrave...,pralatrexate,"lymphoma, b-cell",2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate,,2009-10-19,2021-10-07,,2021-10-07,2009-10-19,2009-10-21,ESTIMATED,2021-10-07,2021-11-05,ACTUAL,,,,2021-10-07,2021-11-05,ACTUAL,2009-09,ACTUAL,2009-09-30,2021-10,2021-10-31,2012-08,ACTUAL,2012-08-31,2012-08,ACTUAL,2012-08-31,,INTERVENTIONAL,,Safety Population included all participants wh...,Study of Pralatrexate to Treat Participants Wi...,"A Phase 2, Single-arm, Open-label, Multi-cente...",COMPLETED,,PHASE2,29.0,ACTUAL,"Spectrum Pharmaceuticals, Inc",,1.0,,,f,,,,,t,f,,,,,,,,,NO,,2024-08-09 22:00:39.992088,2024-08-09 22:00:39.992088,INDUSTRY,,,,,,,success
1,1,NCT01129206,DRUG,IVP(intravenous push)over 3-5 minutes on day 1...,pralatrexate,adenocarcinoma of the esophagus,2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate,,2010-05-21,2015-09-15,,2016-04-25,2010-05-21,2010-05-24,ESTIMATED,2015-09-15,2015-10-16,ESTIMATED,,,,2016-04-25,2016-06-01,ESTIMATED,2010-07,,2010-07-31,2016-04,2016-04-30,,,,2012-09,ACTUAL,2012-09-30,,INTERVENTIONAL,,,Pralatrexate and Docetaxel in Treating Patient...,Phase II Study of Pralatrexate and Docetaxel i...,COMPLETED,,PHASE2,6.0,ACTUAL,Ohio State University Comprehensive Cancer Center,,1.0,,,f,,,,t,,,,,,,,,,,,,2024-08-03 23:37:43.166171,2024-08-03 23:37:43.166171,OTHER,,,,,,,success
2,2,NCT01129206,DRUG,IVP(intravenous push)over 3-5 minutes on day 1...,pralatrexate,adenocarcinomas of the gastroesophageal junction,2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate,,2010-05-21,2015-09-15,,2016-04-25,2010-05-21,2010-05-24,ESTIMATED,2015-09-15,2015-10-16,ESTIMATED,,,,2016-04-25,2016-06-01,ESTIMATED,2010-07,,2010-07-31,2016-04,2016-04-30,,,,2012-09,ACTUAL,2012-09-30,,INTERVENTIONAL,,,Pralatrexate and Docetaxel in Treating Patient...,Phase II Study of Pralatrexate and Docetaxel i...,COMPLETED,,PHASE2,6.0,ACTUAL,Ohio State University Comprehensive Cancer Center,,1.0,,,f,,,,t,,,,,,,,,,,,,2024-08-03 23:37:43.166171,2024-08-03 23:37:43.166171,OTHER,,,,,,,success
3,3,NCT01129206,DRUG,IVP(intravenous push)over 3-5 minutes on day 1...,pralatrexate,recurrent esophageal cancer,2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate,,2010-05-21,2015-09-15,,2016-04-25,2010-05-21,2010-05-24,ESTIMATED,2015-09-15,2015-10-16,ESTIMATED,,,,2016-04-25,2016-06-01,ESTIMATED,2010-07,,2010-07-31,2016-04,2016-04-30,,,,2012-09,ACTUAL,2012-09-30,,INTERVENTIONAL,,,Pralatrexate and Docetaxel in Treating Patient...,Phase II Study of Pralatrexate and Docetaxel i...,COMPLETED,,PHASE2,6.0,ACTUAL,Ohio State University Comprehensive Cancer Center,,1.0,,,f,,,,t,,,,,,,,,,,,,2024-08-03 23:37:43.166171,2024-08-03 23:37:43.166171,OTHER,,,,,,,success
4,4,NCT01129206,DRUG,IVP(intravenous push)over 3-5 minutes on day 1...,pralatrexate,squamous cell carcinoma of the esophagus,2935,pralatrexate,Launched,10-propargyl-10-deazaaminopterin; DHFR inhibit...,SRI International,SRI International (US),Acrotech Biopharma Inc; CASI Pharmaceuticals I...,Acrotech Biopharma Inc (US); CASI Pharmaceutic...,Cancer,Anaplastic large cell lymphoma; Angioimmunobla...,Anticancer; Apoptosis stimulator; DHFR inhibit...,Intravenous formulation; Small molecule therap...,Accelerated Approval; Fast Track; Orphan Drug,B-cell lymphoma; Breast tumor; Cancer; Diffuse...,Allos Therapeutics Inc; Baxter Oncology GmbH; ...,Yes,2024-02-19,1998-06-08,2009-10-05,Former licensee Allos Therapeutics (a subsidia...,17990,pralatrexate,,2010-05-21,2015-09-15,,2016-04-25,2010-05-21,2010-05-24,ESTIMATED,2015-09-15,2015-10-16,ESTIMATED,,,,2016-04-25,2016-06-01,ESTIMATED,2010-07,,2010-07-31,2016-04,2016-04-30,,,,2012-09,ACTUAL,2012-09-30,,INTERVENTIONAL,,,Pralatrexate and Docetaxel in Treating Patient...,Phase II Study of Pralatrexate and Docetaxel i...,COMPLETED,,PHASE2,6.0,ACTUAL,Ohio State University Comprehensive Cancer Center,,1.0,,,f,,,,t,,,,,,,,,,,,,2024-08-03 23:37:43.166171,2024-08-03 23:37:43.166171,OTHER,,,,,,,success


In [46]:
# list all trial statuses
merged_df_1021_copy['overall_status'].unique()

array(['COMPLETED', 'ACTIVE_NOT_RECRUITING', 'RECRUITING', 'UNKNOWN',
       'WITHDRAWN', 'NOT_YET_RECRUITING', 'TERMINATED', 'SUSPENDED',
       'ENROLLING_BY_INVITATION', 'NO_LONGER_AVAILABLE', 'AVAILABLE',
       'APPROVED_FOR_MARKETING', 'TEMPORARILY_NOT_AVAILABLE'],
      dtype=object)

In [47]:
# drop unwanted Trial Overall Status
merged_df_1021_copy = merged_df_1021_copy[~(merged_df_1021_copy['overall_status'].isin(['ACTIVE_NOT_RECRUITING',
                                                                'RECRUITING', 'UNKNOWN', 
                                                                'NOT_YET_RECRUITING','ENROLLING_BY_INVITATION',
                                                                'NO_LONGER_AVAILABLE','AVAILABLE',
                                                                'TEMPORARILY_NOT_AVAILABLE']))]

In [69]:
merged_df_1021_copy.shape

(115363, 99)

In [57]:
merged_df_1021_copy.to_csv('merged_df_1021_copy.txt', sep='|', index=True)

In [41]:
# merged_df_1020 = merged_df_1020[(merged_df_1020['phase'] == 'PHASE2') | (merged_df_1020['phase'] == 'PHASE2/PHASE3')]

In [55]:
merged_df_1021_copy.groupby('drug_outcome')['cortellis_cleaned_drug'].nunique()

drug_outcome
failure    1874
success    1254
Name: cortellis_cleaned_drug, dtype: int64

In [58]:
merged_df_1021_copy.groupby('Highest Status')['cortellis_cleaned_drug'].nunique()

Highest Status
Discontinued               1054
Launched                   1166
No Development Reported     764
Pre-registration             50
Registered                   39
Suspended                    27
Withdrawn                    29
Name: cortellis_cleaned_drug, dtype: int64

In [56]:
merged_df_1021_copy.groupby('overall_status')['cortellis_cleaned_drug'].nunique()

overall_status
APPROVED_FOR_MARKETING      50
COMPLETED                 2643
SUSPENDED                  163
TERMINATED                1399
WITHDRAWN                  718
Name: cortellis_cleaned_drug, dtype: int64

In [88]:
# interventions table from AACT with drug name and NCT mapping
merged_df_1021_copy = pd.read_csv('merged_df_1021_copy.txt', sep='|', low_memory=False)

In [89]:
merged_df_1021_copy[merged_df_1021_copy['conditions'] == 'critical illness']

Unnamed: 0.2,Unnamed: 0.1,nct_id,intervention_type,description,trial_drug_cleaned,conditions,Unnamed: 0,Drug Name,Highest Status,Other Drug Names,Originator Company,Originator Company HQ,Active Companies,Active Companies HQ,Therapy Area,Active Indications,Action,Technologies,Regulatory Designations,Inactive Indications,Inactive Companies,Has Deals,Last Change Date,Added Date,First Launched Date,Extract,Drug Id,cortellis_cleaned_drug,nlm_download_date_description,study_first_submitted_date,results_first_submitted_date,disposition_first_submitted_date,last_update_submitted_date,study_first_submitted_qc_date,study_first_posted_date,study_first_posted_date_type,results_first_submitted_qc_date,results_first_posted_date,results_first_posted_date_type,disposition_first_submitted_qc_date,disposition_first_posted_date,disposition_first_posted_date_type,last_update_submitted_qc_date,last_update_posted_date,last_update_posted_date_type,start_month_year,start_date_type,start_date,verification_month_year,verification_date,completion_month_year,completion_date_type,completion_date,primary_completion_month_year,primary_completion_date_type,primary_completion_date,target_duration,study_type,acronym,baseline_population,brief_title,official_title,overall_status,last_known_status,phase,enrollment,enrollment_type,source,limitations_and_caveats,number_of_arms,number_of_groups,why_stopped,has_expanded_access,expanded_access_type_individual,expanded_access_type_intermediate,expanded_access_type_treatment,has_dmc,is_fda_regulated_drug,is_fda_regulated_device,is_unapproved_device,is_ppsd,is_us_export,biospec_retention,biospec_description,ipd_time_frame,ipd_access_criteria,ipd_url,plan_to_share_ipd,plan_to_share_ipd_description,created_at,updated_at,source_class,delayed_posting,expanded_access_nctid,expanded_access_status_for_nctid,fdaaa801_violation,baseline_type_units_analyzed,patient_registry,drug_outcome
9848,19894,NCT00405847,DRUG,,dexmedetomidine,critical illness,962,dexmedetomidine,Launched,DA-9501; Dexdor; MPV-1440; PF-00914730; Preced...,Orion Corp,Orion Corp (Finland),Hospira Inc; Orion Corp,Hospira Inc (US); Orion Corp (Finland),Neurology/Psychiatric,Anesthesia,Alpha 2 adrenoceptor agonist; Analgesic; Antiv...,Infusion; Intravenous formulation; Small molec...,,Delirium,Abbott Laboratories; Dainabot KK; Maruishi Pha...,Yes,2024-07-07,1996-02-16,2000-03-28,"Developed and launched by Orion and Abbott , a...",8783,dexmedetomidine,,2006-11-29,,,2016-10-26,2006-11-29,2006-11-30,ESTIMATED,,,,,,,2016-10-26,2016-10-27,ESTIMATED,2006-07,,2006-07-31,2016-10,2016-10-31,2007-05,ACTUAL,2007-05-31,,,,,INTERVENTIONAL,,,A Pilot Study of Effect of Dexmedetomidine on ...,A Pilot Study of Effect of Dexmedetomidine on ...,COMPLETED,,PHASE4,17.0,ACTUAL,University of Arizona,,,,,f,,,,,,,,,,,,,,,,,2024-08-04 10:24:54.512817,2024-08-04 10:24:54.512817,OTHER,,,,,,,success
10027,20237,NCT00744380,DRUG,Dexmedetomidine 0.15 µg/kg per hour (final inf...,dexmedetomidine,critical illness,962,dexmedetomidine,Launched,DA-9501; Dexdor; MPV-1440; PF-00914730; Preced...,Orion Corp,Orion Corp (Finland),Hospira Inc; Orion Corp,Hospira Inc (US); Orion Corp (Finland),Neurology/Psychiatric,Anesthesia,Alpha 2 adrenoceptor agonist; Analgesic; Antiv...,Infusion; Intravenous formulation; Small molec...,,Delirium,Abbott Laboratories; Dainabot KK; Maruishi Pha...,Yes,2024-07-07,1996-02-16,2000-03-28,"Developed and launched by Orion and Abbott , a...",8783,dexmedetomidine,,2008-08-28,2016-03-09,,2016-08-04,2008-08-29,2008-09-01,ESTIMATED,2016-08-04,2016-09-28,ESTIMATED,,,,2016-08-04,2016-09-28,ESTIMATED,2008-08,,2008-08-31,2016-08,2016-08-31,2012-10,ACTUAL,2012-10-31,2012-10,ACTUAL,2012-10-31,,INTERVENTIONAL,,,Dexmedetomidine Versus Midazolam for Facilitat...,Dexmedetomidine vs. Midazolam for Facilitating...,COMPLETED,,,23.0,ACTUAL,"University of Colorado, Denver",,2.0,,,f,,,,f,,,,,,,,,,,YES,Publication,2024-08-05 17:19:45.411903,2024-08-05 17:19:45.411903,OTHER,,,,,,,success
10112,20360,NCT02903407,DRUG,IV dexmedetomidine will be administered as a c...,dexmedetomidine,critical illness,962,dexmedetomidine,Launched,DA-9501; Dexdor; MPV-1440; PF-00914730; Preced...,Orion Corp,Orion Corp (Finland),Hospira Inc; Orion Corp,Hospira Inc (US); Orion Corp (Finland),Neurology/Psychiatric,Anesthesia,Alpha 2 adrenoceptor agonist; Analgesic; Antiv...,Infusion; Intravenous formulation; Small molec...,,Delirium,Abbott Laboratories; Dainabot KK; Maruishi Pha...,Yes,2024-07-07,1996-02-16,2000-03-28,"Developed and launched by Orion and Abbott , a...",8783,dexmedetomidine,,2016-09-02,2020-06-19,,2020-07-10,2016-09-12,2016-09-16,ESTIMATED,2020-07-10,2020-07-13,ACTUAL,,,,2020-07-10,2020-07-13,ACTUAL,2017-09-01,ACTUAL,2017-09-01,2020-07,2020-07-31,2019-06-30,ACTUAL,2019-06-30,2019-06-30,ACTUAL,2019-06-30,,INTERVENTIONAL,,,"Pain, Agitation and Delirium (PAD) Protocol in...","Pain, Agitation and Delirium Protocol in Venti...",TERMINATED,,PHASE4,7.0,ACTUAL,Duke University,Our trial was terminated prior to goal enrollm...,2.0,,All enrolled participants completed the study ...,f,,,,t,t,f,,,t,,,,,,NO,,2024-08-04 06:52:48.585636,2024-08-04 06:52:48.585636,OTHER,,,,,,,success
10115,20363,NCT02903407,DRUG,IV propofol will be administered as a continuo...,propofol,critical illness,1931,propofol,Launched,Diprivan; Diprivan EDTA; ICI-35868; propofol; ...,Imperial Chemical Industries Plc,Imperial Chemical Industries Plc (UK),Aspen PharmaCare Holdings Ltd; Fresenius Kabi ...,Aspen PharmaCare Holdings Ltd (South Africa); ...,Neurology/Psychiatric,Anesthesia,Anxiolytic; General anesthetic; Hypnotic; Unsp...,Injectable emulsion; Intravenous formulation; ...,,,AstraZeneca plc; Imperial Chemical Industries Plc,Yes,2024-07-06,2003-03-25,1986-12-31,Developed and launched by AstraZeneca (previou...,44308,propofol,,2016-09-02,2020-06-19,,2020-07-10,2016-09-12,2016-09-16,ESTIMATED,2020-07-10,2020-07-13,ACTUAL,,,,2020-07-10,2020-07-13,ACTUAL,2017-09-01,ACTUAL,2017-09-01,2020-07,2020-07-31,2019-06-30,ACTUAL,2019-06-30,2019-06-30,ACTUAL,2019-06-30,,INTERVENTIONAL,,,"Pain, Agitation and Delirium (PAD) Protocol in...","Pain, Agitation and Delirium Protocol in Venti...",TERMINATED,,PHASE4,7.0,ACTUAL,Duke University,Our trial was terminated prior to goal enrollm...,2.0,,All enrolled participants completed the study ...,f,,,,t,t,f,,,t,,,,,,NO,,2024-08-04 06:52:48.585636,2024-08-04 06:52:48.585636,OTHER,,,,,,,success
10656,21287,NCT01059929,DRUG,continuous IV infusion 0.2 - 1.5 ucg/kg/hour t...,dexmedetomidine,critical illness,962,dexmedetomidine,Launched,DA-9501; Dexdor; MPV-1440; PF-00914730; Preced...,Orion Corp,Orion Corp (Finland),Hospira Inc; Orion Corp,Hospira Inc (US); Orion Corp (Finland),Neurology/Psychiatric,Anesthesia,Alpha 2 adrenoceptor agonist; Analgesic; Antiv...,Infusion; Intravenous formulation; Small molec...,,Delirium,Abbott Laboratories; Dainabot KK; Maruishi Pha...,Yes,2024-07-07,1996-02-16,2000-03-28,"Developed and launched by Orion and Abbott , a...",8783,dexmedetomidine,,2009-09-14,2016-11-01,,2018-07-02,2010-01-28,2010-02-01,ESTIMATED,2018-07-02,2018-07-03,ACTUAL,,,,2018-07-02,2018-07-03,ACTUAL,2009-09,,2009-09-30,2018-07,2018-07-31,2015-09,ACTUAL,2015-09-30,2015-09,ACTUAL,2015-09-30,,INTERVENTIONAL,,,Dexmedetomidine Versus Propofol in the Medical...,A Double-blinded Randomized Controlled Trial o...,TERMINATED,,PHASE4,41.0,ACTUAL,University of Chicago,,2.0,,drug and placebo unavailable,f,,,,t,,,,,,,,,,,NO,,2024-08-05 13:43:19.820148,2024-08-05 13:43:19.820148,OTHER,,,,,,,success
10658,21289,NCT01059929,DRUG,continuous IV infusion (5 - 50 ucg/kg/min) tit...,propofol,critical illness,1931,propofol,Launched,Diprivan; Diprivan EDTA; ICI-35868; propofol; ...,Imperial Chemical Industries Plc,Imperial Chemical Industries Plc (UK),Aspen PharmaCare Holdings Ltd; Fresenius Kabi ...,Aspen PharmaCare Holdings Ltd (South Africa); ...,Neurology/Psychiatric,Anesthesia,Anxiolytic; General anesthetic; Hypnotic; Unsp...,Injectable emulsion; Intravenous formulation; ...,,,AstraZeneca plc; Imperial Chemical Industries Plc,Yes,2024-07-06,2003-03-25,1986-12-31,Developed and launched by AstraZeneca (previou...,44308,propofol,,2009-09-14,2016-11-01,,2018-07-02,2010-01-28,2010-02-01,ESTIMATED,2018-07-02,2018-07-03,ACTUAL,,,,2018-07-02,2018-07-03,ACTUAL,2009-09,,2009-09-30,2018-07,2018-07-31,2015-09,ACTUAL,2015-09-30,2015-09,ACTUAL,2015-09-30,,INTERVENTIONAL,,,Dexmedetomidine Versus Propofol in the Medical...,A Double-blinded Randomized Controlled Trial o...,TERMINATED,,PHASE4,41.0,ACTUAL,University of Chicago,,2.0,,drug and placebo unavailable,f,,,,t,,,,,,,,,,,NO,,2024-08-05 13:43:19.820148,2024-08-05 13:43:19.820148,OTHER,,,,,,,success
10690,21343,NCT01050699,DRUG,Intravenous continuous infusion will be initia...,dexmedetomidine,critical illness,962,dexmedetomidine,Launched,DA-9501; Dexdor; MPV-1440; PF-00914730; Preced...,Orion Corp,Orion Corp (Finland),Hospira Inc; Orion Corp,Hospira Inc (US); Orion Corp (Finland),Neurology/Psychiatric,Anesthesia,Alpha 2 adrenoceptor agonist; Analgesic; Antiv...,Infusion; Intravenous formulation; Small molec...,,Delirium,Abbott Laboratories; Dainabot KK; Maruishi Pha...,Yes,2024-07-07,1996-02-16,2000-03-28,"Developed and launched by Orion and Abbott , a...",8783,dexmedetomidine,,2010-01-14,,,2021-08-02,2010-01-14,2010-01-15,ESTIMATED,,,,,,,2021-08-02,2021-08-04,ACTUAL,2009-08,,2009-08-31,2021-08,2021-08-31,2019-01,ACTUAL,2019-01-31,2019-01,ACTUAL,2019-01-31,,INTERVENTIONAL,,,Sleep Intervention During Acute Lung Injury,Sleep Intervention During Acute Lung Injury,COMPLETED,,PHASE4,90.0,ESTIMATED,University of Arizona,,2.0,,,f,,,,t,,,,,,,,,,,,,2024-08-07 16:26:38.454747,2024-08-07 16:26:38.454747,OTHER,,,,,,,success
10736,21410,NCT03653832,DRUG,Patients will commence intravenous infusion of...,dexmedetomidine,critical illness,962,dexmedetomidine,Launched,DA-9501; Dexdor; MPV-1440; PF-00914730; Preced...,Orion Corp,Orion Corp (Finland),Hospira Inc; Orion Corp,Hospira Inc (US); Orion Corp (Finland),Neurology/Psychiatric,Anesthesia,Alpha 2 adrenoceptor agonist; Analgesic; Antiv...,Infusion; Intravenous formulation; Small molec...,,Delirium,Abbott Laboratories; Dainabot KK; Maruishi Pha...,Yes,2024-07-07,1996-02-16,2000-03-28,"Developed and launched by Orion and Abbott , a...",8783,dexmedetomidine,,2018-08-21,,,2024-05-07,2018-08-28,2018-08-31,ACTUAL,,,,,,,2024-05-07,2024-05-08,ACTUAL,2018-12-10,ACTUAL,2018-12-10,2024-05,2024-05-31,2023-12-15,ACTUAL,2023-12-15,2023-10-31,ACTUAL,2023-10-31,,INTERVENTIONAL,A2B,,Alpha 2 Agonists for Sedation to Produce Bette...,Alpha 2 Agonists for Sedation to Produce Bette...,COMPLETED,,PHASE3,1437.0,ACTUAL,University of Edinburgh,,3.0,,,f,,,,t,f,f,,,f,,,Currently unknown,Currently unspecified,,YES,The final trial dataset will be held by the Un...,2024-08-07 17:52:54.33458,2024-08-07 17:52:54.33458,OTHER,,,,,,,success
10737,21411,NCT03653832,DRUG,Patients will continue to receive intravenous ...,propofol,critical illness,1931,propofol,Launched,Diprivan; Diprivan EDTA; ICI-35868; propofol; ...,Imperial Chemical Industries Plc,Imperial Chemical Industries Plc (UK),Aspen PharmaCare Holdings Ltd; Fresenius Kabi ...,Aspen PharmaCare Holdings Ltd (South Africa); ...,Neurology/Psychiatric,Anesthesia,Anxiolytic; General anesthetic; Hypnotic; Unsp...,Injectable emulsion; Intravenous formulation; ...,,,AstraZeneca plc; Imperial Chemical Industries Plc,Yes,2024-07-06,2003-03-25,1986-12-31,Developed and launched by AstraZeneca (previou...,44308,propofol,,2018-08-21,,,2024-05-07,2018-08-28,2018-08-31,ACTUAL,,,,,,,2024-05-07,2024-05-08,ACTUAL,2018-12-10,ACTUAL,2018-12-10,2024-05,2024-05-31,2023-12-15,ACTUAL,2023-12-15,2023-10-31,ACTUAL,2023-10-31,,INTERVENTIONAL,A2B,,Alpha 2 Agonists for Sedation to Produce Bette...,Alpha 2 Agonists for Sedation to Produce Bette...,COMPLETED,,PHASE3,1437.0,ACTUAL,University of Edinburgh,,3.0,,,f,,,,t,f,f,,,f,,,Currently unknown,Currently unspecified,,YES,The final trial dataset will be held by the Un...,2024-08-07 17:52:54.33458,2024-08-07 17:52:54.33458,OTHER,,,,,,,success
30519,62802,NCT01369147,DRUG,The PN dose will be written for each 24-hr per...,propofol,critical illness,1931,propofol,Launched,Diprivan; Diprivan EDTA; ICI-35868; propofol; ...,Imperial Chemical Industries Plc,Imperial Chemical Industries Plc (UK),Aspen PharmaCare Holdings Ltd; Fresenius Kabi ...,Aspen PharmaCare Holdings Ltd (South Africa); ...,Neurology/Psychiatric,Anesthesia,Anxiolytic; General anesthetic; Hypnotic; Unsp...,Injectable emulsion; Intravenous formulation; ...,,,AstraZeneca plc; Imperial Chemical Industries Plc,Yes,2024-07-06,2003-03-25,1986-12-31,Developed and launched by AstraZeneca (previou...,44308,propofol,,2011-05-25,2022-04-26,2022-01-24,2022-05-18,2011-06-07,2011-06-08,ESTIMATED,2022-05-18,2022-06-10,ACTUAL,2022-01-24,2022-01-27,ACTUAL,2022-05-18,2022-06-10,ACTUAL,2011-07,ACTUAL,2011-07-31,2022-05,2022-05-31,2014-12-02,ACTUAL,2014-12-02,2014-12-02,ACTUAL,2014-12-02,,INTERVENTIONAL,,This analysis includes participants who have b...,The Energy Dose Study,Comparative Effectiveness of Energy Doses in C...,TERMINATED,,PHASE2,12.0,ACTUAL,Emory University,,3.0,,This study was halted pending additional fundi...,f,,,,t,t,f,,,,,,,,,NO,,2024-08-04 10:30:29.89002,2024-08-04 10:30:29.89002,OTHER,,,,,,,success


In [90]:
merged_df_1021_copy.shape

(115363, 99)

In [None]:
# consider only trials with Phase 2 status as completed, and which have Phase 4 status as well completed
# Label all trials with Phase 2 and Phase 4 completed as "success", and remaining Phase 2 trials with no Phase 4 as "failure"
mdf_1021_all = pd.read_csv('merged_df_1021_copy.txt', delimiter='|', index_col=0)

mdf_1021 = mdf_1021_all[
    'nct_id,trial_drug_cleaned,Therapy Area,conditions,cortellis_cleaned_drug,phase,Highest Status,overall_status,completion_date,drug_outcome'.split(',')
]

p2pairs = mdf_1021.drop_duplicates().query(
    'phase in ("PHASE1/PHASE2", "PHASE2")'
)

p3pairs = mdf_1021.drop_duplicates().query(
    'phase in ("PHASE2/PHASE3", "PHASE3")'
)

p4pairs = mdf_1021.drop_duplicates().query(
    'phase in ("PHASE4")'
)

mdf_1021_allp2 = mdf_1021_all.query(
    'phase in ("PHASE1/PHASE2", "PHASE2")'
)

df_p2_p4filter = mdf_1021_allp2.query('drug_outcome == "success"').pipe(
    lambda df: df.merge(
        df.query('overall_status.str.upper() == "COMPLETED"')[
            ['trial_drug_cleaned', 'conditions']
        ].drop_duplicates(),
        on=['trial_drug_cleaned', 'conditions']
    )
).merge(
    p4pairs[
        ['trial_drug_cleaned', 'conditions']
    ].drop_duplicates(),
    on=['trial_drug_cleaned', 'conditions']
).pipe(
    lambda df: pd.concat([
        df,
        mdf_1021_allp2.query('drug_outcome == "failure"')
    ])
)

In [None]:
# export Phase2_Phase4 trial data
df_p2_p4filter.to_csv('merged_df_1021_p2_p4filter.csv', sep='|', index=True)