# Linking Studies in Clinical Trials

- We need to link clinical trials together to see if any drugs failed with no results

In [2]:
# Criteria
# From the same company/sponsor
# Try out the same condition/treatment

In [3]:
import pandas as pd

## Getting Raw information from Studies

In [32]:
def create_planned_measurements(studies):
    df = {
        'study': [],
        'title': [],
        'description': [],
        'timeframe': [],
        'type': []
    }
    for study in studies:
        study_id = study['Study']['ProtocolSection']['IdentificationModule']['NCTId']
        if ('OutcomesModule' not in study['Study']['ProtocolSection']):
            continue
        primary_measures = study['Study']['ProtocolSection']['OutcomesModule'].get('PrimaryOutcomeList', {'PrimaryOutcome': []})['PrimaryOutcome']
        secondary_measures = study['Study']['ProtocolSection']['OutcomesModule'].get('SecondaryOutcomeList', {'SecondaryOutcome': []})['SecondaryOutcome']


        for measure in primary_measures:
            df['study'].append(study_id)
            df['title'].append(measure.get('PrimaryOutcomeMeasure', 'NA'))
            df['description'].append(measure.get('PrimaryOutcomeDescription', 'NA'))
            df['timeframe'].append(measure.get('PrimaryOutcomeTimeFrame', 'NA'))
            df['type'].append('primary')
            
        for measure in secondary_measures:
            df['study'].append(study_id)
            df['title'].append(measure.get('SecondaryOutcomeMeasure', 'NA'))
            df['description'].append(measure.get('SecondaryOutcomeDescription', 'NA'))
            df['timeframe'].append(measure.get('SecondaryOutcomeTimeFrame', 'NA'))
            df['type'].append('secondary')


    return df

In [46]:
def create_studies_table(studies):
    buffer = {
        'study_id': [], 'official_title': [], 'short_title':[], 'conditions': [], 
        'verified_date': [], 'responsible_party': [], 'sponsor':[], 'type': [], 'description': [],
        'interventions': [], 'purpose': [], 'intervention_type': [], 'mesh_terms': [],
        'criteria': [], 'min_age': [], 'max_age': [], 'gender': [], 'org_full_name': [],
        'org_class': [], 'overall_status': [], 'last_known_status': [], 'responsible_party_type': [],
        'why_stopped': [], 'phase': []
    }
    for i, study in enumerate(studies):


        try:
            buffer['study_id'].append(study['Study']['ProtocolSection']['IdentificationModule']['NCTId'])
        except KeyError as e:
            buffer['study_id'].append('NA')

        try:
            buffer['official_title'].append(study['Study']['ProtocolSection']['IdentificationModule']['OfficialTitle'])
        except KeyError as e:
            buffer['official_title'].append('NA')
            
        try:
            buffer['short_title'].append(study['Study']['ProtocolSection']['IdentificationModule']['BriefTitle'])
        except KeyError as e:
            buffer['short_title'].append('NA')

        try:
            buffer['verified_date'].append(study['Study']['ProtocolSection']['StatusModule']['StatusVerifiedDate'])
        except KeyError as e:
            buffer['verified_date'].append('NA')

        try:
            buffer['responsible_party'].append(study['Study']['ProtocolSection']['SponsorCollaboratorsModule']['ResponsibleParty']['ResponsiblePartyInvestigatorFullName'])
        except KeyError as e:
            buffer['responsible_party'].append('NA')
            
        try:
            buffer['responsible_party_type'].append(study['Study']['ProtocolSection']['SponsorCollaboratorsModule']['ResponsibleParty']['ResponsiblePartyType'])
        except KeyError as e:
            buffer['responsible_party_type'].append('NA')
            
        try:
            buffer['sponsor'].append(study['Study']['ProtocolSection']['SponsorCollaboratorsModule']['LeadSponsor']['LeadSponsorName'])
        except KeyError as e:
            buffer['sponsor'].append('NA')

        try: 
            buffer['conditions'].append(study['Study']['ProtocolSection']['ConditionsModule']['ConditionList']['Condition'])
        except KeyError as e:
            buffer['conditions'].append('NA')

        try:
            buffer['type'].append(study['Study']['ProtocolSection']['DesignModule']['StudyType'])
        except KeyError as e:
            buffer['type'].append('NA')
        
        try:
            buffer['purpose'].append(study['Study']['ProtocolSection']['DesignModule']['DesignInfo'].get('DesignPrimaryPurpose', 'NA'))
        except KeyError as e:
            buffer['purpose'].append('NA')
            
        try:
            buffer['intervention_type'].append(study['Study']['ProtocolSection']['DesignModule']['DesignInfo'].get('DesignInterventionModel', 'NA'))
        except KeyError as e:
            buffer['intervention_type'].append('NA')
        
        try:
            buffer['mesh_terms'].append([x.get('ConditionMeshTerm', 'NA') for x in study['Study']['DerivedSection']['ConditionBrowseModule']['ConditionMeshList']['ConditionMesh']])
        except KeyError as e:
            buffer['mesh_terms'].append([])
            
        try:
            buffer['description'].append(study['Study']['ProtocolSection']['DescriptionModule']['BriefSummary'])
        except KeyError as e:
            buffer['description'].append('NA')
            
        try: 
            buffer['interventions'].append([x.get('InterventionMeshTerm', 'NA') for x in study['Study']['DerivedSection']['InterventionBrowseModule']['InterventionMeshList']['InterventionMesh']])
        except KeyError as e:
            buffer['interventions'].append([])
            
        try:
            buffer['criteria'].append(study['Study']['ProtocolSection']['EligibilityModule']['EligibilityCriteria'])
        except KeyError as e:
            buffer['criteria'].append('NA')
            
        try:
            buffer['gender'].append(study['Study']['ProtocolSection']['EligibilityModule']['Gender'])
        except KeyError as e:
            buffer['gender'].append('NA')
        
        try:
            buffer['min_age'].append(study['Study']['ProtocolSection']['EligibilityModule']['MinimumAge'])
        except KeyError as e:
            buffer['min_age'].append('NA')
            
        try:
            buffer['max_age'].append(study['Study']['ProtocolSection']['EligibilityModule']['MaximumAge'])
        except KeyError as e:
            buffer['max_age'].append('NA')
                    
        try:
            buffer['org_full_name'].append(study['Study']['ProtocolSection']['IdentificationModule']['Organization']['OrgFullName'])
        except KeyError as e:
            buffer['org_full_name'].append('NA')
                        
        try:
            buffer['org_class'].append(study['Study']['ProtocolSection']['IdentificationModule']['Organization']['OrgClass'])
        except KeyError as e:
            buffer['org_class'].append('NA')
            
        try:
            buffer['overall_status'].append(study['Study']['ProtocolSection']['StatusModule']['OverallStatus'])
        except KeyError as e:
            buffer['overall_status'].append('NA')
            
        try:
            buffer['last_known_status'].append(study['Study']['ProtocolSection']['StatusModule']['LastKnownStatus'])
        except KeyError as e:
            buffer['last_known_status'].append('NA')
            
        try:
            buffer['why_stopped'].append(study['Study']['ProtocolSection']['StatusModule']['WhyStopped'])
        except KeyError as e:
            buffer['why_stopped'].append('NA')
            
        try:
            buffer['phase'].append(study['Study']['ProtocolSection']['DesignModule']['PhaseList']['Phase'])
        except KeyError as e:
            buffer['phase'].append([])
            
    return buffer
    

In [28]:
from os import listdir
from os.path import isfile, join, isdir
import json
import random
from tqdm import tqdm

def sample_all_studies(no_studies, table_func, table_name):
    # Need to add in MESH terms as a requirement
    study_directories = [f for f in listdir('AllAPIJSON/') if isdir(join('AllAPIJSON/', f))]
    random.shuffle(study_directories)
    studies = []
    write_counter = 0
    for directory in tqdm(study_directories):
        studyFileNames = [f for f in listdir('AllAPIJSON/'+directory+'/')]
        for file in studyFileNames:
            try:
                with open('AllAPIJSON/'+directory+'/'+file) as f:
                    data = json.load(f)['FullStudy']
                    studies.append(data)

                    f.close()
                    
                if len(studies) + len(studies)*write_counter >= no_studies:
                    print('writing study', write_counter)
                    table = pd.DataFrame.from_dict(table_func(studies))
                    table.to_pickle('all_'+table_name+'_'+str(write_counter)+'.pkl')
                    studies = []
                    return
                    
                if len(studies) >= 10000:
                    print('writing study', write_counter)
                    table = pd.DataFrame.from_dict(table_func(studies))
                    table.to_pickle('all_'+table_name+'_'+str(write_counter)+'.pkl')
                    write_counter += 1
                    studies = []

            except KeyError as e:
                print(str(e))
                continue

    table = pd.DataFrame.from_dict(table_func(studies))
    table.to_pickle('all_'+table_name+'_'+str(write_counter)+'.pkl')

    write_counter += 1
    studies = []
    
    
def sample_limited_studies(no_studies, table_func, table_name):
    # Need to add in MESH terms as a requirement
    studies = []
    write_counter = 0
    studyFileNames = [f for f in listdir('AllAPIJSON/NCT031xxxx/')]
    for file in studyFileNames:
        try:
            with open('AllAPIJSON/'+directory+'/'+file) as f:
                data = json.load(f)['FullStudy']
                studies.append(data)

                f.close()

            if len(studies) + len(studies)*write_counter >= no_studies:
                print('writing study', write_counter)
                table = pd.DataFrame.from_dict(table_func(studies))
                table.to_pickle('all_'+table_name+'_'+str(write_counter)+'.pkl')
                studies = []
                return

            if len(studies) >= 10000:
                print('writing study', write_counter)
                table = pd.DataFrame.from_dict(table_func(studies))
                table.to_pickle('all_'+table_name+'_'+str(write_counter)+'.pkl')
                write_counter += 1
                studies = []

        except KeyError:
            print(KeyError)
            continue

    table = pd.DataFrame.from_dict(table_func(studies))
    table.to_pickle('all_'+table_name+'_'+str(write_counter)+'.pkl')

    write_counter += 1
    studies = []

In [33]:
sample_all_studies(float('inf'), create_planned_measurements, 'measures_v2')

  2%|██▍                                                                                                      | 13/568 [00:03<02:09,  4.29it/s]

writing study 0


  5%|████▊                                                                                                    | 26/568 [00:10<06:12,  1.45it/s]

writing study 1


  7%|███████▏                                                                                                 | 39/568 [00:15<05:12,  1.69it/s]

writing study 2


  9%|█████████▌                                                                                               | 52/568 [00:20<03:30,  2.45it/s]

writing study 3


 11%|████████████                                                                                             | 65/568 [00:25<02:57,  2.84it/s]

writing study 4


 14%|██████████████▍                                                                                          | 78/568 [00:30<02:20,  3.49it/s]

writing study 5


 16%|████████████████▊                                                                                        | 91/568 [00:34<02:54,  2.73it/s]

writing study 6


 18%|███████████████████                                                                                     | 104/568 [00:39<02:36,  2.96it/s]

writing study 7


 21%|█████████████████████▌                                                                                  | 118/568 [00:44<01:43,  4.34it/s]

writing study 8


 23%|███████████████████████▉                                                                                | 131/568 [00:50<04:10,  1.74it/s]

writing study 9


 25%|██████████████████████████▎                                                                             | 144/568 [00:55<02:43,  2.60it/s]

writing study 10


 28%|████████████████████████████▋                                                                           | 157/568 [01:00<03:18,  2.07it/s]

writing study 11


 30%|███████████████████████████████▏                                                                        | 170/568 [01:04<03:39,  1.81it/s]

writing study 12


 32%|█████████████████████████████████▌                                                                      | 183/568 [01:09<03:28,  1.85it/s]

writing study 13


 35%|███████████████████████████████████▉                                                                    | 196/568 [01:14<03:26,  1.80it/s]

writing study 14


 37%|██████████████████████████████████████▎                                                                 | 209/568 [01:19<04:00,  1.50it/s]

writing study 15


 39%|████████████████████████████████████████▋                                                               | 222/568 [01:24<04:14,  1.36it/s]

writing study 16


 42%|███████████████████████████████████████████▏                                                            | 236/568 [01:29<02:48,  1.97it/s]

writing study 17


 44%|█████████████████████████████████████████████▌                                                          | 249/568 [01:34<03:38,  1.46it/s]

writing study 18


 46%|███████████████████████████████████████████████▉                                                        | 262/568 [01:39<02:32,  2.01it/s]

writing study 19


 47%|█████████████████████████████████████████████████▎                                                      | 269/568 [01:42<02:45,  1.81it/s]

writing study 20


 50%|███████████████████████████████████████████████████▋                                                    | 282/568 [01:46<01:06,  4.33it/s]

writing study 21


 52%|██████████████████████████████████████████████████████                                                  | 295/568 [01:51<01:05,  4.18it/s]

writing study 22


 54%|████████████████████████████████████████████████████████▍                                               | 308/568 [01:57<01:59,  2.18it/s]

writing study 23


 57%|██████████████████████████████████████████████████████████▊                                             | 321/568 [02:02<01:36,  2.57it/s]

writing study 24


 59%|█████████████████████████████████████████████████████████████▏                                          | 334/568 [02:07<01:31,  2.56it/s]

writing study 25


 61%|███████████████████████████████████████████████████████████████▌                                        | 347/568 [02:12<01:05,  3.35it/s]

writing study 26


 63%|█████████████████████████████████████████████████████████████████▉                                      | 360/568 [02:16<01:04,  3.21it/s]

writing study 27


 66%|████████████████████████████████████████████████████████████████████▎                                   | 373/568 [02:21<00:52,  3.72it/s]

writing study 28


 68%|██████████████████████████████████████████████████████████████████████▋                                 | 386/568 [02:26<00:45,  4.03it/s]

writing study 29


 70%|█████████████████████████████████████████████████████████████████████████                               | 399/568 [02:30<00:48,  3.52it/s]

writing study 30


 73%|███████████████████████████████████████████████████████████████████████████▍                            | 412/568 [02:37<01:58,  1.32it/s]

writing study 31


 75%|██████████████████████████████████████████████████████████████████████████████                          | 426/568 [02:42<01:18,  1.82it/s]

writing study 32


 77%|████████████████████████████████████████████████████████████████████████████████▍                       | 439/568 [02:47<01:05,  1.98it/s]

writing study 33


 80%|██████████████████████████████████████████████████████████████████████████████████▊                     | 452/568 [02:52<00:40,  2.89it/s]

writing study 34


 82%|█████████████████████████████████████████████████████████████████████████████████████▏                  | 465/568 [02:57<00:33,  3.11it/s]

writing study 35


 84%|███████████████████████████████████████████████████████████████████████████████████████▌                | 478/568 [03:02<00:22,  3.96it/s]

writing study 36


 86%|█████████████████████████████████████████████████████████████████████████████████████████▉              | 491/568 [03:07<00:41,  1.86it/s]

writing study 37


 89%|████████████████████████████████████████████████████████████████████████████████████████████▎           | 504/568 [03:12<00:28,  2.28it/s]

writing study 38


 91%|██████████████████████████████████████████████████████████████████████████████████████████████▋         | 517/568 [03:17<00:28,  1.77it/s]

writing study 39


 93%|█████████████████████████████████████████████████████████████████████████████████████████████████       | 530/568 [03:22<00:13,  2.73it/s]

writing study 40


 96%|███████████████████████████████████████████████████████████████████████████████████████████████████▌    | 544/568 [03:27<00:06,  3.68it/s]

writing study 41


 98%|█████████████████████████████████████████████████████████████████████████████████████████████████████▉  | 557/568 [03:32<00:03,  3.49it/s]

writing study 42


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 568/568 [03:36<00:00,  2.62it/s]


## Exploration

In [36]:
studies_v2 = pd.read_pickle('all_studies_v2_0.pkl').reset_index(drop=True)

In [37]:
studies_v2.head()

Unnamed: 0,study_id,official_title,short_title,conditions,verified_date,responsible_party,sponsor,type,description,interventions,...,criteria,min_age,max_age,gender,org_full_name,org_class,overall_status,last_known_status,responsible_party_type,why_stopped
0,NCT02757378,A Randomized Controlled Trial of Manual Therap...,Manual Therapy as a Form of Sensory Discrimina...,[Low Back Pain],April 2016,Kevin Farrell,Kevin Farrell,Interventional,This study evaluates whether different explana...,[],...,Inclusion Criteria:\n\nAdults over the age of ...,18 Years,,All,St. Ambrose University,OTHER,Completed,,Sponsor-Investigator,
1,NCT02755129,FRailty WAlking Patterns (FRAP) Study,FRailty WAlking Patterns (FRAP) Study,[Heart Failure],March 2020,,Medtronic BRC,Interventional,The aim of this study is to evaluate sit-stand...,[],...,Inclusion Criteria:\n\nChronic Heart Failure i...,18 Years,,All,Medtronic BRC,INDUSTRY,Completed,,Sponsor,
2,NCT02759809,Optimizing Mothers' Milk for Preterm Infants (...,OptiMoM Kindergarten Study,"[Infant, Very Low Birth Weight]",January 2019,Deborah O'Connor,The Hospital for Sick Children,Observational,"In Canada, the leading cause of long-term disa...",[],...,Inclusion Criteria:\n\nChildren who were enrol...,5 Years,6 Years,All,The Hospital for Sick Children,OTHER,Completed,,Principal Investigator,
3,NCT02752438,Rescue High Frequency Oscillatory Ventilation ...,Rescue High Frequency Oscillatory Ventilation ...,[Acute Mechanical Ventilatory Failure],September 2016,Omer Erdeve,Ankara University,Observational,Although High Frequency Oscillatory Ventilatio...,[],...,Inclusion Criteria:\n\npatients switched to HF...,1 Day,28 Days,All,Ankara University,OTHER,Unknown status,Recruiting,Principal Investigator,
4,NCT02750956,Effect of Non-surgical Periodontal Treatment o...,"Endocan, VEGF and TNF-alpha Levels in Periodon...",[Periodontal Disease],April 2016,ÇİĞDEM COŞKUN TÜRER,Bulent Ecevit University,Observational,The primer aim of the study was to determine s...,[],...,Group 1 Inclusion Criteria:\n\nNo bone and att...,25 Years,49 Years,All,Bulent Ecevit University,OTHER,Completed,,Principal Investigator,


In [38]:
studies_v2['overall_status'].value_counts()

Completed                    5474
Recruiting                   1420
Unknown status               1412
Terminated                    544
Active, not recruiting        449
Withdrawn                     296
Not yet recruiting            270
Enrolling by invitation        75
Suspended                      26
Withheld                       15
No longer available            10
Approved for marketing          5
Available                       2
Temporarily not available       1
Name: overall_status, dtype: int64

In [40]:
studies_v2[studies_v2['overall_status'] == 'Terminated']['why_stopped']

38           Due to the heterogeneity in the first group.
40      Toxicity. Only enrolled patients in phase I po...
60                  Study data is currently under review.
72                          Difficulties with recruitment
91      Children could not be examined longer than 2 y...
                              ...                        
9541    recruitment is insufficient and the current co...
9586    Due to the SARS-CoV-2 pandemic in Taiwan in 20...
9629    This study is prematurely terminated due to ch...
9652    Amgen business decision to discontinue AMG 160...
9902    The protocol is determined to no longer be abl...
Name: why_stopped, Length: 544, dtype: object

## Linking

### Parsing Phase Data

In [4]:
studies = pd.concat([pd.read_pickle('all_studies_v2_'+str(x)+'.pkl') for x in range(44)])

In [5]:
studies.head()

Unnamed: 0,study_id,official_title,short_title,conditions,verified_date,responsible_party,sponsor,type,description,interventions,...,min_age,max_age,gender,org_full_name,org_class,overall_status,last_known_status,responsible_party_type,why_stopped,phase
0,NCT05614648,"A Phase 3, Prospective, Multicenter, Randomize...",Sciatica Epidural Radiculopathy Experimental N...,[Lumbosacral Radiculopathy],November 2022,,"Sollis Therapeutics, Inc.",Interventional,This study will evaluate the safety and effect...,[Clonidine],...,18 Years,70 Years,All,"Sollis Therapeutics, Inc.",INDUSTRY,Not yet recruiting,,Sponsor,,[Phase 3]
1,NCT05611333,BE-PHIT: a Behavioral Science-Enhanced PHysici...,A Behavioral Science-Enhanced PHysician-led Re...,"[Coronary Artery Disease, Physical Inactivity,...",November 2022,Simin G. Lee,Brigham and Women's Hospital,Interventional,Ischemic heart disease is the leading cause of...,[],...,40 Years,75 Years,Female,Brigham and Women's Hospital,OTHER,Recruiting,,Principal Investigator,,[Not Applicable]
2,NCT05610488,Intravitreal Faricimab in Diabetic Macular Ede...,Intravitreal Faricimab in Diabetic Macular Ede...,[Diabetic Macular Edema],November 2022,PD Dr. med. Katja Hatz,Vista Klinik,Interventional,Title: Intravitreal faricimab in diabetic macu...,[],...,18 Years,,All,Vista Klinik,OTHER,Not yet recruiting,,Principal Investigator,,[Phase 4]
3,NCT05616767,Prevention and Screening for Early Detection o...,Prevention and Screening for Early Detection o...,[HPV],November 2022,,University of Minnesota,Interventional,The long-term objective of the parent study is...,[],...,18 Years,,Male,University of Minnesota,OTHER,Not yet recruiting,,Sponsor,,[Not Applicable]
4,NCT05612724,Rutgers Pilot for Pragmatic Return to Effectiv...,Rutgers Pilot for PREDICT- Patient POC Test,[SARS CoV 2 Infection],November 2022,"Cecile A. Feldman, DMD","Rutgers, The State University of New Jersey",Interventional,A pilot study was initiated to assess feasibil...,[],...,18 Years,,All,"Rutgers, The State University of New Jersey",OTHER,Completed,,Principal Investigator,,[Not Applicable]


In [6]:
# Okay so it looks like the studies with two phases are combinations - lets stringify them
studies['phase'].str.join('/')

0              Phase 3
1       Not Applicable
2              Phase 4
3       Not Applicable
4       Not Applicable
             ...      
8298           Phase 4
8299    Not Applicable
8300           Phase 3
8301                  
8302                  
Name: phase, Length: 438303, dtype: object

In [7]:
studies['phase_str']  = studies['phase'].str.join('/')

In [8]:
studies['phase_str'].value_counts()

Not Applicable     155696
                    99807
Phase 2             53301
Phase 1             39107
Phase 3             35884
Phase 4             30377
Phase 1/Phase 2     13362
Phase 2/Phase 3      6349
Early Phase 1        4420
Name: phase_str, dtype: int64

## Target Data
- Let's see if we can't find some drug target data

In [91]:
studies[(studies['phase_str'] == 'Early Phase 1') & studies['description'].str.contains('target')][:40]

Unnamed: 0,study_id,official_title,short_title,conditions,verified_date,responsible_party,sponsor,type,description,interventions,...,gender,org_full_name,org_class,overall_status,last_known_status,responsible_party_type,why_stopped,phase,phase_str,conditions_str
857,NCT04785989,Metabolic Profiling of Leukemic Cells Through ...,In Vivo Metabolic Profiling of CLL (Chronic Ly...,[Chronic Lymphocytic Leukemia],June 2022,,"University of Wisconsin, Madison",Interventional,Metabolic reprogramming has been identified as...,[],...,All,"University of Wisconsin, Madison",OTHER,Recruiting,,Sponsor,,[Early Phase 1],Early Phase 1,Chronic Lymphocytic Leukemia
6896,NCT01205672,Evaluation of the Molecular Effects of Metform...,Evaluation of the Molecular Effects of Metform...,[Endometrial Cancer],May 2017,,M.D. Anderson Cancer Center,Interventional,The goal of this research study is to learn if...,[Metformin],...,Female,M.D. Anderson Cancer Center,OTHER,Completed,,Sponsor,,[Early Phase 1],Early Phase 1,Endometrial Cancer
9156,NCT04692571,EMG-Based Hand-Wrist Control: Study B: Compari...,EMG-Based Hand-Wrist Control: Study B Mirrored,"[Amputation, Amputation; Traumatic, Hand, Ampu...",December 2020,,"Liberating Technologies, Inc.",Interventional,Study assessing four-channel prosthesis contro...,[],...,All,"Liberating Technologies, Inc.",INDUSTRY,Completed,,Sponsor,,[Early Phase 1],Early Phase 1,"Amputation%%Amputation; Traumatic, Hand%%Amput..."
3942,NCT04264078,Anti-CD7 Universal CAR-T Cells for CD7+ T/NK C...,Anti-CD7 U-CAR-T Cell Therapy for T/NK Cell He...,"[T-cell Leukemia, T-cell Lymphoma]",June 2021,"Xi Zhang, MD",Xinqiao Hospital of Chongqing,Interventional,The prognosis of patients with relapsed and/or...,"[Fludarabine, Cyclophosphamide, Melphalan]",...,All,Xinqiao Hospital of Chongqing,OTHER,Recruiting,,Principal Investigator,,[Early Phase 1],Early Phase 1,T-cell Leukemia%%T-cell Lymphoma
5717,NCT03736343,Individual Differences in the Effects of Alcoh...,Impulsivity and Alcohol Response,"[Alcohol Use Disorder (AUD), Subjective Respon...",October 2022,,University of Florida,Interventional,"Impulsivity, a well-known risk factor predicti...",[],...,All,University of Florida,OTHER,"Active, not recruiting",,Sponsor,,[Early Phase 1],Early Phase 1,Alcohol Use Disorder (AUD)%%Subjective Respons...
6324,NCT03119558,18F-Florbetaben (Neuraceq®) PET/MRI Evaluation...,PET/MRI Evaluation of Cardiac Amyloid,[Cardiac Amyloidosis],July 2019,Andrei Iagaru,Stanford University,Interventional,Cardiac amyloidosis is a disorder characterize...,[],...,All,Stanford University,OTHER,Completed,,Principal Investigator,,[Early Phase 1],Early Phase 1,Cardiac Amyloidosis
6850,NCT02269540,A New Biomarker-Based Approach Towards Develop...,A New Treatment Approach for Major Depressive ...,[Major Depressive Disorder],May 2019,Jeff Meyer,Centre for Addiction and Mental Health,Interventional,The investigators will be looking at MAO-A den...,"[Acetylcysteine, Sertraline, Citalopram, N-mon...",...,All,Centre for Addiction and Mental Health,OTHER,Completed,,Principal Investigator,,[Early Phase 1],Early Phase 1,Major Depressive Disorder
7103,NCT02263417,The Safety and Efficacy of Long-term Treatment...,A Randomized Controlled Trail Comparing Subtha...,[Dystonia],January 2016,,"Beijing Pins Medical Co., Ltd",Interventional,The purpose of this study was to compare the s...,[],...,All,"Beijing Pins Medical Co., Ltd",INDUSTRY,Unknown status,Recruiting,Sponsor,,[Early Phase 1],Early Phase 1,Dystonia
8967,NCT04918914,Off Label Use of Dapsone in SARS-CoV-2 Hospita...,Critical Care Results of SARS-CoV-2 ARDS by Da...,"[SARS-CoV-2, Hypoxia, ARDS]",June 2021,Badar Kanwar,Hunt Regional Medical Center,Interventional,Abstract Background: Clinicians in pulmonary c...,[Dapsone],...,All,Hunt Regional Medical Center,OTHER,Recruiting,,Principal Investigator,,[Early Phase 1],Early Phase 1,SARS-CoV-2%%Hypoxia%%ARDS
270,NCT03937687,"The Effects of a Combination of TeaCrine®, Dyn...","The Effects of a Combination of TeaCrine®, Dyn...","[Caffeine, Teacrine, and Dynamine: Effects on ...",September 2021,Shawn M. Arent,University of South Carolina,Interventional,"The purpose of this randomized, placebo-contro...",[Caffeine],...,Male,University of South Carolina,OTHER,Completed,,Principal Investigator,,[Early Phase 1],Early Phase 1,"Caffeine, Teacrine, and Dynamine: Effects on P..."


In [87]:
studies[studies['description'].str.contains('mechanism')]

Unnamed: 0,study_id,official_title,short_title,conditions,verified_date,responsible_party,sponsor,type,description,interventions,...,gender,org_full_name,org_class,overall_status,last_known_status,responsible_party_type,why_stopped,phase,phase_str,conditions_str
9,NCT05612698,Efficacy of Different Modalities and Frequenci...,Efficacy of Physical Exercise on Glucose Contr...,[PreDiabetes],November 2022,Josep1,University of the Balearic Islands,Interventional,To assess the efficacy of different modalities...,[],...,All,University of the Balearic Islands,OTHER,Not yet recruiting,,Principal Investigator,,[Not Applicable],Not Applicable,PreDiabetes
78,NCT05613504,A Randomized Controlled Trial Study of Acupunc...,Study of Acupuncture in the Treatment of Hot F...,"[Hot Flashes, Breast Cancer, Acupuncture]",November 2022,Qianyan Liu,Nanjing University of Traditional Chinese Medi...,Interventional,This study intends to further evaluate the eff...,[],...,Female,Nanjing University of Traditional Chinese Medi...,OTHER,Not yet recruiting,,Principal Investigator,,[Not Applicable],Not Applicable,Hot Flashes%%Breast Cancer%%Acupuncture
111,NCT05616091,Brain Mechanisms of Intra- and Inter-individua...,Intra- and Inter-individual Differences of Pain,[Experimental Pain in Healthy Human Subjects],November 2022,Markus Ploner,Technical University of Munich,Interventional,Pain is a highly subjective and variable pheno...,[],...,All,Technical University of Munich,OTHER,Recruiting,,Principal Investigator,,[Not Applicable],Not Applicable,Experimental Pain in Healthy Human Subjects
121,NCT05613179,Brain Effect Mechanism of Lever Positioning Ma...,Brain Effect Mechanism of Lever Positioning Ma...,"[Lumbar Disc Herniation, Multimodal Brain Func...",November 2022,Zhou Xingchen,The Third Affiliated hospital of Zhejiang Chin...,Interventional,"In the early stage, two previous studies suppo...","[Ibuprofen, Analgesics]",...,All,The Third Affiliated hospital of Zhejiang Chin...,OTHER,"Active, not recruiting",,Principal Investigator,,[Not Applicable],Not Applicable,Lumbar Disc Herniation%%Multimodal Brain Funct...
276,NCT05617222,The Impact of Bed Rest and Aging on Muscle Mas...,"The Impact of Bed Rest, Aging and NMES on Skel...","[Disuse Atrophy (Muscle) of Lower Extremities,...",November 2022,Charlotte Suetta,Bispebjerg Hospital,Interventional,Loss of muscle mass is common phenotypic trait...,[],...,All,Bispebjerg Hospital,OTHER,Recruiting,,Principal Investigator,,[Not Applicable],Not Applicable,Disuse Atrophy (Muscle) of Lower Extremities%%...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8093,NCT00905203,Effects of Different Exercise Training Dose on...,Effects of Different Exercise Training Dose on...,[Postmenopausal Diabetes],June 2010,,National Taiwan University Hospital,Interventional,The purpose of this study is specifically to i...,[],...,Female,National Taiwan University Hospital,OTHER,Unknown status,Recruiting,,,[Not Applicable],Not Applicable,Postmenopausal Diabetes
8127,NCT00908115,Evaluation of Safety and Efficacy Through Post...,Post Market Surveillance for Infanrix™,"[Diphtheria, Acellular Pertussis, Tetanus]",December 2019,,GlaxoSmithKline,Observational,The purpose of this study was to investigate t...,[],...,All,GlaxoSmithKline,INDUSTRY,Completed,,Sponsor,,[],,Diphtheria%%Acellular Pertussis%%Tetanus
8189,NCT00907491,Use of Objective Cough Counting Device to Asse...,Accuracy of Cough Detection in Healthy Adults,[Cough],May 2009,,KarmelSonix Ltd.,Observational,Cough is part of the defense mechanism to prot...,[],...,All,KarmelSonix Ltd.,INDUSTRY,Completed,,,,[],,Cough
8239,NCT00900640,Reactions to Contrast Media (CM) Administered ...,Reactions to Contrast Media (CM) Administered ...,[Contrast Allergies],March 2017,,University of Florida,Observational,The aim of this study is to prospectively docu...,[],...,All,University of Florida,OTHER,Completed,,Sponsor,,[],,Contrast Allergies


### Grouping by Org

In [72]:
studies['sponsor'].value_counts()[:10]

GlaxoSmithKline                                                  3436
National Cancer Institute (NCI)                                  3325
AstraZeneca                                                      2860
Pfizer                                                           2856
Assistance Publique - Hôpitaux de Paris                          2677
Assiut University                                                2669
M.D. Anderson Cancer Center                                      2570
Mayo Clinic                                                      2557
Cairo University                                                 2459
National Institute of Allergy and Infectious Diseases (NIAID)    2298
Name: sponsor, dtype: int64

In [73]:
studies[studies['sponsor'] == 'GlaxoSmithKline']

Unnamed: 0,study_id,official_title,short_title,conditions,verified_date,responsible_party,sponsor,type,description,interventions,...,max_age,gender,org_full_name,org_class,overall_status,last_known_status,responsible_party_type,why_stopped,phase,phase_str
689,NCT05613205,"A Phase 1, Observer-blind, Randomised, Control...",Safety and Immunogenicity of a Novel Conjugate...,[Typhoid Fever],October 2022,,GlaxoSmithKline,Interventional,A bivalent Typhoid and Paratyphoid A conjugate...,[Vaccines],...,50 Years,All,GlaxoSmithKline,INDUSTRY,Not yet recruiting,,Sponsor,,[Phase 1],Phase 1
1253,NCT04789577,"A Phase I/II Observer-blind, Randomized, Place...",A Study to Evaluate the Safety and Immunogenic...,"[Influenza, Human]",November 2022,,GlaxoSmithKline,Interventional,Study to evaluate the safety and immunogenicit...,[],...,,All,GlaxoSmithKline,INDUSTRY,Completed,,Sponsor,,"[Phase 1, Phase 2]",Phase 1/Phase 2
1801,NCT00783003,"A Single Centre, Randomised, Placebo-controlle...",A Study to Assess the Safety and Pharmacokinet...,"[Pulmonary Disease, Chronic Obstructive]",July 2017,,GlaxoSmithKline,Interventional,GW642444 and GSK233705 are in development for ...,[],...,65 Years,All,GlaxoSmithKline,INDUSTRY,Completed,,Sponsor,,[Phase 1],Phase 1
2017,NCT00784550,"A Randomized, Double-Blind, Parallel-Group, 24...",A 24-Week Study to Evaluate the Safety and Eff...,"[Pulmonary Disease, Chronic Obstructive]",October 2016,,GlaxoSmithKline,Interventional,The purpose of the study is to determine the e...,"[Fluticasone, Xhance, Bromides, Tiotropium Bro...",...,99 Years,All,GlaxoSmithKline,INDUSTRY,Completed,,Sponsor,,[Phase 4],Phase 4
2174,NCT00783549,A Study in Healthy Volunteers of Single Doses ...,A Study in Healthy Volunteers of Single Doses ...,[Dyslipidaemias],July 2017,,GlaxoSmithKline,Interventional,This study is the first study in humans to ass...,[],...,55 Years,All,GlaxoSmithKline,INDUSTRY,Completed,,Sponsor,,[Phase 1],Phase 1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8111,NCT00903617,"A Two Part, Multicenter Phase IIa, Placebo Con...",Study to Test GSK256073 in Patients With Dysli...,"[Dyslipidaemias, Dyslipidemias]",November 2019,,GlaxoSmithKline,Interventional,This is a two part study (Part A and Part B) t...,"[Niacin, Nicotinic Acids]",...,75 Years,All,GlaxoSmithKline,INDUSTRY,Completed,,Sponsor,,[Phase 2],Phase 2
8127,NCT00908115,Evaluation of Safety and Efficacy Through Post...,Post Market Surveillance for Infanrix™,"[Diphtheria, Acellular Pertussis, Tetanus]",December 2019,,GlaxoSmithKline,Observational,The purpose of this study was to investigate t...,[],...,,All,GlaxoSmithKline,INDUSTRY,Completed,,Sponsor,,[],
8142,NCT00908206,"A Placebo-controlled, Single-blind, Cross-over...",Effects of GSK598809 on Brain Activation in Ab...,"[Substance Dependence, Alcohol Dependence]",April 2015,,GlaxoSmithKline,Interventional,This study will compare the effects of GSK5988...,[],...,65 Years,All,GlaxoSmithKline,INDUSTRY,Withdrawn,,,Study not approved by BfARM,[Phase 1],Phase 1
8154,NCT00907777,Vaccination With the Pneumococcal Vaccine GSK ...,Vaccination With GSK 1024850A in Children Prim...,"[Infections, Streptococcal, Streptococcus Pneu...",November 2020,,GlaxoSmithKline,Interventional,The aim of this study is to assess the immune ...,"[Vaccines, Heptavalent Pneumococcal Conjugate ...",...,50 Months,All,GlaxoSmithKline,INDUSTRY,Completed,,Sponsor,,[Phase 3],Phase 3


### Grouping by treatment, conditions and timeline
- Looking at the studies based on the treatments, conditions, and timelines

In [10]:
# if treatments and conditions are the same - then line them up!

studies['conditions_str'] = studies['conditions'].str.join('%%')
studies['treatments_str'] = studies['interventions'].str.join('%%')

In [11]:
studies

Unnamed: 0,study_id,official_title,short_title,conditions,verified_date,responsible_party,sponsor,type,description,interventions,...,org_full_name,org_class,overall_status,last_known_status,responsible_party_type,why_stopped,phase,phase_str,conditions_str,treatments_str
0,NCT05614648,"A Phase 3, Prospective, Multicenter, Randomize...",Sciatica Epidural Radiculopathy Experimental N...,[Lumbosacral Radiculopathy],November 2022,,"Sollis Therapeutics, Inc.",Interventional,This study will evaluate the safety and effect...,[Clonidine],...,"Sollis Therapeutics, Inc.",INDUSTRY,Not yet recruiting,,Sponsor,,[Phase 3],Phase 3,Lumbosacral Radiculopathy,Clonidine
1,NCT05611333,BE-PHIT: a Behavioral Science-Enhanced PHysici...,A Behavioral Science-Enhanced PHysician-led Re...,"[Coronary Artery Disease, Physical Inactivity,...",November 2022,Simin G. Lee,Brigham and Women's Hospital,Interventional,Ischemic heart disease is the leading cause of...,[],...,Brigham and Women's Hospital,OTHER,Recruiting,,Principal Investigator,,[Not Applicable],Not Applicable,Coronary Artery Disease%%Physical Inactivity%%...,
2,NCT05610488,Intravitreal Faricimab in Diabetic Macular Ede...,Intravitreal Faricimab in Diabetic Macular Ede...,[Diabetic Macular Edema],November 2022,PD Dr. med. Katja Hatz,Vista Klinik,Interventional,Title: Intravitreal faricimab in diabetic macu...,[],...,Vista Klinik,OTHER,Not yet recruiting,,Principal Investigator,,[Phase 4],Phase 4,Diabetic Macular Edema,
3,NCT05616767,Prevention and Screening for Early Detection o...,Prevention and Screening for Early Detection o...,[HPV],November 2022,,University of Minnesota,Interventional,The long-term objective of the parent study is...,[],...,University of Minnesota,OTHER,Not yet recruiting,,Sponsor,,[Not Applicable],Not Applicable,HPV,
4,NCT05612724,Rutgers Pilot for Pragmatic Return to Effectiv...,Rutgers Pilot for PREDICT- Patient POC Test,[SARS CoV 2 Infection],November 2022,"Cecile A. Feldman, DMD","Rutgers, The State University of New Jersey",Interventional,A pilot study was initiated to assess feasibil...,[],...,"Rutgers, The State University of New Jersey",OTHER,Completed,,Principal Investigator,,[Not Applicable],Not Applicable,SARS CoV 2 Infection,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8298,NCT00904475,"A Prospective, Prospective, Double-blind, Rand...",Pilot Study of the Efficacy and Safety of Lido...,[Chronic Low Back Pain],February 2010,,Endo Pharmaceuticals,Interventional,Patients with moderate to severe chronic Low B...,[Lidocaine],...,Endo Pharmaceuticals,INDUSTRY,Completed,,,,[Phase 4],Phase 4,Chronic Low Back Pain,Lidocaine
8299,NCT00900965,A Pilot Study on the Effect of Electroacupunct...,Effect of Electroacupuncture in Patients With ...,[Irritable Bowel Syndrome],June 2009,,Chinese University of Hong Kong,Interventional,Irritable bowel syndrome (IBS) is the most com...,[],...,Chinese University of Hong Kong,OTHER,Completed,,,,[Not Applicable],Not Applicable,Irritable Bowel Syndrome,
8300,NCT00909181,"A Double-Blind, Randomized, Parallel, Placebo-...",Study of Topically Administered Oxybutynin Gel...,"[Urge Urinary Incontinence, Urinary Frequency]",June 2014,,Antares Pharma Inc.,Interventional,The primary objective of the double-blind phas...,"[Mandelic Acids, Oxybutynin]",...,Antares Pharma Inc.,INDUSTRY,Completed,,Sponsor,,[Phase 3],Phase 3,Urge Urinary Incontinence%%Urinary Frequency,Mandelic Acids%%Oxybutynin
8301,NCT00905008,Long-Term Safety and Efficacy of Drug Eluting ...,Long-Term Safety of Drug Eluting Stents in the...,[Stent Thrombosis],May 2009,,University Hospital Freiburg,Observational,The FReIburger STent Registry (FRIST) is desig...,[],...,University Hospital Freiburg,OTHER,Unknown status,"Active, not recruiting",,,[],,Stent Thrombosis,


In [13]:
studies['treatments_str'].value_counts()[:20]

                                                                  292796
Vaccines                                                            1715
Pembrolizumab                                                        833
Metformin                                                            761
Anesthetics                                                          735
Lidocaine                                                            710
Bupivacaine                                                          682
Nicotine                                                             608
Bevacizumab                                                          511
Rituximab                                                            497
Dexmedetomidine                                                      497
Nivolumab                                                            482
Insulin                                                              481
Paclitaxel                                         

In [20]:
studies[(studies['treatments_str'] == 'Ketamine') & (studies['phase_str'] == 'Phase 2')]['sponsor'].value_counts()

New York State Psychiatric Institute                      5
Medical University of South Carolina                      3
Yale University                                           3
The University of Texas Health Science Center, Houston    3
Mayo Clinic                                               3
Janssen Research & Development, LLC                       2
Icahn School of Medicine at Mount Sinai                   2
University Health Network, Toronto                        2
iX Biopharma Ltd.                                         2
Duke University                                           1
University College, London                                1
NYU Langone Health                                        1
Hospital for Special Surgery, New York                    1
Novartis Pharmaceuticals                                  1
Seelos Therapeutics, Inc.                                 1
Arkansas Children's Hospital Research Institute           1
University of Minnesota                 

## Targets
- I think the plan here is that we need to seperate the early stage trials 
- Then gather all of the measurements linked with those studies
- Then we have to go through and manually parse out the "target" from the measures
- Then train a transformer on the datasets



In [25]:
studies['phase_str'].value_counts()

Not Applicable     155696
                    99807
Phase 2             53301
Phase 1             39107
Phase 3             35884
Phase 4             30377
Phase 1/Phase 2     13362
Phase 2/Phase 3      6349
Early Phase 1        4420
Name: phase_str, dtype: int64

In [24]:
early_stage_studies = studies[studies['phase_str'] == 'Early Phase 1']

In [26]:
early_stage_studies

Unnamed: 0,study_id,official_title,short_title,conditions,verified_date,responsible_party,sponsor,type,description,interventions,...,org_full_name,org_class,overall_status,last_known_status,responsible_party_type,why_stopped,phase,phase_str,conditions_str,treatments_str
232,NCT05616468,Clinical Study of the Safety and Initial Effic...,BGT007 Cell Treatment of Nasopharyngeal Carcinoma,[Nasopharyngeal Carcinoma],December 2022,,The Affiliated Hospital of Xuzhou Medical Univ...,Interventional,This is an exploratory study to evaluate the s...,"[Cyclophosphamide, Fludarabine]",...,The Affiliated Hospital of Xuzhou Medical Univ...,OTHER,Recruiting,,Sponsor,,[Early Phase 1],Early Phase 1,Nasopharyngeal Carcinoma,Cyclophosphamide%%Fludarabine
449,NCT05610072,Glutamatergic Mechanisms in Opioid and Cocaine...,Behavioral Effects of Drugs (Inpatient): 43 (O...,"[Cocaine Use Disorder, Opioid Use Disorder]",December 2022,William Stoops,William Stoops,Interventional,The overarching hypotheses of this protocol ar...,"[Acetylcysteine, Hydromorphone, N-monoacetylcy...",...,University of Kentucky,OTHER,Recruiting,,Sponsor-Investigator,,[Early Phase 1],Early Phase 1,Cocaine Use Disorder%%Opioid Use Disorder,Acetylcysteine%%Hydromorphone%%N-monoacetylcys...
510,NCT05612074,Kinetics of Methemoglobin Concentration in Hea...,Methemoglobin Concentration in High Dose Inhal...,[Healthy Subjects],November 2022,"Lorenzo Berra, MD",Massachusetts General Hospital,Interventional,The goal of this clinical trial is to learn ab...,[Nitric Oxide],...,Massachusetts General Hospital,OTHER,Not yet recruiting,,Principal Investigator,,[Early Phase 1],Early Phase 1,Healthy Subjects,Nitric Oxide
547,NCT05614440,The Effects of Tramadol Combined With Local An...,The Effects of Tramadol Combined With Local An...,[Pain After the Surgical Extraction of Third M...,November 2022,,Marmara University,Interventional,The purpose of this study was to assess the ef...,"[Tramadol, Anesthetics, Anesthetics, Local]",...,Marmara University,OTHER,Completed,,Sponsor,,[Early Phase 1],Early Phase 1,Pain After the Surgical Extraction of Third Mo...,"Tramadol%%Anesthetics%%Anesthetics, Local"
660,NCT05612854,Catheter Directed Therapy for Intermediate Ris...,Catheter Directed Therapy in Intermediate Risk...,[Pulmonary Embolism Subacute Massive],November 2022,Ayman khairy Mohamed,Assiut University,Interventional,Aim of the work:\n\nTo compare conventional me...,[],...,Assiut University,OTHER,Not yet recruiting,,Principal Investigator,,[Early Phase 1],Early Phase 1,Pulmonary Embolism Subacute Massive,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7478,NCT03419858,The Role of Endogenous Opioidergic Systems in ...,The Role of Opioidergic Systems in Breathing B...,[Pain],June 2019,Fadel Zeidan,"University of California, San Diego",Interventional,The purpose of this psychophysical and pharmac...,[Naloxone],...,"University of California, San Diego",OTHER,Completed,,Principal Investigator,,[Early Phase 1],Early Phase 1,Pain,Naloxone
7680,NCT00901706,Multidimensional Assessment and Intervention f...,Multidimensional Assessment and Intervention f...,"[Self-Neglect, Cognitive Ability, General, Ger...",June 2013,Carmel Dyer,"The University of Texas Health Science Center,...",Interventional,This study is designed to assess the best meth...,[],...,"The University of Texas Health Science Center,...",OTHER,Completed,,Principal Investigator,,[Early Phase 1],Early Phase 1,"Self-Neglect%%Cognitive Ability, General%%Geri...",
7821,NCT00906620,Saving and Empowering Young Lives in Europe in...,Saving and Empowering Young Lives in Europe (I...,"[Depression, Suicidality]",March 2014,alan apter,Rabin Medical Center,Interventional,SEYLE is a health promoting program for adoles...,[],...,Rabin Medical Center,OTHER,Completed,,Principal Investigator,,[Early Phase 1],Early Phase 1,Depression%%Suicidality,
8119,NCT00908791,An in Vivo Proof of Principle Trial to Determi...,Proof of Principle Trial to Determine if Nutri...,[Breast Cancer],June 2018,"Lionel.D.Lewis, MD",Dartmouth-Hitchcock Medical Center,Interventional,Conjugated Linoleic Acid (CLA) is obtained in ...,[],...,Dartmouth-Hitchcock Medical Center,OTHER,Completed,,Principal Investigator,,[Early Phase 1],Early Phase 1,Breast Cancer,


In [34]:
measures = pd.concat([pd.read_pickle('all_measures_v2_'+str(x)+'.pkl') for x in range(44)])

In [36]:
measures.head()

Unnamed: 0,study,title,description,timeframe,type
0,NCT03913884,soft tissue healing,This was assessed on the third and seventh pre...,Change of soft tissue healing at 7 days,primary
1,NCT03913884,severity of postoperative pain,The patients were instructed to note their lev...,Change of postoperative pain at 7 days,secondary
2,NCT03913884,presence of swelling,three facial measurements were taken with mili...,Change of swelling at 7 days,secondary
3,NCT03913884,presence of trismus,"Inter-incisal distance was measured, as a guid...",Change of trismus at 7 days,secondary
4,NCT03911440,Defervescence,The timing (days) when fever subsides after tr...,Up to 10 days,primary


In [40]:
early_study_measures = early_stage_studies.merge(measures, left_on='study_id', right_on='study')

In [44]:
early_study_measures.head()

Unnamed: 0,study_id,official_title,short_title,conditions,verified_date,responsible_party,sponsor,type_x,description_x,interventions,...,why_stopped,phase,phase_str,conditions_str,treatments_str,study,title,description_y,timeframe,type_y
0,NCT05616468,Clinical Study of the Safety and Initial Effic...,BGT007 Cell Treatment of Nasopharyngeal Carcinoma,[Nasopharyngeal Carcinoma],December 2022,,The Affiliated Hospital of Xuzhou Medical Univ...,Interventional,This is an exploratory study to evaluate the s...,"[Cyclophosphamide, Fludarabine]",...,,[Early Phase 1],Early Phase 1,Nasopharyngeal Carcinoma,Cyclophosphamide%%Fludarabine,NCT05616468,Dose-limiting toxicity（DLT）,Adverse events related to cell therapy were ob...,From day 0 to day 28,primary
1,NCT05616468,Clinical Study of the Safety and Initial Effic...,BGT007 Cell Treatment of Nasopharyngeal Carcinoma,[Nasopharyngeal Carcinoma],December 2022,,The Affiliated Hospital of Xuzhou Medical Univ...,Interventional,This is an exploratory study to evaluate the s...,"[Cyclophosphamide, Fludarabine]",...,,[Early Phase 1],Early Phase 1,Nasopharyngeal Carcinoma,Cyclophosphamide%%Fludarabine,NCT05616468,Cmax,The amplification of BGT007 cells in periphera...,12 months,secondary
2,NCT05616468,Clinical Study of the Safety and Initial Effic...,BGT007 Cell Treatment of Nasopharyngeal Carcinoma,[Nasopharyngeal Carcinoma],December 2022,,The Affiliated Hospital of Xuzhou Medical Univ...,Interventional,This is an exploratory study to evaluate the s...,"[Cyclophosphamide, Fludarabine]",...,,[Early Phase 1],Early Phase 1,Nasopharyngeal Carcinoma,Cyclophosphamide%%Fludarabine,NCT05616468,Tmax,Number of days of peak BGT007 cell expansion a...,12 months,secondary
3,NCT05616468,Clinical Study of the Safety and Initial Effic...,BGT007 Cell Treatment of Nasopharyngeal Carcinoma,[Nasopharyngeal Carcinoma],December 2022,,The Affiliated Hospital of Xuzhou Medical Univ...,Interventional,This is an exploratory study to evaluate the s...,"[Cyclophosphamide, Fludarabine]",...,,[Early Phase 1],Early Phase 1,Nasopharyngeal Carcinoma,Cyclophosphamide%%Fludarabine,NCT05616468,AUC(Day 0 to Day 28),The area under the curve of BGT007 cells from ...,From day 0 to day 28,secondary
4,NCT05616468,Clinical Study of the Safety and Initial Effic...,BGT007 Cell Treatment of Nasopharyngeal Carcinoma,[Nasopharyngeal Carcinoma],December 2022,,The Affiliated Hospital of Xuzhou Medical Univ...,Interventional,This is an exploratory study to evaluate the s...,"[Cyclophosphamide, Fludarabine]",...,,[Early Phase 1],Early Phase 1,Nasopharyngeal Carcinoma,Cyclophosphamide%%Fludarabine,NCT05616468,ORR,Proportion of patients who achieved pre-define...,12 months,secondary


In [51]:
early_study_measures[
    early_study_measures['official_title'].str.contains('target') |
    early_study_measures['description_x'].str.contains('target') |
    early_study_measures['description_y'].str.contains('target') |
    early_study_measures['title'].str.contains('target') |
    early_study_measures['official_title'].str.contains('mech') |
    early_study_measures['description_x'].str.contains('mech') |
    early_study_measures['description_y'].str.contains('mech') |
    early_study_measures['title'].str.contains('mech')][['study', 'title', 'description_y']].rename(columns={
    'description_y': 'description'
}).to_csv('potential_target_measures.csv')

In [54]:
studies[studies['treatments_str'].str.contains('pranazepide')]

Unnamed: 0,study_id,official_title,short_title,conditions,verified_date,responsible_party,sponsor,type,description,interventions,...,org_full_name,org_class,overall_status,last_known_status,responsible_party_type,why_stopped,phase,phase_str,conditions_str,treatments_str
