In [71]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import json
from datetime import datetime
import string

In [129]:
def parse_drug_name(drug_name):
    """Normalise a drug name so names can be compared loosely.

    Hyphens are replaced by spaces and the text is lower-cased, so
    "NKTR-214" and "nktr 214" normalise to the same string. Every other
    character, including other punctuation, is kept unchanged.

    Args:
        drug_name: The name to normalise (a ``str``).

    Returns:
        The normalised name as a ``str``.
    """
    return drug_name.replace("-", " ").lower()

In [147]:
def _fetch_study_records(base_url, expr, field_names, timeout):
    """Run one study_fields query and return its studies as a list of flat dicts."""
    payload = {
        "expr": expr,
        "min_rnk": 1,
        "max_rnk": 1000,
        "fmt": "JSON",
        "fields": ",".join(field_names),
    }
    response = requests.get(base_url, params=payload, timeout=timeout)
    if response.status_code != 200:
        return []

    # "StudyFields" may be missing from the response, so never index it blindly.
    studies = response.json().get("StudyFieldsResponse", {}).get("StudyFields") or []

    # List-valued fields are collapsed to one comma-separated string so every
    # DataFrame cell holds a scalar.
    return [
        {
            key: ", ".join(map(str, value)) if isinstance(value, list) else value
            for key, value in study.items()
        }
        for study in studies
    ]


# method for getting clinicaltrials.gov info for given company
def get_clinical_trials(company_name, timeout=60):
    """Fetch the clinicaltrials.gov studies whose organisation matches a company.

    Three study_fields queries are issued with the same search expression but
    different field lists, and the results are combined on the NCT id.
    Columns that repeat across queries carry a ``_2`` / ``_3`` suffix.

    Args:
        company_name: Company name to search for. It is also used as a
            case-insensitive literal substring filter on ``OrgFullName``.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        A ``pandas.DataFrame`` with one row per matching study, including the
        helper columns ``Company_name``, ``NCT_ID_new`` and ``EDGAR_name``.
        An empty ``DataFrame`` when the first query yields no studies.
        The string ``"error"`` when the results cannot be assembled (kept for
        backward compatibility with existing callers).

    Raises:
        requests.RequestException: If an HTTP request itself fails.
    """
    # NOTE(review): this is the legacy ClinicalTrials.gov query API; confirm the
    # endpoint is still served before relying on this function.
    clintrials_base_url = "https://clinicaltrials.gov/api/query/study_fields"

    # make sure to add quotes to all terms so results are required to match
    expr = "'" + company_name + "'"

    # The fields are split over several requests of at most 20 names each --
    # presumably because the endpoint caps the fields per request (TODO confirm).
    fields1 = [
        'OrgFullName',
        'NCTId',
        'BriefTitle',
        'OfficialTitle',
        'DesignAllocation',
        'Acronym',
        'Phase',
        'OverallStatus',
        'DesignMasking',
        'StartDate',
        'EnrollmentCount',
        'EnrollmentType',
        'ArmGroupDescription',
        'ArmGroupInterventionName',
        'ArmGroupLabel',
        'ArmGroupType',
        'InterventionType',
        'InterventionName',
        'InterventionDescription',
        'InterventionArmGroupLabel',
    ]
    fields2 = [
        'PrimaryOutcomeMeasure',
        'NCTId',
        'PrimaryOutcomeDescription',
        'PrimaryOutcomeTimeFrame',
        'SecondaryOutcomeMeasure',
        'SecondaryOutcomeDescription',
        'SecondaryOutcomeTimeFrame',
        'EligibilityCriteria',
        'ConditionMeshId',
        'ConditionMeshTerm',
        'InterventionMeshId',
        'InterventionMeshTerm',
        'InterventionAncestorId',
        'InterventionAncestorTerm',
        'InterventionBrowseLeafId',
        'InterventionBrowseLeafName',
        'InterventionBrowseLeafAsFound',
        'InterventionBrowseLeafRelevance',
        'PrimaryCompletionDate',
        'CompletionDate',
    ]
    fields3 = [
        'LeadSponsorName',
        'NCTId',
        'ResponsiblePartyType',
        'LeadSponsorClass',
        'IsFDARegulatedDrug',
        'BriefSummary',
        'DetailedDescription',
        'Condition',
        'Keyword',
        'StudyType',
        'DesignInterventionModel',
        'DesignPrimaryPurpose',
    ]

    primary_records = _fetch_study_records(clintrials_base_url, expr, fields1, timeout)
    if not primary_records:
        # Nothing found (or the request was rejected): report "no trials".
        return pd.DataFrame()

    try:
        clin_all_df = pd.DataFrame(primary_records)
        clin_all_df["Company_name"] = clin_all_df["OrgFullName"]
        clin_all_df["NCT_ID_new"] = clin_all_df["NCTId"]

        for suffix, field_names in (("_2", fields2), ("_3", fields3)):
            extra_records = _fetch_study_records(
                clintrials_base_url, expr, field_names, timeout
            )
            if not extra_records:
                # A failed secondary query only costs its own columns.
                continue
            extra_df = pd.DataFrame(extra_records)
            extra_df["NCT_ID_new"] = extra_df["NCTId"]
            clin_all_df = clin_all_df.join(
                extra_df.set_index("NCT_ID_new"), on="NCT_ID_new", rsuffix=suffix
            )

        clin_all_df["EDGAR_name"] = company_name

        # regex=False: the company name is literal text, not a pattern, so
        # characters such as "(", "." or "+" must not be interpreted.
        is_company = clin_all_df["Company_name"].str.contains(
            company_name, case=False, regex=False, na=False
        )
        return clin_all_df[is_company]

    except Exception:
        return "error"

In [139]:
def clinical_trials_with_drug(trials_df, drug_names):
    """Keep the trials whose intervention name mentions any of the given drugs.

    Both the drug names and each trial's ``InterventionName`` are normalised
    with ``parse_drug_name`` and then compared by substring containment.

    Args:
        trials_df: DataFrame of trials with an ``InterventionName`` column.
        drug_names: Iterable of drug names to look for.

    Returns:
        A new ``pandas.DataFrame`` built from the matching rows, each trial
        appearing at most once. It is empty when nothing matches.
    """
    # Normalise each search term once instead of once per trial.
    wanted_names = [parse_drug_name(drug_name) for drug_name in drug_names]

    filtered_trials = []
    for trial in trials_df.to_dict('records'):
        intervention = trial["InterventionName"]
        if not isinstance(intervention, str):
            # Missing values (e.g. NaN) have no text to search.
            continue
        intervention = parse_drug_name(intervention)
        # any() stops at the first hit, so a trial that mentions several of
        # the drug names is still added only once.
        if any(name in intervention for name in wanted_names):
            filtered_trials.append(trial)

    return pd.DataFrame(filtered_trials)

In [153]:
# company name is string, drug_names is list
def clinical_trials_company_drugs(company_name, drug_names):
    """Look up a company's trials and keep those involving the given drugs.

    Args:
        company_name: Company name passed to ``get_clinical_trials``.
        drug_names: List of drug names passed to ``clinical_trials_with_drug``.

    Returns:
        The DataFrame produced by ``clinical_trials_with_drug`` when the
        company has trials. A message string when the lookup returns an empty
        DataFrame. Whatever ``get_clinical_trials`` returned when that is not
        a DataFrame at all (its ``"error"`` marker).
    """
    co_trials = get_clinical_trials(company_name)

    # get_clinical_trials reports a failure with a plain string; calling
    # .empty on it would raise, so hand the marker back to the caller.
    if not isinstance(co_trials, pd.DataFrame):
        return co_trials

    if co_trials.empty:
        return "no trials for company, check spelling of company name"

    return clinical_trials_with_drug(co_trials, drug_names)

In [155]:
# Example: Nektar Therapeutics trials whose intervention names mention
# NKTR-214 or bempegaldesleukin. Running this cell sends live HTTP requests
# to clinicaltrials.gov.
clinical_trials_company_drugs("Nektar Therapeutics", ["NKTR-214", "Bempegaldesleukin"])

Unnamed: 0,Rank,OrgFullName,NCTId,BriefTitle,OfficialTitle,DesignAllocation,Acronym,Phase,OverallStatus,DesignMasking,...,LeadSponsorClass,IsFDARegulatedDrug,BriefSummary,DetailedDescription,Condition,Keyword,StudyType,DesignInterventionModel,DesignPrimaryPurpose,EDGAR_name
0,3,Nektar Therapeutics,NCT03729245,A Study of Bempegaldesleukin (NKTR-214: BEMPEG...,A Phase 3 Randomized Open Label Study to Compa...,Randomized,,Phase 3,Recruiting,None (Open Label),...,INDUSTRY,Yes,The main purpose of this study is to compare t...,,"Renal Cell Carcinoma, Metastatic Renal Cell Ca...","Kidney Cancer, Kidney Neoplasms, Renal Cancer,...",Interventional,Parallel Assignment,Treatment,Nektar Therapeutics
1,5,Nektar Therapeutics,NCT03435640,A Study of NKTR-262 in Combination With Bempeg...,"A Phase 1/2, Open-label, Multicenter, Dose Esc...",Non-Randomized,REVEAL,"Phase 1, Phase 2",Recruiting,None (Open Label),...,INDUSTRY,Yes,Patients will receive intra-tumoral (IT) NKTR-...,Cancer treatments that couple pharmacological ...,"Melanoma, Merkel Cell Carcinoma, Triple Negati...","Bempegaldesleukin (NKTR-214), NKTR-262, Nivolu...",Interventional,Parallel Assignment,Treatment,Nektar Therapeutics
2,6,Nektar Therapeutics,NCT04646044,A Placebo Controlled Trial of Bempegaldesleuki...,"A Phase 1b, Multicenter, Randomized, Double-Bl...",Randomized,,Phase 1,Recruiting,Double,...,INDUSTRY,Yes,"The main purpose of this phase-1b, multicenter...",,"Covid-19, Coronavirus Disease 2019","BEMPEG, Bempegaldesleukin, CD122, CD122-Biased...",Interventional,Parallel Assignment,Treatment,Nektar Therapeutics
3,7,Nektar Therapeutics,NCT02983045,A Dose Escalation and Cohort Expansion Study o...,"A Phase 1/2, Open-label, Multicenter Study of ...",Non-Randomized,PIVOT-02,"Phase 1, Phase 2","Active, not recruiting",None (Open Label),...,INDUSTRY,Yes,"In this four-part study, NKTR-214 will be admi...",NKTR-214 (investigational agent) is an IL-2 pa...,"Melanoma, Renal Cell Carcinoma, Non Small Cell...","NKTR-214, Bempegaldesleukin, Nivolumab, Ipilim...",Interventional,Parallel Assignment,Treatment,Nektar Therapeutics
4,9,Nektar Therapeutics,NCT03138889,A Study of a CD122-Biased Cytokine (NKTR-214) ...,"A Phase 1/2, Open-Label, Multicenter Study to ...",Non-Randomized,PROPEL,"Phase 1, Phase 2",Recruiting,None (Open Label),...,INDUSTRY,Yes,This study is to assess the safety and tolerab...,NKTR-214 is a cytokine (investigational agent)...,"Non-Small Cell Lung Cancer, Melanoma, Urotheli...","NKTR-214, Metastatic Urothelial Bladder Cancer...",Interventional,Single Group Assignment,Treatment,Nektar Therapeutics
5,12,Nektar Therapeutics,NCT04410445,Study to Compare Adjuvant Immunotherapy of Bem...,"A Phase 3, Randomized, Open-label Study to Com...",Randomized,PIVOT-12,Phase 3,Recruiting,None (Open Label),...,INDUSTRY,Yes,The main purpose of this study is to compare t...,The main purpose of this study is to compare t...,"Melanoma, Melanoma Stage III, Melanoma Stage I...","CD122-Biased Agonist, CD122-Biased Cytokine, I...",Interventional,Parallel Assignment,Treatment,Nektar Therapeutics
6,16,Nektar Therapeutics,NCT03785925,A Single-Arm Study of Bempegaldesleukin (NKTR-...,"A Phase 2, Single-Arm Study of Bempegaldesleuk...",,PIVOT-10,Phase 2,"Active, not recruiting",None (Open Label),...,INDUSTRY,Yes,The main purpose of this study is to evaluate ...,,"Urinary Bladder Neoplasm, Neoplasm Metastasis","Bladder, Bladder Cancer, CD122, Cisplatin Inel...",Interventional,Single Group Assignment,Treatment,Nektar Therapeutics
7,23,Nektar Therapeutics,NCT02869295,A Phase 1/2 Multicenter Dose Escalation and Ex...,"An Open-Label, Multicenter, Dose Escalation An...",,NKTR-214,"Phase 1, Phase 2",Completed,None (Open Label),...,INDUSTRY,,"This is a first in human, open-label, sequenti...",,"Unspecified Adult Solid Tumor, Protocol Specific","Metastatic Solid Tumors, Locally Advanced Soli...",Interventional,Single Group Assignment,Treatment,Nektar Therapeutics
