# Load Depenency

In [8]:
#Cleaned up Crypto Example
from os import system
import pandas as pd
import requests
import datetime as dt
# from pytrials.client import ClinicalTrials
import json
import ipywidgets as widgets
import numpy as np
np.random.seed(10031975)

In [9]:
from pytrials.utils import json_handler, csv_handler


class ClinicalTrials:
    """ClinicalTrials API client

    Provides functions to easily access the ClinicalTrials.gov API
    (https://clinicaltrials.gov/api/)
    in Python.

    Attributes:
        study_fields: List of all study fields you can use in your query.
        api_info: Tuple containing the API version number and the last
        time the database was updated.
    """

    _BASE_URL = "https://clinicaltrials.gov/api/"
    _INFO = "info/"
    _QUERY = "query/"
    _JSON = "fmt=json"
    _CSV = "fmt=csv"

    def __init__(self):
        self.api_info = self.__api_info()

    @property
    def study_fields(self):
        fields_list = json_handler(
            f"{self._BASE_URL}{self._INFO}study_fields_list?{self._JSON}"
        )
        return fields_list["StudyFields"]["Fields"]

    def __api_info(self):
        """Returns information about the API"""
        last_updated = json_handler(
            f"{self._BASE_URL}{self._INFO}data_vrs?{self._JSON}"
        )["DataVrs"]
        api_version = json_handler(f"{self._BASE_URL}{self._INFO}api_vrs?{self._JSON}")[
            "APIVrs"
        ]

        return api_version, last_updated

    def get_full_studies(self, search_expr, max_studies=50):
        """Returns all content for a maximum of 100 study records.

        Retrieves information from the full studies endpoint, which gets all study fields.
        This endpoint can only output JSON (Or not-supported XML) format and does not allow
        requests for more than 100 studies at once.

        Args:
            search_expr (str): A string containing a search expression as specified by
                `their documentation <https://clinicaltrials.gov/api/gui/ref/syntax#searchExpr>`_.
            max_studies (int): An integer indicating the maximum number of studies to return.
                Defaults to 50.

        Returns:
            dict: Object containing the information queried with the search expression.

        Raises:
            ValueError: The number of studies can only be between 1 and 100
        """
        if max_studies > 100 or max_studies < 1:
            raise ValueError("The number of studies can only be between 1 and 100")

        req = f"full_studies?expr={search_expr}&max_rnk={max_studies}&{self._JSON}"

        full_studies = json_handler(f"{self._BASE_URL}{self._QUERY}{req}")

        return full_studies

    def get_study_fields(self, search_expr, fields, max_studies=50, min_rnk=1,fmt="csv"):
        """Returns study content for specified fields

        Retrieves information from the study fields endpoint, which acquires specified information
        from a large (max 1000) studies. To see a list of all possible fields, check the class'
        study_fields attribute.

        Args:
            search_expr (str): A string containing a search expression as specified by
                `their documentation <https://clinicaltrials.gov/api/gui/ref/syntax#searchExpr>`_.
            fields (list(str)): A list containing the desired information fields.
            max_studies (int): An integer indicating the maximum number of studies to return.
                Defaults to 50.
            fmt (str): A string indicating the output format, csv or json. Defaults to csv.

        Returns:
            Either a dict, if fmt='json', or a list of records (e.g. a list of lists), if fmt='csv.
            Both containing the maximum number of study fields queried using the specified search expression.

        Raises:
            ValueError: The number of studies can only be between 1 and 1000
            ValueError: One of the fields is not valid! Check the study_fields attribute
                for a list of valid ones.
            ValueError: Format argument has to be either 'csv' or 'json'
        """
        if max_studies > 1000 or max_studies < 1:
            raise ValueError("The number of studies can only be between 1 and 1000")
        elif not set(fields).issubset(self.study_fields):
            raise ValueError(
                "One of the fields is not valid! Check the study_fields attribute for a list of valid ones."
            )
        else:
            concat_fields = ",".join(fields)
            # req = f"study_fields?expr={search_expr}&max_rnk={max_studies}&fields={concat_fields}"
            req = f"study_fields?expr={search_expr}&min_rnk={min_rnk}&max_rnk={max_studies+min_rnk-1}&fields={concat_fields}"
            if fmt == "csv":
                url = f"{self._BASE_URL}{self._QUERY}{req}&{self._CSV}"
                return csv_handler(url)

            elif fmt == "json":
                url = f"{self._BASE_URL}{self._QUERY}{req}&{self._JSON}"
                return json_handler(url)

            else:
                raise ValueError("Format argument has to be either 'csv' or 'json'")

    def get_study_count(self, search_expr):
        """Returns study count for specified search expression

        Retrieves the count of studies matching the text entered in search_expr.

        Args:
            search_expr (str): A string containing a search expression as specified by
                `their documentation <https://clinicaltrials.gov/api/gui/ref/syntax#searchExpr>`_.

        Returns:
            An integer

        Raises:
            ValueError: The search expression cannot be blank.
        """
        if not set(search_expr):
            raise ValueError("The search expression cannot be blank.")
        else:
            req = f"study_fields?expr={search_expr}&max_rnk=1&fields=NCTId"
            url = f"{self._BASE_URL}{self._QUERY}{req}&{self._JSON}"
            returned_data = json_handler(url)
            study_count = returned_data["StudyFieldsResponse"]["NStudiesFound"]
            return study_count

    def __repr__(self):
        return f"ClinicalTrials.gov client v{self.api_info[0]}, database last updated {self.api_info[1]}"

# Load Data

https://clinicaltrials.gov/api/gui/ref/crosswalks

Disease 2 search for

In [None]:
name=input()

print(name)

hit mesh some day

In [None]:
# pd.DataFrame.from_records(ct_data[1:], columns=ct_data[0])

Pull ONE

#Explore Data

In [10]:
#change this out to NAME

ct = ClinicalTrials()
infodf= (ct.get_full_studies(search_expr="Parkinson", max_studies=5 ))


In [None]:
type(infodf)

In [None]:
number = print(infodf['FullStudiesResponse']['NStudiesFound'])

In [11]:
ct.get_study_count(search_expr="Parkinson")


3656

'NStudiesFound': 3656,

In [None]:
pd.read_json(infodf)

In [None]:
# df = pd.DataFrame([])
# for i in range(5):
#     data = dict(zip(np.random.choice(10, replace=False, size=5),
#                     np.random.randint(10, size=5)))
#     data = pd.DataFrame(data.items())
#     data = data.transpose()
#     data.columns = data.iloc[0]
#     data = data.drop(data.index[[0]])
#     df = df.append(data)
# print('{}\n'.format(df))

In [27]:
temp = pd.DataFrame()
for i in range(1,1000, 100):
    # print(i)
    print(temp)
    temp = ct.get_study_fields(
    search_expr="Parkinson",
    fields=["NCTId"],
    max_studies=100,
    min_rnk=i,
    fmt="csv",
)
 
 
temp = pd.DataFrame.from_records(api_pull_2[1:], columns=api_pull_2[0])

Empty DataFrame
Columns: []
Index: []
[['Rank', 'NCTId'], ['1', 'NCT02370134'], ['2', 'NCT01662427'], ['3', 'NCT05245955'], ['4', 'NCT03152721'], ['5', 'NCT02016092'], ['6', 'NCT03888287'], ['7', 'NCT01877148'], ['8', 'NCT03111472'], ['9', 'NCT04994015'], ['10', 'NCT04023201'], ['11', 'NCT02975193'], ['12', 'NCT00037830'], ['13', 'NCT00256204'], ['14', 'NCT04032262'], ['15', 'NCT04648150'], ['16', 'NCT04903769'], ['17', 'NCT04176302'], ['18', 'NCT02283073'], ['19', 'NCT02917122'], ['20', 'NCT02012647'], ['21', 'NCT04057794'], ['22', 'NCT01835652'], ['23', 'NCT03830190'], ['24', 'NCT02038959'], ['25', 'NCT04729010'], ['26', 'NCT02896816'], ['27', 'NCT00282802'], ['28', 'NCT05164783'], ['29', 'NCT00880620'], ['30', 'NCT02763683'], ['31', 'NCT05084209'], ['32', 'NCT04876404'], ['33', 'NCT03337464'], ['34', 'NCT01747655'], ['35', 'NCT03848455'], ['36', 'NCT02265900'], ['37', 'NCT01301651'], ['38', 'NCT04553185'], ['39', 'NCT03384797'], ['40', 'NCT01955616'], ['41', 'NCT02474329'], ['42', '

In [17]:




api_pull_2 = ct.get_study_fields(
    search_expr="Parkinson",
    fields=["NCTId"],
    max_studies=10,
    min_rnk=5,
    fmt="csv",
)

# ClinicalTrials limits API queries to 1000 records
# Count of studies may be useful to build loops when you want to retrieve more than 1000 records



# Read the csv data in Pandas
import pandas as pd

temp = pd.DataFrame.from_records(api_pull_2[1:], columns=api_pull_2[0])
temp

1
101
201
301
401
501
601
701
801
901


Unnamed: 0,Rank,NCTId
0,5,NCT02016092
1,6,NCT03888287
2,7,NCT01877148
3,8,NCT03111472
4,9,NCT04994015
5,10,NCT04023201
6,11,NCT02975193
7,12,NCT00037830
8,13,NCT00256204
9,14,NCT04032262


In [12]:




# Get the NCTId, Condition and Brief title fields from 500 studies related to Coronavirus and Covid, in csv format.
api_pull_1 = ct.get_study_fields(
    search_expr="Parkinson",
    fields=["NCTId", "Condition", "OfficialTitle", "BriefTitle" , "Acronym" , "StudyType",
    "InterventionType","InterventionName","InterventionOtherName","InterventionDescription","Phase" 
    ,"StudyFirstSubmitDate","LastUpdateSubmitDate","CompletionDate","OverallStatus","IsFDARegulatedDrug","IsFDARegulatedDevice","BriefSummary"],
    max_studies=999,
    fmt="csv",
)

# ClinicalTrials limits API queries to 1000 records
# Count of studies may be useful to build loops when you want to retrieve more than 1000 records



# Read the csv data in Pandas
import pandas as pd

df1=pd.DataFrame.from_records(api_pull_1[1:], columns=api_pull_1[0])

# 
# df1 = pd.DataFrame.to_frame().reset_index()



{'FullStudiesResponse': {'APIVrs': '1.01.05',
  'DataVrs': '2022:04:27 23:03:31.564',
  'Expression': 'Parkinson',
  'NStudiesAvail': 413210,
  'NStudiesFound': 3656,
  'MinRank': 1,
  'MaxRank': 50,
  'NStudiesReturned': 50,
  'FullStudies': [{'Rank': 1,
    'Study': {'ProtocolSection': {'IdentificationModule': {'NCTId': 'NCT02370134',
       'OrgStudyIdInfo': {'OrgStudyId': '005/58'},
       'Organization': {'OrgFullName': 'Chulalongkorn University',
        'OrgClass': 'OTHER'},
       'BriefTitle': "Development of Parkinson's Glove for Detection and Suppression of Hand Tremor",
       'OfficialTitle': "Development of Parkinson's Glove for Detection and Suppression of Hand Tremor at Rest Among the Tremor-predominant Parkinson's Disease Patients With Medically Intractable Tremor"},
      'StatusModule': {'StatusVerifiedDate': 'February 2015',
       'OverallStatus': 'Unknown status',
       'LastKnownStatus': 'Not yet recruiting',
       'ExpandedAccessInfo': {'HasExpandedAccess': 'N

In [None]:
api_pull_2 = ct.get_study_fields(
    search_expr="Parkinson",
    fields=["NCTId","IsFDARegulatedDrug","IsFDARegulatedDevice", "IsUnapprovedDevice", "PrimaryOutcomeMeasure", "PrimaryOutcomeDescription","PrimaryOutcomeTimeFrame", "SecondaryOutcomeMeasure","SecondaryOutcomeDescription", "SecondaryOutcomeTimeFrame","OtherOutcomeMeasure"
,"OtherOutcomeDescription","OtherOutcomeTimeFrame","EligibilityCriteria","StudyPopulation","HealthyVolunteers", "ReferencePMID", "LocationCity", "LocationState" , "LocationFacility"

],
    max_studies=999,
    fmt="csv",
)

# ClinicalTrials limits API queries to 1000 records
# Count of studies may be useful to build loops when you want to retrieve more than 1000 records

ct.get_study_count(search_expr="Parkinson")

# Read the csv data in Pandas
import pandas as pd

df2 = pd.DataFrame.from_records(api_pull_2[1:], columns=api_pull_2[0])

##cleaning

In [None]:
print(type(df1))

In [None]:
result = pd.merge(df1, df2,  on='NCTId', how='outer')

In [None]:
print(result)

#Next let's drop the first row (SNo)

## not need to drop SNO - will drop later - just moved the mask df to a new name