In [1]:
import json

import altair as alt
import numpy as np
import pandas as pd
import requests

# Fetch data using the ClinicalTrials.gov API:

In [2]:
# Base url: the study_fields endpoint, searching for the expression "cannabidiol"
url = 'https://clinicaltrials.gov/api/query/study_fields?expr=cannabidiol'

# Fetch the data on these fields:
fields = 'OverallStatus,NCTId,Condition,BriefTitle,BriefSummary,EnrollmentCount,InterventionName'

# Build the url, get up to 1000 records (that's the limit) in JSON format
url += '&fields=' + fields + '&max_rnk=1000&fmt=JSON'

# Make the query. Without a timeout a stalled connection would block the
# kernel indefinitely.
data = requests.get(url, timeout=60)

# Stop here on an HTTP error (4xx/5xx) rather than handing an error page to
# the JSON parser, which would fail with a misleading decode error.
data.raise_for_status()

# Parse the JSON body into a dict object:
data_as_dict = data.json()

# Put the results into a DataFrame (one row per study, condition and intervention):

In [3]:
# Flatten the API response into one row per (study, condition, intervention)
# combination: a study that lists several conditions or interventions
# contributes one row for each pairing.
columns = ['Title', 'Status', 'Intervention', 'Enrolment', 'Condition', 'NCTId']
rows = []
for dct in data_as_dict['StudyFieldsResponse']['StudyFields']:
    # The enrolment count is the same for every row of a study, so parse it
    # once per study. Fall back to 0 when it is missing, empty or not a number;
    # only those cases are caught, so Ctrl-C and real bugs still surface.
    try:
        enrolment = int(dct['EnrollmentCount'][0])
    except (KeyError, IndexError, TypeError, ValueError):
        enrolment = 0

    for condition in dct['Condition']:
        for intervention in dct['InterventionName']:
            # Every field is a list; single-valued fields are read via [0].
            rows.append([dct['BriefTitle'][0], dct['OverallStatus'][0], intervention,
                         enrolment, condition, dct['NCTId'][0]])

# Build the frame in a single step: assigning df.loc[i] inside the loop grows
# the frame one row at a time, which is slow for larger responses.
df = pd.DataFrame(rows, columns=columns)

# Number of rows; kept defined because the original cell left this name behind.
count = len(df)

In [4]:
# Preview the first five rows of the flattened table
df.head(n=5)

Unnamed: 0,Title,Status,Intervention,Enrolment,Condition,NCTId
0,Cannabidiol Use to Reduce Cravings in Individu...,Recruiting,Cannabidiol 600mg,12,Opioid-use Disorder,NCT04192370
1,Cannabidiol Pharmacotherapy for Adults With Ca...,Completed,Cannabidiol,10,Cannabis Use Disorder,NCT03102918
2,Cannabidiol Pharmacotherapy for Adults With Ca...,Completed,Placebo,10,Cannabis Use Disorder,NCT03102918
3,Low Dose of Cannabidiol (CBD) to Treat Mild to...,Completed,Cannabidiol (CBD),16,Musculoskeletal Pain,NCT04193631
4,Trial of Cannabidiol to Treat Severe Behavior ...,Recruiting,Epidiolex,30,Autism,NCT04517799


# Plot with altair:

In [5]:
# df holds one row per (study, condition, intervention), so a study with
# several interventions repeats its condition. Keep one row per
# (NCTId, Condition) pair so each study is counted once per condition.
conditions = df.drop_duplicates(['NCTId', 'Condition'])[['Condition']]

# Horizontal bar chart: one bar per condition, bar length = number of rows
# (i.e. distinct studies) for that condition, longest bar first.
chart = alt.Chart(conditions).mark_bar().encode(
    alt.Y('Condition:O', sort='-x', title='Condition'),
    alt.X('count(Condition):Q', title='Number of studies')
)

# NOTE(review): height=1500 presumably leaves room for every condition label;
# adjust if the number of distinct conditions changes a lot.
chart.properties(
    title='Number of conditions investigated for CBD',
    height=1500
)