# The Anatomy of a Successful FDA-Approved Drug

In [1]:
import os
import re
import sys
import time
import tqdm
import string
import datetime
import requests
import textwrap
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from collections import defaultdict
# turn off lxml warning
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
# clinical trial info
from databases.ct_per_year import worldwide_ct_per_year
# pubchempy is a Python wrapper for the PubChem PUG REST API
import pubchempy as pcp
# pytrials is a Python wrapper for the ClinicalTrials.gov API
from pytrials.client import ClinicalTrials
# custom functions
from utils.fda_sponsors import fda_sponsor_list, rename_sponsors
from utils.webpage_scraping import test_connection
from utils.text_search import find_drug, find_df_overlap, combine_ddc_databases
from utils.pickle_dataframes import pickle_dataframe, unpickle_dataframes, read_excel
# auto reload custom functions
%load_ext autoreload
%autoreload 2
# other settings
sys.setrecursionlimit(10000) # increase recursion limit for pickle

In [None]:
# Convert the worldwide_ct_per_year mapping (year -> count) into a two-column
# dataframe. The frame is built in a single call: concatenating one row at a
# time copies the whole frame on every iteration and starts from an empty frame.
ct_per_year_df = pd.DataFrame(
    list(worldwide_ct_per_year.items()),
    columns=["year", "count"],
)
# Sort by year; ignore_index=True already yields a fresh 0..n-1 index, so no
# separate reset_index call is needed.
ct_per_year_df = ct_per_year_df.sort_values(by="year", ascending=True, ignore_index=True)
ct_per_year_df

**Source:** https://nextjournal.com/asmirnov-horis/bbc-visual-and-data-journalism-cookbook-for-lets-plot

In [10]:
# Lets-Plot plotting API. The star import is presumably what supplies the
# ggplot(), theme(), element_text(), ... names used unqualified in later cells.
from lets_plot import *
from lets_plot.mapping import as_discrete
# Initialise Lets-Plot's HTML output for this notebook before any plot is shown.
LetsPlot.setup_html()

In [17]:
# Base stroke width; the theme's axis and grid lines are derived from it.
line_size = 1.6


def bbc_theme(show_x_axis=True):
    """Build a BBC-style Lets-Plot theme.

    Args:
        show_x_axis: When true, the axis line is drawn and the returned spec
            additionally pins the lower y limit to 0 and sets the y-scale
            ``expand`` to ``[.15, 0]``. When false, the axis line is blank
            and only the theme is returned.

    Returns:
        The ``theme(...)`` spec, with the coordinate and y-scale settings
        added when ``show_x_axis`` is true.
    """
    def helvetica_text(title=False, subtitle=False, size=21):
        # Title and subtitle override the default size and add a top margin;
        # only the title is bold. Subtitle settings are applied last.
        face, margin = None, None
        if title:
            size, face, margin = 33, "bold", [11, 0, 0, 0]
        if subtitle:
            size, margin = 26, [9, 0, 0, 0]
        return element_text(family="Helvetica", face=face, size=size, margin=margin)

    # Axis line is twice the base stroke width, or hidden entirely.
    axis_line_spec = element_line(size=2*line_size) if show_x_axis else 'blank'

    theme_options = dict(
        plot_title=helvetica_text(title=True),
        plot_subtitle=helvetica_text(subtitle=True),
        legend_position='top',
        legend_background='blank',
        legend_title='blank',
        legend_text=helvetica_text(),
        axis_title='blank',
        axis_text=helvetica_text(),
        axis_text_x=element_text(margin=[20, 20]),
        axis_text_y=element_text(margin=[10, 5]),
        axis_ticks='blank',
        axis_line=axis_line_spec,
        axis_ontop_x=True,
        panel_grid_minor='blank',
        panel_grid_major_y=element_line(size=line_size*6/5, color='#CBCBCB'),
        panel_grid_major_x='blank',
        panel_background='blank',
        strip_text=element_text(size=26, hjust=0),
    )
    styled = theme(**theme_options)

    if show_x_axis:
        styled += coord_cartesian(ylim=[0, None]) + scale_y_continuous(expand=[.15, 0])
    return styled

In [18]:
# Keep only the rows whose year is 2012 or later.
# NOTE(review): the threshold is the string '2012', so this presumably relies
# on the "year" column holding strings -- confirm against worldwide_ct_per_year.
line_df = ct_per_year_df[ct_per_year_df["year"] >= '2012']
line_title = 'Number of Clinical Trials Registered'
# Derive the year range from the rows actually plotted: a hard-coded
# 'Worldwide 2000-2021' contradicts the 2012 cut-off applied above.
if line_df.empty:
    line_subtitle = 'Worldwide'
else:
    line_subtitle = f"Worldwide {line_df['year'].min()}-{line_df['year'].max()}"

# Line chart of trial counts per year; the parenthesised chain avoids
# backslash continuations and is still the cell's displayed value.
(
    ggplot(line_df, aes('year', 'count'))
    + geom_line(color='#1380A1', size=line_size,
                tooltips=layer_tooltips().format("@year", "d"))
    + scale_x_continuous(format='d')
    + bbc_theme()
    + ggsize(800, 450)
    + labs(title=line_title, subtitle=line_subtitle)
)

**Source:** https://cast42.github.io/blog/cast42/jupyter/altair/2022/04/18/Economist-style.html

In [42]:
import altair as alt

# Horizontal bar chart: one bar per year, bar length is the trial count.
# TODO: per-bar count labels and a source note in the bottom right corner are
# not added by this cell yet.
trials_axis = alt.X('count:Q', axis=alt.Axis(title='Number of Clinical Trials'))
year_axis = alt.Y('year:O', axis=alt.Axis(title='Year'))
bar_chart = alt.Chart(line_df).mark_bar().encode(
    x=trials_axis,
    y=year_axis,
    tooltip=['year', 'count'],
)
bar_chart.properties(
    title='World Wide Clinical Trials Registered Per Year',
    width=800,
    height=400,
)

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
