In [None]:
import pandas as pd


## File Import

In [None]:
# Load the raw drug-performance data set (Drug_clean.csv in the DataResources folder).
# Source: https://www.kaggle.com/datasets/thedevastator/drug-performance-evaluation?select=Drug_clean.csv
df = pd.read_csv('DataResources/Drug_clean.csv')

# 'Indication' and 'Type' are not needed for the relational tables built below,
# so work from a trimmed copy and leave the raw frame `df` as loaded.
df_clean = df.drop(['Indication', 'Type'], axis=1)

# Preview the first rows of the trimmed frame.
df_clean.head()

## Data Cleanup
#### The broader CSV file will be split into the following individual CSV files, each representing a table in the relational database.

In [None]:
# Conditions table
# One row per distinct value of the Condition column, in order of first appearance.
# The positional index is named Condition_no and promoted to a regular column,
# where it serves as the table's primary key.
conditions = (
    pd.DataFrame({'Condition': df_clean['Condition'].unique()})
    .rename_axis('Condition_no')
    .reset_index()
)

# Write the lookup table to its own CSV file (no extra index column).
conditions.to_csv('DataResources/conditions.csv', index=False)

In [None]:
# Drugs table
# One row per distinct value of the Drug column, in order of first appearance.
# The positional index is named Drugs_no and promoted to a regular column,
# where it serves as the table's primary key.
drugs = (
    pd.DataFrame({'Drug': df_clean['Drug'].unique()})
    .rename_axis('Drugs_no')
    .reset_index()
)

# Write the lookup table to its own CSV file (no extra index column).
drugs.to_csv('DataResources/drugs.csv', index=False)

In [None]:
# Forms table
# One row per distinct value of the Form column, in order of first appearance.
# The positional index is named Form_no and promoted to a regular column,
# where it serves as the table's primary key.
forms = (
    pd.DataFrame({'Form': df_clean['Form'].unique()})
    .rename_axis('Form_no')
    .reset_index()
)

# Write the lookup table to its own CSV file (no extra index column).
forms.to_csv('DataResources/forms.csv', index=False)

In [None]:
# Drug Data table
# 1. Left-join the three lookup tables (conditions, drugs, forms) onto the cleaned frame so every
#    row picks up the matching XX_no key.
# 2. Keep only the key columns and the measures, in the final column order; the text columns
#    Condition / Drug / Form are left out because the XX_no keys replace them.
# 3. Name the positional index 'pk' and promote it to a column to act as this table's primary key.
drug_data = (
    df_clean
    .merge(conditions, on='Condition', how='left')
    .merge(drugs, on='Drug', how='left')
    .merge(forms, on='Form', how='left')
    [['Condition_no', 'Drugs_no', 'Form_no', 'EaseOfUse', 'Effective', 'Price', 'Reviews', 'Satisfaction']]
    .rename_axis('pk')
    .reset_index()
)

# Write the table to its own CSV file (no extra index column).
drug_data.to_csv('DataResources/drug_data.csv', index=False)

In [None]:
# Build a working copy of the raw frame with a combined "<Condition>_<Form>" column.
# An explicit copy is required: `df_new = df` would only bind a second name to the same
# object, so adding the column would silently modify `df` as well.
df_new = df.copy()

# Vectorised concatenation instead of a row-wise apply. Both parts are cast to str so a
# missing value cannot raise a TypeError during the concatenation.
df_new['merged_column'] = df_new['Condition'].astype(str) + '_' + df_new['Form'].astype(str)
df_new

## PostgreSQL Import

In [None]:
from sqlalchemy import create_engine
import psycopg2 
from sqlalchemy.ext.automap import automap_base

In [None]:
# Read the four relational tables back from PostgreSQL into DataFrames.
#
# The connection URL is taken from the DRUG_DB_URL environment variable when it is set, so real
# credentials never have to be saved inside the notebook. The fallback is the local default.
import os

connection_string = os.environ.get(
    'DRUG_DB_URL',
    'postgresql://postgres:postgres@127.0.0.1:5432/DrugDataSQL',
)
engine = create_engine(connection_string)

# One SELECT per table; the table names are double-quoted because they contain upper-case letters.
query_drug = 'SELECT * FROM public."Drug"'
query_data = 'SELECT * FROM public."Drug_data"'
query_conditions = 'SELECT * FROM public."Conditions"'
query_forms = 'SELECT * FROM public."Form"'

# Load each table into its own DataFrame.
df_drug = pd.read_sql(query_drug, engine)
df_data = pd.read_sql(query_data, engine)
df_conditions = pd.read_sql(query_conditions, engine)
df_forms = pd.read_sql(query_forms, engine)

# Bokeh

In [1]:
import numpy as np
import math
import pandas as pd

from bokeh.io import curdoc, show
from bokeh.io import output_notebook, output_file, show
from bokeh.layouts import column, row, layout
from bokeh.models import ColumnDataSource, Div, Select, Slider, TextInput, Spinner, CustomJS, Button, Dropdown, AutocompleteInput, CustomJSFilter
from bokeh.plotting import figure, show
from pathlib import Path


In [12]:
# Display the current working frame.
# NOTE(review): the saved output of this cell contains a 'Drug Form' column, which is only
# assigned in the following cell - presumably the cells were executed out of order; confirm
# with Restart Kernel -> Run All.
df_new

Unnamed: 0,Condition,Drug,EaseOfUse,Effective,Form,Indication,Price,Reviews,Satisfaction,Type,Drug Form
0,Acute Bacterial Sinusitis,Amoxicillin,3.852353,3.655882,Capsule,On Label,12.590000,86.294118,3.197647,RX,Amoxicillin Capsule (RX)
1,Acute Bacterial Sinusitis,Amoxicillin-Pot Clavulanate,3.470000,3.290000,Liquid (Drink),Off Label,287.370000,43.000000,2.590000,RX,Amoxicillin-Pot Clavulanate Liquid (Drink) (RX)
2,Acute Bacterial Sinusitis,Amoxicillin-Pot Clavulanate,3.121429,2.962857,Tablet,On Label,70.608571,267.285714,2.248571,RX,Amoxicillin-Pot Clavulanate Tablet (RX)
3,Acute Bacterial Sinusitis,Ampicillin,2.000000,3.000000,Capsule,On Label,12.590000,1.000000,1.000000,RX,Ampicillin Capsule (RX)
4,Acute Bacterial Sinusitis,Ampicillin,3.250000,3.000000,Tablet,On Label,125.240000,15.000000,3.000000,RX,Ampicillin Tablet (RX)
...,...,...,...,...,...,...,...,...,...,...,...
680,vulvovaginal candidiasis,Miconazole Nitrate,3.465000,2.770000,Cream,On Label,13.990000,19.500000,2.345000,RX/OTC,Miconazole Nitrate Cream (RX/OTC)
681,vulvovaginal candidiasis,Miconazole-Skin Clnsr17,4.750000,3.000000,Cream,On Label,13.990000,4.000000,3.000000,OTC,Miconazole-Skin Clnsr17 Cream (OTC)
682,vulvovaginal candidiasis,Miconazole-Skin Clnsr17,4.000000,1.000000,Other,On Label,125.990000,1.000000,1.000000,OTC,Miconazole-Skin Clnsr17 Other (OTC)
683,vulvovaginal candidiasis,Terconazole,3.525000,3.047500,Cream,On Label,68.990000,20.000000,2.717500,RX,Terconazole Cream (RX)


In [13]:

# Interactive bar chart: the "Effective" score of every drug form for the condition chosen in
# a drop-down menu. All filtering happens in the browser through a CustomJS callback, so the
# chart stays interactive without a running Bokeh server.

DEFAULT_CONDITION = "Atopic Dermatitis"  # condition shown when the chart first renders
Y_HEADROOM = 1.1  # y-axis ends 10% above the tallest bar (same factor on first draw and in the callback)

Drug_clean_csv = Path("DataResources/Drug_clean.csv")
drug_data = pd.read_csv(Drug_clean_csv)

# Bar label "<Drug> <Form> (<Type>) (<Indication>)", built with vectorised string concatenation.
# The label goes onto a NEW frame (`assign`), so the raw `drug_data` frame is not modified, and
# rows for which no label could be built (a missing part yields NaN) are dropped.
drug_form = (
    drug_data['Drug'].astype(str) + ' ' + drug_data['Form']
    + ' (' + drug_data['Type'] + ')' + ' (' + drug_data['Indication'] + ')'
)
df_new = drug_data.assign(**{'Drug Form': drug_form}).dropna(subset=['Drug Form'])

# Menu entries; fall back to the first available condition if the preferred default is absent,
# so the initial figure is never built from an empty selection.
condition_options = list(df_new['Condition'].dropna().unique())
default_condition = DEFAULT_CONDITION if DEFAULT_CONDITION in condition_options else condition_options[0]

df_new_def = df_new[df_new['Condition'] == default_condition]

Overall = ColumnDataSource(data=df_new)   # every row; read by the callback
Curr = ColumnDataSource(data=df_new_def)  # rows currently drawn

# Categorical x-axis: factors must be unique, hence unique(). Plain Python values are passed to the ranges.
p = figure(
    x_range=df_new_def['Drug Form'].unique().tolist(),
    y_range=(0, float(df_new_def['Effective'].max()) * Y_HEADROOM),
    x_axis_label='Drug Form',
    y_axis_label='Effective',
)

# Browser-side link between the menu and the plot.
# - The loop stops at length - 1 (the previous `<=` bound read one element past the end).
# - `sc.data` is replaced by a new object instead of mutating two of its columns in place, which
#   left the remaining columns at their old length; the assignment also triggers the redraw.
callback = CustomJS(args=dict(source=Overall, sc=Curr, p=p, headroom=Y_HEADROOM), code="""
const selected = cb_obj.value
const conditions = source.data['Condition']
const labels = []
const values = []
for (let i = 0; i < conditions.length; i++) {
    if (conditions[i] == selected) {
        labels.push(source.data['Drug Form'][i])
        values.push(source.data['Effective'][i])
    }
}

if (values.length > 0) {
    p.y_range.setv({"start": 0, "end": Math.max(...values) * headroom});
}
p.x_range.setv({"factors": Array.from(new Set(labels))});

sc.data = {'Drug Form': labels, 'Effective': values};
""")

menu = Select(options=condition_options, value=default_condition, title='Condition')  # drop-down menu
p.vbar(x='Drug Form', top='Effective', width=0.9, color='green', source=Curr)

# Bokeh interprets a numeric orientation as RADIANS; pi/4 gives the intended 45-degree tilt.
p.xaxis.major_label_orientation = math.pi / 4
menu.js_on_change('value', callback)  # run the callback whenever the selection changes

# NOTE(review): this rebinds `layout`, hiding the `layout` function imported from bokeh.layouts.
# The name is kept so that any later cell referring to this variable keeps working.
layout = column(menu, p)
show(layout)