In [83]:
# Import all packages 
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
from text_similarity import process_text, calculate_similarity
import tkinter as tk
from tkinter import simpledialog
from tensorflow.keras.models import load_model

Load the ongoing clinical trials into a DataFrame

In [84]:
# Show every column when a DataFrame is displayed (no column truncation)
pd.options.display.max_columns = None

In [85]:
# Read the cleaned ongoing clinical trial data into a DataFrame
# (index_col=False: no CSV column is used as the index)
ongoing_trial = pd.read_csv(
    filepath_or_buffer='ongoing_trial_cleaned.csv',
    index_col=False,
)

In [86]:
# Work on an independent (deep) copy so the operations below never touch the original data
ongoing_trial_copy = ongoing_trial.copy(deep=True)

Collect the patient information needed for the analysis

In [87]:
# Create dialog boxes so the user can enter their information.
# Each answer is returned as a string; it may be None if the dialog is dismissed.
ROOT_input = tk.Tk()
ROOT_input.withdraw()  # hide the empty root window; only the dialogs are shown
ROOT_input.geometry('1920x1080')
try:
    name = simpledialog.askstring(title="Personal Info", prompt="What's your Name?:")
    gender = simpledialog.askstring(title="Personal Info", prompt="What's your gender? Male or Female:")
    countries = simpledialog.askstring(title="Personal Info", prompt="Which contries you are living in?:")
    age = simpledialog.askstring(title="Personal Info", prompt="Please enter your age:")
    disease = simpledialog.askstring(title="Personal Info", prompt="What type of disease you got:")
    description = simpledialog.askstring(title="Personal Info", prompt="Please descrip your disease:")
finally:
    # Always release the hidden root window (even if a dialog raises) so that
    # no Tk instance is left alive in the kernel after the questions are answered.
    ROOT_input.destroy()

In [88]:
# Standardize the input format.
# An answer may be None (e.g. a dismissed dialog) or blank; reject both up front with a
# clear message instead of failing later with an AttributeError, or matching every trial
# because the disease string is empty.
_answers = {'gender': gender, 'countries': countries, 'age': age, 'disease': disease}
_unanswered = [field for field, value in _answers.items() if value is None or not str(value).strip()]
if _unanswered:
    raise ValueError(f"Missing required input: {', '.join(_unanswered)}")

# Strip surrounding whitespace so " male " still equals "Male" and substring
# matching on the disease/country is not defeated by stray spaces.
gender = gender.strip().capitalize()
countries = countries.strip().title()
age = float(age)  # float() already ignores surrounding whitespace; raises ValueError if not numeric
disease = disease.strip().lower()

In [89]:
# Keep only the clinical trials whose age window contains the patient's age
# (minimum age inclusive, maximum age exclusive)
ongoing_trial_filtered_age = ongoing_trial_copy[
    ongoing_trial_copy['minimum_age'].le(age) & ongoing_trial_copy['maximum_age'].gt(age)
]

In [90]:
# Keep only the clinical trials open to the patient's gender, or to "All" genders
ongoing_trial_filtered_age_gender = ongoing_trial_filtered_age[
    ongoing_trial_filtered_age['gender'].isin([gender, "All"])
]

In [91]:
# Keep only the clinical trials that mention the patient's disease in any column.
# - regex=False: the disease is user-typed text, so match it literally; characters such
#   as "(" or "+" must not be interpreted as (possibly invalid) regular expressions.
# - case=False: the input was lower-cased, but the trial text is not normalised here,
#   so a case-sensitive search would miss e.g. "Diabetes" for "diabetes".
# The mask is built column by column on the stringified frame; a row is kept when
# at least one of its cells contains the disease text.
_disease_mask = (
    ongoing_trial_filtered_age_gender
    .astype(str)
    .apply(lambda column: column.str.contains(disease, case=False, regex=False))
    .any(axis=1)
)
ongoing_trial_filtered = ongoing_trial_filtered_age_gender[_disease_mask]

Use machine learning to predict the outcome of each ongoing clinical trial

In [92]:
# Take an independent (deep) copy for the modelling steps so the filtered frame stays intact
ongoing_trial_filtered_ml = ongoing_trial_filtered.copy(deep=True)

In [93]:
# Stop here when no trial survived the eligibility filters
if len(ongoing_trial_filtered_ml) == 0:
    print('I cannot give you any recommandation, please consult you physician for more information')
    exit()

I cannot give you any recommandation, please consult you physician for more information


In [94]:
# Load the pre-trained neural network model from disk
nn_model = load_model(filepath='best_model_nn.keras')



In [95]:
# Split the data into features (X) and target (y), dropping the columns that are
# not fed to the encoder. Each column is listed exactly once
# ('official_title' used to appear twice).
columns_to_drop = [
    'overall_status', 'nct_id', 'description', 'detailed_description', 'official_title',
    'city', 'state', 'countries_name', 'contact_name',
    'email', 'phone', 'phone_extension', 'facility_name',
]
X = ongoing_trial_filtered_ml.drop(columns=columns_to_drop)
y = ongoing_trial_filtered_ml['overall_status']

In [None]:
# Load the encoder that was saved alongside the trained model.
# NOTE: joblib.load unpickles the file, so only load artefacts from a trusted source.
column_transformer = joblib.load(filename='column_transformer.joblib')

In [None]:
# Apply the loaded encoder to the feature frame.
# NOTE: X is overwritten with the transformed output, so re-running this cell on its own
# would feed already-transformed data back into the transformer; re-run the split cell first.
X = column_transformer.transform(X)

In [None]:
# Run the network on the encoded features, then take the index of the
# highest-scoring output along axis 1 as the predicted label of each row
predictions = nn_model.predict(X)
predicted_labels = predictions.argmax(axis=1)

In [None]:
# Add the predictions as a new column.
# `ongoing_trial_filtered` was produced by chained boolean indexing, so assigning a column
# into it in place triggers pandas' SettingWithCopyWarning. `assign` returns a new frame
# instead, which also makes this cell safe to re-run.
ongoing_trial_filtered = ongoing_trial_filtered.assign(predicted_outcome=predicted_labels)

Use an NLP model to measure how closely each clinical trial matches the patient's disease description

In [None]:
# Pre-process the patient's free-text disease description with the project helper
# `process_text`; the result is later compared against each trial's description.
main_text=process_text(description)

In [None]:
# Score every trial description against the patient's processed description using the
# project helper `calculate_similarity` (presumably returns one scalar score per trial;
# the scores are later compared with a numeric threshold).
# Only the 'description' column is needed, so the helper is applied to that column
# rather than to whole rows, and `assign` returns a new frame instead of writing into a
# filtered slice (which would raise SettingWithCopyWarning).
ongoing_trial_filtered = ongoing_trial_filtered.assign(
    similarities=ongoing_trial_filtered['description'].apply(
        lambda trial_description: calculate_similarity(trial_description, main_text)
    )
)

Final recommendation

In [None]:
# Take an independent (deep) copy so the final filtering does not alter the scored frame
ongoing_trial_filtered_copy = ongoing_trial_filtered.copy(deep=True)

In [None]:
# Keep only the trials whose predicted outcome is class 1; every other prediction is treated as a failure
ongoing_trial_filtered_copy = ongoing_trial_filtered_copy[
    ongoing_trial_filtered_copy['predicted_outcome'].eq(1)
]

In [None]:
# Keep only the clinical trials taking place in the patient's country of residence.
# - regex=False: the country is user-typed text and must be matched literally.
# - na=False: a trial without a country would otherwise put NaN in the mask, and
#   boolean indexing with a mask containing NaN raises.
ongoing_trial_filtered_copy_country = ongoing_trial_filtered_copy[
    ongoing_trial_filtered_copy['country'].str.contains(countries, case=False, regex=False, na=False)
]

In [None]:
# Stop here when neither the in-country list nor the overall list has any trial left
if not (len(ongoing_trial_filtered_copy_country) > 0 or len(ongoing_trial_filtered_copy) > 0):
    print('I cannot give you any recommandation, please consult you physician for more information')
    exit()

In [None]:
# Order both candidate lists from the most to the least similar description
ongoing_trial_filtered_copy_country, ongoing_trial_filtered_copy = (
    candidates.sort_values(by='similarities', ascending=False)
    for candidates in (ongoing_trial_filtered_copy_country, ongoing_trial_filtered_copy)
)

In [None]:
# Display the candidate trials in the patient's country, ordered by descending similarity
ongoing_trial_filtered_copy_country

In [None]:
# Display all candidate trials regardless of country, ordered by descending similarity
ongoing_trial_filtered_copy

In [None]:
# Provide the recommendation.
# Prefer the most similar trial in the patient's country; otherwise fall back to the most
# similar trial anywhere. A list only qualifies when at least one of its trials reaches a
# similarity of 0.1. Both frames are expected to be sorted by descending similarity already,
# so the first row is the best match.
if (ongoing_trial_filtered_copy_country['similarities'] >= 0.1).any():
    top_trial = ongoing_trial_filtered_copy_country.iloc[0]
    best_nctid = top_trial['nct_id']
    best_trial_title = top_trial['official_title']
    print(f'I suggest you would participate clincial trial "{best_trial_title}".You can lookup for more information on https://clinicaltrials.gov and search NCT ID: {best_nctid} for more information. You can find out the location and contact information on this website. But always consult you physician before you made any decision. I wish all the best to you')
elif (ongoing_trial_filtered_copy['similarities'] >= 0.1).any():
    top_trial = ongoing_trial_filtered_copy.iloc[0]
    best_nctid = top_trial['nct_id']
    best_trial_title = top_trial['official_title']
    print(f'I suggest you would participate clincial trial "{best_trial_title}".You can lookup for more information on https://clinicaltrials.gov and search NCT ID: {best_nctid} for more information. You can find out the location and contact information on this website. This clinical trial conduct outside your country. But always consult you physician before you made any decision. I wish all the best to you')
else:
    print('I cannot give you any recommandation, please consult you physician for more information')