In [4]:
# Revision work 20170310

from django.shortcuts import render, redirect, get_object_or_404
from django.utils import timezone
import re
import math
import pandas as pd
import numpy as np
from bokeh.charts import Bar, Histogram  # defaults, output_file, show
from bokeh.models import Range1d
from bokeh.embed import components
import os


# Look up study #35 (get_object_or_404 responds with a 404 when it is missing)
# and read the first sheet (index 0) of its imported spreadsheet.
study = get_object_or_404(StudyAnalysis, pk=35)
main_df = pd.read_excel(study.imported_sheet, sheetname=0)

# Give the four columns fixed names, then lower-case the organ labels so that
# later matching on the organ text is case-insensitive.
renamed_columns = [
    'ID',
    'Organ',
    'Lesion size at baseline (mm)',
    'Lesion size at post-treatment (mm)',
]
main_df.columns = renamed_columns
organ_lowercase = main_df["Organ"].str.lower()
main_df["Organ"] = organ_lowercase

# Build `process_df`: one summary row per patient, derived from the per-lesion
# table `main_df` (columns: 'ID', 'Organ', and the two lesion-size columns).
#
# Rows are keyed on the real patient ID through a groupby, so IDs do not have
# to be contiguous, start at 1, or appear in any particular order.
SOLID_COL = "Number of solid organ tumor"
LYMPH_COL = "Number of lymph node"
BURDEN_BASE_COL = "Tumor burden at the baseline (mm)"
BURDEN_POST_COL = "Tumor burden at the post-treatment (mm)"
SINGLE_LESION_COL = "Lesion size at the baseline (mm)"
PERCENT_COL = "Percent change (%)"

# Fail early, with a readable message, on incomplete records instead of letting
# a NaN surface later as an integer-conversion error.
required_cols = ['ID', 'Organ', 'Lesion size at baseline (mm)', 'Lesion size at post-treatment (mm)']
if main_df[required_cols].isnull().any().any():
    raise ValueError("main_df contains missing values; every lesion needs an ID, an organ and both sizes.")

# A lesion counts as a lymph node when its (already lower-cased) organ label
# starts with "lymph"; everything else counts as a solid organ tumor.
is_lymph_node = main_df["Organ"].str.startswith("lymph")

# One row per lesion with 0/1 indicator columns, so a per-patient sum yields
# the lesion counts and the tumor burdens in a single pass.
lesions = pd.DataFrame({
    "Patient ID": main_df["ID"].astype(int),
    SOLID_COL: (~is_lymph_node).astype(int),
    LYMPH_COL: is_lymph_node.astype(int),
    BURDEN_BASE_COL: main_df['Lesion size at baseline (mm)'],
    BURDEN_POST_COL: main_df['Lesion size at post-treatment (mm)'],
})
process_df = lesions.groupby("Patient ID").sum().reset_index()
# Fix the column order explicitly rather than relying on dict ordering.
process_df = process_df[["Patient ID", SOLID_COL, LYMPH_COL, BURDEN_BASE_COL, BURDEN_POST_COL]]

# Burdens are reported as whole millimetres (fractional part truncated).
process_df[BURDEN_BASE_COL] = process_df[BURDEN_BASE_COL].astype(int)
process_df[BURDEN_POST_COL] = process_df[BURDEN_POST_COL].astype(int)

# The percent change divides by the baseline burden, so it must be non-zero.
if (process_df[BURDEN_BASE_COL] == 0).any():
    raise ValueError("Baseline tumor burden is 0 for at least one patient; percent change is undefined.")

# Only patients with exactly one lesion get a baseline lesion size; for them it
# equals the baseline burden. All other patients keep NaN.
lesion_count = process_df[SOLID_COL] + process_df[LYMPH_COL]
process_df[SINGLE_LESION_COL] = process_df[BURDEN_BASE_COL].astype(float).where(lesion_count == 1)

# Percent change of the burden relative to baseline, rounded down (floor),
# e.g. -3.6 becomes -4.
burden_change = (process_df[BURDEN_POST_COL] - process_df[BURDEN_BASE_COL]) / process_df[BURDEN_BASE_COL] * 100
process_df[PERCENT_COL] = np.floor(burden_change).astype(int)

process_df

Unnamed: 0,Patient ID,Number of solid organ tumor,Number of lymph node,Tumor burden at the baseline (mm),Tumor burden at the post-treatment (mm),Lesion size at the baseline (mm),Percent change (%)
0,1,2,1,140,155,,10
1,2,1,2,144,154,,6
2,3,1,2,148,149,,0
3,4,3,2,332,320,,-4
4,5,2,1,161,150,,-7
5,6,3,2,388,335,,-14
6,7,2,1,233,195,,-17
7,8,0,2,138,95,,-32
8,9,2,0,62,52,,-17
9,10,3,1,373,239,,-36


In [3]:

from bokeh.io import output_notebook, show
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [37]:
from django.shortcuts import render, redirect, get_object_or_404
from django.utils import timezone
import re
import math
import pandas as pd
import numpy as np
from bokeh.charts import Bar, Histogram, output_file
from bokeh.io import output_notebook, show
from bokeh.models import Range1d
from bokeh.embed import components
from bokeh.charts import defaults
import os


# Simulation settings. The interval bounds and the median are read by position
# from an ascending list of N_TRIALS values, so the positions are derived from
# N_TRIALS instead of being hard-coded.
N_TRIALS = 1000
LOWER_POS = N_TRIALS * 25 // 1000        # 25  -> 26th smallest value
UPPER_POS = N_TRIALS - 1 - LOWER_POS     # 974 -> 975th smallest value
MID_LOW_POS = (N_TRIALS - 1) // 2        # 499
MID_HIGH_POS = N_TRIALS // 2             # 500

study = get_object_or_404(StudyAnalysis, pk=35)

# Work on a copy so that appending rows below does not alter the frame held by `study`.
input_df = study.reassessed_df.copy()

# 1) Add rows of UP patients (new_PR = 0, new_PRO = 1 for each of them).
# assumes input_df has the default 0..n-1 index, as the row lookups below do -- TODO confirm
base_index = len(input_df.index)
for i in range(study.up_patients):
    input_df.loc[base_index + i, "new_PR"] = 0
    input_df.loc[base_index + i, "new_PRO"] = 1

# 2) Generate N_TRIALS Bernoulli (0 or 1) draws per patient.
# Key = row label of the patient in input_df, Value = array of N_TRIALS draws
# whose success probability is that patient's new_PR / new_PRO value.
# NOTE(review): no random seed is set, so the results differ between runs.
bernoulli_dict_PR = {}
bernoulli_dict_PR0 = {}
for record in range(len(input_df.index)):
    prob_pr = input_df.loc[record, 'new_PR']
    prob_pro = input_df.loc[record, 'new_PRO']
    bernoulli_dict_PR[record] = np.random.choice([0, 1], size=(N_TRIALS,), p=[1 - prob_pr, prob_pr])
    bernoulli_dict_PR0[record] = np.random.choice([0, 1], size=(N_TRIALS,), p=[1 - prob_pro, prob_pro])

# 3) One row per trial, one column per patient; add each trial's mean over patients.
PR_df = pd.DataFrame(bernoulli_dict_PR)
num_pr_patients = len(PR_df.columns)
pr_successes = PR_df.sum(axis=1)
PR_df.loc[:, "NewProb(PR)"] = pr_successes / num_pr_patients
PRO_df = pd.DataFrame(bernoulli_dict_PR0)
num_pro_patients = len(PRO_df.columns)
pro_successes = PRO_df.sum(axis=1)
PRO_df.loc[:, "NewProb(PRO)"] = pro_successes / num_pro_patients

# 4) Whole-number percentages per trial, sorted ascending, and their quantiles.
# The percentage is computed with integer arithmetic: truncating the float
# product mean * 100 can be off by one (e.g. 29 / 100 * 100 == 28.999... -> 28).
pr_percent = pr_successes * 100 // num_pr_patients
new_data = {'Index': [i + 1 for i in range(len(PR_df.index))],
            'Probability of PR (%)': sorted(pr_percent, reverse=False)}
sorted_PR_df = pd.DataFrame(new_data)

quantile_bottom_pr = sorted_PR_df.loc[LOWER_POS, "Probability of PR (%)"]
quantile_top_pr = sorted_PR_df.loc[UPPER_POS, "Probability of PR (%)"]
quantile_median_pr = int((sorted_PR_df.loc[MID_LOW_POS, "Probability of PR (%)"] + sorted_PR_df.loc[MID_HIGH_POS, "Probability of PR (%)"]) / 2)

pro_percent = pro_successes * 100 // num_pro_patients
new_data = {'Index': [i + 1 for i in range(len(PRO_df.index))],
            'Probability of PRO (%)': sorted(pro_percent, reverse=False)}
sorted_PRO_df = pd.DataFrame(new_data)

quantile_bottom_pro = sorted_PRO_df.loc[LOWER_POS, "Probability of PRO (%)"]
quantile_top_pro = sorted_PRO_df.loc[UPPER_POS, "Probability of PRO (%)"]
quantile_median_pro = int((sorted_PRO_df.loc[MID_LOW_POS, "Probability of PRO (%)"] + sorted_PRO_df.loc[MID_HIGH_POS, "Probability of PRO (%)"]) / 2)

# # 5) Draw histogram plots for visualizing calculation results.
# pr_plot = Histogram(sorted_PRO_df, values='Probability of PRO (%)', bins=7, bar_width=4, color='blue', title='', ylabel='', xlabel='') # 15
# pr_plot.title.text_font = "Roboto Slab"
# # pr_plot.background_fill_alpha = 0
# # pr_plot.border_fill_color = None
# pr_plot.x_range = Range1d(quantile_bottom_pro-20, quantile_top_pro+20)
# pr_plot.width = 600    # default : 600
# pr_plot.height = 250    # default : 600

# 5) Bar chart counting how many simulated trials produced each PR percentage.
# (A Histogram variant, a transparent background and a fixed 0-100 x-range
# were tried earlier and left disabled.)
pr_bar_options = dict(
    label='Probability of PR (%)',
    values="Index",
    agg="count",
    bar_width=1,
    color='blue',
    title='',
    xlabel='',
    ylabel='',
    legend=False,
)
pr_plot = Bar(sorted_PR_df, **pr_bar_options)

# Cosmetic settings.
pr_plot.title.text_font = "Roboto Slab"
pr_plot.width = 600    # default : 600
pr_plot.height = 250    # default : 600

show(pr_plot)

from bokeh.sampledata.autompg import autompg as df


In [1]:
from bokeh.io import output_notebook, show
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [None]:
from django.shortcuts import render, redirect, get_object_or_404
from django.utils import timezone
import re
import math
import pandas as pd
import numpy as np
from bokeh.charts import Bar, Histogram, output_file
from bokeh.io import output_notebook, show
from bokeh.models import Range1d
from bokeh.embed import components
from bokeh.charts import defaults
import os



# Percent-change cut-offs used to classify a patient's result.
PARTIAL_RESPONSE_MAX_CHANGE = -30   # at or below this counts as partial response
PROGRESSION_MIN_CHANGE = 20         # at or above this counts as progression

# NOTE(review): `pk` is not defined in any visible cell (the other cells
# hard-code pk=35); it must be set before this cell can run.
study = get_object_or_404(StudyAnalysis, pk=pk)
processed_df = study.processed_df
up_patients = study.up_patients
# UP patients have no row in processed_df but still count towards the total.
num_all_patients = len(processed_df.index) + up_patients

# Calculate the proportions of patients based on diagnosis results.
# The two conditions cannot both hold, so they are counted independently;
# int() keeps the counts as plain Python integers.
percent_change = processed_df.loc[:, "Percent change (%)"]
num_partial_response = int((percent_change <= PARTIAL_RESPONSE_MAX_CHANGE).sum())
num_progression = int((percent_change >= PROGRESSION_MIN_CHANGE).sum())

# Proportions as percentages rounded to two decimals; every UP patient is
# added to the progression count.
partial_response_prop = round(num_partial_response / num_all_patients * 100, 2)
progression_prop = round((num_progression + up_patients) / num_all_patients * 100, 2)

# Prepare the data for the diagnosis-result plot: percent changes ordered from
# largest to smallest, paired with a 1-based running index.
ranked_changes = sorted(processed_df["Percent change (%)"], reverse=True)
running_index = list(range(1, len(processed_df.index) + 1))
new_data = {
    'Index': running_index,
    'Percent change (%)': ranked_changes,
}
sorted_df = pd.DataFrame(new_data)

# Store the ordered table on the study record and save it.
study.sorted_df = sorted_df
study.save()

# Bar chart of the ordered percent changes.
sorted_plot = Bar(
    sorted_df,
    values='Percent change (%)',
    color="White",
    title='Percent change (%)',
    legend=None,
    ylabel="",
)

# Axes: fixed -100..100 y-range, x axis hidden.
sorted_plot.y_range = Range1d(-100, 100)
sorted_plot.xaxis.visible = False

# Appearance: title font, transparent background, no border fill, size.
sorted_plot.title.text_font = "Roboto Slab"
sorted_plot.background_fill_alpha = 0
sorted_plot.border_fill_color = None
sorted_plot.width = 600    # default : 600
sorted_plot.height = 250    # default : 600

# Produce the <script> / <div> snippets for embedding the plot in a page.
script, div = components(sorted_plot)