In [4]:
# Revision work, 2017-03-10

from django.shortcuts import render, redirect, get_object_or_404
from django.utils import timezone
import re
import math
import pandas as pd
import numpy as np
from bokeh.charts import Bar, Histogram  # defaults, output_file, show
from bokeh.models import Range1d
from bokeh.embed import components
import os


# Fetch the StudyAnalysis record with primary key 35 (404 if it does not
# exist) and read the first worksheet of the file in ``study.imported_sheet``.
# The sheet selector is deliberately left at its default (the first sheet):
# the keyword was spelled ``sheetname`` in older pandas and ``sheet_name``
# from 0.21 on, so naming it explicitly breaks on one side or the other.
study = get_object_or_404(StudyAnalysis, pk=35)
main_df = pd.read_excel(study.imported_sheet)

# Rename the four columns positionally and lowercase the organ names so that
# later organ matching is case-insensitive.
main_df.columns = ['ID', 'Organ', 'Lesion size at baseline (mm)', 'Lesion size at post-treatment (mm)']
main_df["Organ"] = main_df["Organ"].str.lower()

def _summarize_patients(lesions):
    """Aggregate a per-lesion table into one summary row per patient.

    Args:
        lesions: DataFrame with the columns ``ID``, ``Organ`` (lowercase
            organ name), ``Lesion size at baseline (mm)`` and
            ``Lesion size at post-treatment (mm)``; one row per lesion.

    Returns:
        DataFrame with a 0..N-1 index, sorted by patient, with the columns
        ``Patient ID``, ``Number of solid organ tumor``,
        ``Number of lymph node``, ``Tumor burden at the baseline (mm)``,
        ``Tumor burden at the post-treatment (mm)``,
        ``Lesion size at the baseline (mm)`` and ``Percent change (%)``.

    Raises:
        ValueError: if any lesion size is missing, because the tumor burden
            of that patient could not be computed reliably.
    """
    baseline_col = "Lesion size at baseline (mm)"
    post_col = "Lesion size at post-treatment (mm)"
    solid_col = "Number of solid organ tumor"
    lymph_col = "Number of lymph node"
    burden_base_col = "Tumor burden at the baseline (mm)"
    burden_post_col = "Tumor burden at the post-treatment (mm)"

    # Fail loudly instead of letting groupby silently skip missing sizes.
    if lesions[[baseline_col, post_col]].isnull().values.any():
        raise ValueError(
            "Lesion sizes must not be missing; cannot compute tumor burden."
        )

    # A lesion is a lymph node when its organ name starts with "lymph";
    # everything else (including a missing organ name) counts as a solid
    # organ tumor.
    is_lymph = (
        lesions["Organ"].astype(str).str.startswith("lymph").astype(bool)
    )

    per_lesion = pd.DataFrame({
        "Patient ID": lesions["ID"].astype(int),
        solid_col: (~is_lymph).astype(int),
        lymph_col: is_lymph.astype(int),
        burden_base_col: lesions[baseline_col],
        burden_post_col: lesions[post_col],
    })

    # Group on the actual patient ID rather than on "ID - 1" row positions,
    # so non-contiguous or unsorted IDs are handled correctly.
    totals = per_lesion.groupby("Patient ID").sum()

    # Counts and burdens are reported as whole numbers (fractions truncated).
    summary = totals[
        [solid_col, lymph_col, burden_base_col, burden_post_col]
    ].astype(int)

    # The single-lesion size is only defined for patients with exactly one
    # lesion; for them it equals the baseline burden.  NaN otherwise.
    lesion_count = summary[solid_col] + summary[lymph_col]
    summary["Lesion size at the baseline (mm)"] = (
        summary[burden_base_col].where(lesion_count == 1).astype(float)
    )

    # Percent change relative to baseline, rounded down.  It is undefined
    # for a zero baseline burden, in which case the cell is left as NaN.
    baseline = summary[burden_base_col]
    change = summary[burden_post_col] - baseline
    percent = np.floor(change / baseline.where(baseline != 0) * 100)
    if not percent.isnull().any():
        percent = percent.astype(int)
    summary["Percent change (%)"] = percent

    return summary.reset_index()


# Generate the per-patient dataframe from the per-lesion records
process_df = _summarize_patients(main_df)

process_df

Unnamed: 0,Patient ID,Number of solid organ tumor,Number of lymph node,Tumor burden at the baseline (mm),Tumor burden at the post-treatment (mm),Lesion size at the baseline (mm),Percent change (%)
0,1,2,1,140,155,,10
1,2,1,2,144,154,,6
2,3,1,2,148,149,,0
3,4,3,2,332,320,,-4
4,5,2,1,161,150,,-7
5,6,3,2,388,335,,-14
6,7,2,1,233,195,,-17
7,8,0,2,138,95,,-32
8,9,2,0,62,52,,-17
9,10,3,1,373,239,,-36
