In [11]:
import pandas as pd
import numpy as np
import shutil
import datetime
import os
import csv
import matplotlib.pyplot as plt

In [18]:
# Main entry point: draw one temperature box plot per patient per recorded day.
def box_plot(path, time_step, year=None, month=None, day=None, hours=None, minutes=None):
    """Draw per-day temperature box plots for every patient folder under *path*.

    Each sub-folder of ``path`` holds the files of one patient.  Files are read
    as header-less CSV whose columns are, in order:
    ``ID, date, time, hp, temp, move``.  The ``temp`` column is stored as an
    integer and divided by 100 before plotting; only readings strictly between
    25 and 45 are kept.

    Outputs:
      * ``<path>_plot/<pid[:4]>/<pid[:4]> box plot From <d> to <d+1>.png`` -
        one figure per patient per recorded date (``<path>_plot`` is wiped and
        recreated on every call).
      * ``<pid[:4]>_medians.csv`` in the current working directory - one row
        per (date, slot) with ``date, slot number, q1, q3, median``.

    Args:
        path: Folder that contains one sub-folder per patient.
        time_step: Spacing in minutes between consecutive time slots
            (e.g. 20 or 30).
        year, month, day, hours, minutes: End of the plotted range.  Any value
            left as ``None`` is taken from the current time *when the function
            is called*.  Slots always start at 08:00 of that day.

    Raises:
        ValueError: If the requested end time is earlier than 08:00.
    """
    # Resolve the "now" defaults at call time.  Using datetime.now() directly
    # as a default value would freeze the timestamp when the function is
    # defined, which goes stale in a long-lived notebook kernel.
    current = datetime.datetime.now()
    year = current.year if year is None else year
    month = current.month if month is None else month
    day = current.day if day is None else day
    hours = current.hour if hours is None else hours
    minutes = current.minute if minutes is None else minutes
    now = datetime.datetime(year, month, day, hours, minutes)

    # Time slots run from 08:00 up to (but not including) `now`.  They do not
    # depend on the patient or file, so compute them once.
    start = datetime.datetime(year, month, day, 8, 0)
    if now < start:
        raise ValueError('end time %s is earlier than the 08:00 start of the plot range' % now)
    steps = (now - start) // datetime.timedelta(minutes=time_step)
    hour_list = [start + datetime.timedelta(minutes=i * time_step) for i in range(steps)]
    ticks = np.linspace(1, steps, steps)
    time_tag = [slot.strftime("%H:%M") for slot in hour_list]

    # Fresh output folder for the figures.
    plot_root = path + '_plot'
    if os.path.isdir(plot_root):
        shutil.rmtree(plot_root, ignore_errors=True)
    os.makedirs(plot_root, exist_ok=True)

    for pid in os.listdir(path):  # one folder per patient
        if 'check' in pid:
            continue
        patient_dir = path + '/' + pid
        if not os.path.isdir(patient_dir):
            continue  # stray file next to the patient folders

        # When a patient has several CSV files, merge them into one first.
        csv_num = sum('csv' in name for name in os.listdir(patient_dir))
        if csv_num > 1:
            gather_csv(patient_dir)

        # One figure folder per patient; exist_ok because several files (or two
        # patients sharing the same 4-character prefix) may map to the same folder.
        patient_plot_dir = plot_root + '/' + pid[:4]
        os.makedirs(patient_plot_dir, exist_ok=True)

        for file in os.listdir(patient_dir):
            if 'check' in file or 'xlsx' in file:
                continue
            df = pd.read_csv(patient_dir + '/' + file, header=None)
            # Column index -> meaning:
            #   0   1     2     3   4     5
            #   ID  date  time  hp  temp  move
            df[1] = [datetime.datetime.strptime(i, '%Y-%m-%d') for i in df[1].values]
            df[2] = [datetime.datetime.strptime(i, '%H:%M:%S').time() for i in df[2].values]

            date = df[1].values
            clock = df[2].values
            temps = df[4].values.astype('int') / 100  # stored value is temperature * 100
            in_range = (temps > 25) & (temps < 45)    # keep only readings inside (25, 45)
            date_list = list(dict.fromkeys(date))     # unique dates, first-seen order

            # Statistics of every date are collected here and written once at
            # the end; re-opening the CSV in 'w' mode for each date would keep
            # only the last date.
            med_data = []  # rows: [date, slot number, q1, q3, median]

            for this_day in date_list:
                next_day = this_day + np.timedelta64(1, 'D')
                d_filter = (date >= this_day) & (date < next_day)
                day_label = np.datetime_as_string(this_day, unit='D')
                time_zone = 'From ' + day_label + ' to ' + np.datetime_as_string(next_day, unit='D')

                data = temps[d_filter]
                valid = in_range[d_filter]
                hour = clock[d_filter]

                # NOTE(review): every slot aggregates a fixed ONE-HOUR window
                # starting at the slot time, regardless of time_step, so the
                # windows overlap when time_step < 60.  A window that crosses
                # midnight compares against a wrapped time and selects nothing.
                # Kept as in the original - confirm this is intended.
                each_hour = []
                for slot in hour_list:
                    window_end = (slot + datetime.timedelta(hours=1)).time()
                    h_filter = (hour >= slot.time()) & (hour < window_end)
                    each_hour.append(data[(h_filter) & valid])

                # Draw the figure: dotted green reference line at 35 plus one box per slot.
                fig = plt.figure(figsize=(15, 8))
                plt.plot(ticks, [35] * steps, 'g:')
                boxes = plt.boxplot(each_hour, flierprops=dict(markerfacecolor='b', marker='.'))

                # Record median, q1 and q3 of each box.  A whisker line starts
                # at the box edge, so its first y value is the quartile.
                day_rows = []
                for q_idx, median_line in enumerate(boxes['medians']):
                    med = median_line.get_ydata()
                    q1 = boxes['whiskers'][2 * q_idx].get_ydata()
                    q3 = boxes['whiskers'][2 * q_idx + 1].get_ydata()
                    day_rows.append([day_label, q_idx + 1, q1[0], q3[0], med[0]])
                print(pd.DataFrame(day_rows))
                med_data.extend(day_rows)

                plt.xticks(ticks, time_tag, rotation='vertical')
                plt.xlabel('Hours')
                plt.ylabel('Temperature')
                plt.suptitle(time_zone)
                plt.savefig(patient_plot_dir + '/' + pid[:4] + ' box plot ' + time_zone + '.png')  # output file name
                # Close (not just clear) the figure so figures do not pile up
                # in memory across patients and dates.
                plt.close(fig)

            # Save the collected statistics of all dates as CSV.
            with open(pid[:4] + '_medians.csv', 'w', newline='') as csvfile:
                csv.writer(csvfile).writerows(med_data)

In [None]:
# Install a blanket "ignore" filter so warnings raised while plotting do not
# clutter the notebook output.
import warnings
warnings.filterwarnings('ignore')

# Run the plotting for the 'ICU20200211' data folder with 30-minute time steps;
# the end of the plotted range falls back to box_plot's default arguments.
box_plot('ICU20200211', 30)

In [None]:
# Call this when one patient folder contains two or more CSV files.
# `path` must be the patient's own folder,
# e.g.: path = 'ICU20200211/132'
#       gather_csv(path)

def gather_csv(path):
    """Merge all data files of one patient folder into a single CSV file.

    Every file in ``path`` whose name contains neither ``'check'`` nor
    ``'xlsx'`` is read as comma-separated text.  All rows are written to
    ``<watch_id>_<first date>,<last date>.csv`` inside ``path`` and the source
    files are removed afterwards.  ``watch_id`` is the first four characters of
    the last source file name (in sorted order); the dates come from the second
    column (``%Y-%m-%d``).

    Args:
        path: Folder of a single patient.

    Raises:
        ValueError: If the files contain rows but none has a date column, or a
            date does not match ``%Y-%m-%d``.  Nothing is written or deleted
            in that case.
    """
    # Skip helper files.  Note that ('check' or 'xlsx') evaluates to just
    # 'check', so each substring has to be tested separately - otherwise .xlsx
    # files would be read as text and then deleted.
    sources = sorted(
        name for name in os.listdir(path)
        if 'check' not in name and 'xlsx' not in name
    )

    pt_info = []
    for name in sources:
        with open(path + '/' + name, 'r') as f:
            for line in f:
                line = line.strip('\n')
                if not line:
                    continue  # a blank line carries no record
                pt_info.append(line.split(','))

    if not pt_info:
        return  # nothing to merge; leave the folder untouched

    watch_id = sources[-1][:4]

    # Unique dates (second column) are needed to name the merged file.
    dates = list(dict.fromkeys(row[1] for row in pt_info if len(row) > 1))
    if not dates:
        raise ValueError('no date column found in the files under ' + path)

    def as_date(text):
        return datetime.datetime.strptime(text, '%Y-%m-%d').date()

    first = min(dates, key=as_date)
    end = max(dates, key=as_date)

    merged_name = watch_id + '_' + first + ',' + end + '.csv'
    with open(path + '/' + merged_name, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(pt_info)

    # Delete the sources only after the merged file has been written, and never
    # delete the merged file itself if it happened to be one of the inputs.
    for name in sources:
        if name != merged_name:
            os.remove(path + '/' + name)