### DATA IMAGE GENERATOR

In [1]:
# The 39 marker labels. The list order is significant: later cells use it to
# order the per-axis columns ("<marker>_X", "<marker>_Y", "<marker>_Z").
MARKERS = [
    'LFHD', 'RFHD', 'LBHD', 'RBHD',
    'C7', 'LSHO', 'RSHO', 'CLAV', 'RBAK',
    'LUPA', 'RUPA', 'STRN', 'T10',
    'LELB', 'RELB', 'LFRM', 'RFRM',
    'LWRA', 'RWRA', 'LWRB', 'RWRB',
    'LFIN', 'RFIN', 'LASI', 'RASI',
    'LPSI', 'RPSI', 'LTHI', 'RTHI',
    'LKNE', 'RKNE', 'LTIB', 'RTIB',
    'LANK', 'RANK', 'LTOE', 'RTOE',
    'LHEE', 'RHEE',
]
len(MARKERS)

39

In [2]:
import os
import glob

import pandas as pd
import numpy as np
from PIL import Image 
import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Root directory of the dataset. The original author's local path stays the
# default so existing runs are unaffected; set the PD_DATAPATH environment
# variable to run the notebook on another machine without editing this cell.
DATAPATH = os.environ.get("PD_DATAPATH", "/Users/sujinlee/Desktop/nims/pd/dataset")

# Per-category sub-directories of the dataset root.
CONTROL = os.path.join(DATAPATH, "Controls")
PD = os.path.join(DATAPATH, "PD")

# Patient table; later cells read its Category, Patient, cntFW and cntBW columns.
df_patients = pd.read_csv(os.path.join(DATAPATH, "patients.csv"))
# Loaded from NULL_info.csv; not referenced by the cells visible in this notebook excerpt.
df_null_info = pd.read_csv(os.path.join(DATAPATH, "NULL_info.csv"))

In [4]:
# Data files that are not used:
#   PD/KMS_FW3.csv : the raw data has no 'RFHD' marker.
#   PD/BGH_FW1.csv : too many columns are NULL.
# NOTE(review): the conversion loop further down does not read this list; it
# hard-codes its own skip conditions and also skips BGH BW1, which is not
# listed here -- confirm which of the two is authoritative.
EXCLUDE = [
    ["KMS", "FW3"],
    ["BGH", "FW1"],
]

In [5]:
# For each category, print the sum of its cntFW and cntBW columns.
for category in ("PD", "Controls"):
    rows = df_patients[df_patients["Category"] == category]
    total = np.sum(rows["cntFW"].values) + np.sum(rows["cntBW"].values)
    print(f"{category} count:", total)

PD count: 475
Controls count: 132


In [7]:
# Output roots, created next to the dataset directory: one for the JPEG
# images and one for the raw NumPy arrays.
DATAPATH_IMG = DATAPATH + "_img1"
DATAPATH_NP = DATAPATH + "_np1"

# Create <root>/Controls and <root>/PD under both output roots.
# exist_ok=True keeps this cell re-runnable: plain os.mkdir raises
# FileExistsError as soon as the directories exist from a previous run.
for out_root in (DATAPATH_IMG, DATAPATH_NP):
    for category_dir in ("Controls", "PD"):
        os.makedirs(os.path.join(out_root, category_dir), exist_ok=True)

In [8]:
# Convert every trial CSV (x, y, z of all markers) into a 3-D array and save
# it twice: as a .npy file under DATAPATH_NP and as a JPEG under DATAPATH_IMG.

# Trials that are skipped, keyed by (patient, direction, trial index).
# These are the same three trials the original hard-coded conditions skipped.
SKIPPED_TRIALS = {("KMS", "FW", 3), ("BGH", "FW", 1), ("BGH", "BW", 1)}

# Loop-invariant lookups, built once instead of once per trial.
marker_set = set(MARKERS)
cols_x = [m + "_X" for m in MARKERS]
cols_y = [m + "_Y" for m in MARKERS]
cols_z = [m + "_Z" for m in MARKERS]

for cate, name, nFW, nBW in zip(df_patients["Category"].values,
                                df_patients["Patient"].values,
                                df_patients["cntFW"].values,
                                df_patients["cntBW"].values):
    # Per patient: all FW trials first, then all BW trials (trial indices start at 1).
    for direction, n_trials in (("FW", nFW), ("BW", nBW)):
        for idx in range(1, n_trials + 1):
            if (name, direction, idx) in SKIPPED_TRIALS:
                continue

            trial = f"{name}_{direction}{idx}"
            df = pd.read_csv(os.path.join(DATAPATH, cate, f"PREP_{trial}.csv"))

            # Extract the x, y and z columns: every column whose name contains
            # the axis letter.
            df_x = df.filter(regex='X')
            df_y = df.filter(regex='Y')
            df_z = df.filter(regex='Z')

            # Report the column counts before validating, so the log shows
            # all three numbers even when a check below fails.
            for axis, df_axis in (("X", df_x), ("Y", df_y), ("Z", df_z)):
                print(f"LEN {axis}:", len(df_axis.columns))

            # Each axis must have exactly one column per marker and no column
            # whose prefix is not a known marker.
            for axis, df_axis in (("X", df_x), ("Y", df_y), ("Z", df_z)):
                unknown = {col.split('_')[0] for col in df_axis.columns} - marker_set
                assert len(df_axis.columns) == len(MARKERS) and not unknown, (
                    f"PREP_{trial}.csv: unexpected {axis} columns "
                    f"(count={len(df_axis.columns)}, unknown={sorted(unknown)})"
                )

            # Put the columns into MARKERS order.
            df_x = df_x[cols_x]
            df_y = df_y[cols_y]
            df_z = df_z[cols_z]

            # Stack the three axes on a new last dimension: (rows, markers, 3).
            df_img = np.stack([df_x, df_y, df_z], -1)

            # Save the array as-is.
            np.save(os.path.join(DATAPATH_NP, cate, f"NP_{trial}.npy"), df_img)

            # Save as an image. JPEG is lossy, so only the .npy file holds the
            # exact values.
            # NOTE(review): astype(np.uint8) does not rescale -- values outside
            # 0-255 (or NaN) are not preserved by this cast. Confirm the PREP_
            # files are already scaled to that range.
            df_img_jpg = Image.fromarray(df_img.astype(np.uint8))
            df_img_jpg.save(os.path.join(DATAPATH_IMG, cate, f"IMG_{trial}.jpg"))

            print(name, f"{direction}{idx}")
            # The row count is only logged for FW trials, as in the original cell.
            if direction == "FW":
                print(len(df))

LEN X: 39
LEN Y: 39
LEN Z: 39
AMJ FW1
441
LEN X: 39
LEN Y: 39
LEN Z: 39
AMJ FW2
339
LEN X: 39
LEN Y: 39
LEN Z: 39
AMJ FW3
355
LEN X: 39
LEN Y: 39
LEN Z: 39
AMJ BW1
LEN X: 39
LEN Y: 39
LEN Z: 39
AMJ BW2
LEN X: 39
LEN Y: 39
LEN Z: 39
AMJ BW3
LEN X: 39
LEN Y: 39
LEN Z: 39
BDY FW1
312
LEN X: 39
LEN Y: 39
LEN Z: 39
BDY FW2
284
LEN X: 39
LEN Y: 39
LEN Z: 39
BDY FW3
300
LEN X: 39
LEN Y: 39
LEN Z: 39
BDY BW1
LEN X: 39
LEN Y: 39
LEN Z: 39
BDY BW2
LEN X: 39
LEN Y: 39
LEN Z: 39
BDY BW3
LEN X: 39
LEN Y: 39
LEN Z: 39
BGH FW2
219
LEN X: 39
LEN Y: 39
LEN Z: 39
BGH FW3
237
LEN X: 39
LEN Y: 39
LEN Z: 39
BGH BW2
LEN X: 39
LEN Y: 39
LEN Z: 39
BGH BW3
LEN X: 39
LEN Y: 39
LEN Z: 39
BHJ FW1
674
LEN X: 39
LEN Y: 39
LEN Z: 39
BHJ FW2
597
LEN X: 39
LEN Y: 39
LEN Z: 39
BHJ FW3
653
LEN X: 39
LEN Y: 39
LEN Z: 39
BHJ BW1
LEN X: 39
LEN Y: 39
LEN Z: 39
BHJ BW2
LEN X: 39
LEN Y: 39
LEN Z: 39
BHJ BW3
LEN X: 39
LEN Y: 39
LEN Z: 39
BSB FW1
711
LEN X: 39
LEN Y: 39
LEN Z: 39
BSB FW2
812
LEN X: 39
LEN Y: 39
LEN Z: 39
BSB FW