#  기계학습 실습 Week 4 (tabular data를 이미지로 변환 후 classification)

2024.9.23.<br>

서강대 경제대학 양현주 (hyang@sogang.ac.kr)<br><br>


Tabular data source **(search for the 'OliveOil' dataset)**: <br>
http://www.timeseriesclassification.com/dataset.php <br><br>

Main sources of code: <br>

https://github.com/fastai/fastbook/blob/master/01_intro.ipynb <br>
https://github.com/fastai/fastbook/blob/master/02_production.ipynb <br>
https://gist.github.com/oguiza/c9c373aec07b96047d1ba484f23b7b47 <br><br>

Visual explanation of GAF (Gramian Angular Field):<br>

https://medium.com/analytics-vidhya/encoding-time-series-as-images-b043becbdbf3 <br><br>

An example of applying GAF to financial data, by Chen and Tsai (2020): <br>

https://doi.org/10.1186/s40854-020-00187-0 <br><br>

An example of using GAF to classify human activity with wearable devices: <br>

https://ieeexplore.ieee.org/document/9234451

# 1. Install and import libraries

In [None]:
import fastai
print(fastai.__version__)

from fastai.vision.all import *
#from fastai.text.all import *
#from fastai.collab import *
#from fastai.tabular.all import *

from matplotlib.pyplot import imshow

2.7.17


# 2. Download file

In [None]:
# Folder that holds the extracted OliveOil data files.
img_folder_nm = "oliveoil"

# File name of the zip archive to be downloaded.
img_zipfile_nm = "OliveOil.zip"

In [None]:
"""
다운로드 및 압축 풀기 코드 입력
"""

# 3. Load raw files as Pandas dataframes

In [None]:
from pathlib import Path
# Whitespace-delimited text files for the train and test (used as validation) splits.
train_csv = Path(f'{img_folder_nm}/OliveOil_TRAIN.txt')
valid_csv = Path(f'{img_folder_nm}/OliveOil_TEST.txt')

In [None]:
import pandas as pd
# Read the whitespace-delimited train file; the file has no header row, so
# columns are numbered 0..N and column 0 is the class label.
# `sep=r'\s+'` replaces `delim_whitespace=True`, which is deprecated in
# pandas 2.2 and no longer accepted by later releases.
train_df = pd.read_csv(train_csv, sep=r'\s+', header=None)
train_df

In [None]:
# Read the whitespace-delimited test file (used as the validation set); the
# file has no header row, so columns are numbered 0..N.
# `sep=r'\s+'` replaces `delim_whitespace=True`, which is deprecated in
# pandas 2.2 and no longer accepted by later releases.
valid_df = pd.read_csv(valid_csv, sep=r'\s+', header=None)
valid_df

In [None]:
# Stack the train rows on top of the validation rows into one frame
# (each frame keeps its own row labels).

df = pd.concat([train_df, valid_df], axis=0)
df.shape

# 4. Create label arrays (ground truth data)

In [None]:
# Column 0 holds the class label: extract it as a flat integer array.
y_train = train_df[0].to_numpy(dtype=int)
y_train

In [None]:
# Column 0 holds the class label: extract it as a flat integer array.
y_valid = valid_df[0].to_numpy(dtype=int)
y_valid

# 5. Remove labels from df and scale feature values

In [None]:
# Min-max scale the feature columns.

from sklearn.preprocessing import MinMaxScaler

# Feature columns only: the first column is the class label and is left out.
train_features = train_df.iloc[:, 1:]
valid_features = valid_df.iloc[:, 1:]

# Fit on the train set only, so that train and validation data are scaled
# with the same min and max values.
scaler = MinMaxScaler().fit(train_features)

# Scaled features as float32 dataframes.
X_train = pd.DataFrame(scaler.transform(train_features), dtype='float32')
X_valid = pd.DataFrame(scaler.transform(valid_features), dtype='float32')

In [None]:
# Display the scaled training features.
X_train

In [None]:
# Display the validation features (scaled with the scaler fitted on the train set).
X_valid

# 6. Plot some data

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Overlay the first two training samples (class = 1) in a single figure.

plt.figure(figsize=(8, 8))
for row in (0, 1):
    plt.plot(X_train.iloc[row])
plt.show()

In [None]:
# Overlay training samples 24 and 25 (class = 4) in a single figure.

plt.figure(figsize=(8, 8))
for row in (24, 25):
    plt.plot(X_train.iloc[row])
plt.show()

# 7. Convert time series to image (Gramian Angular Difference Field)

In [None]:
!pip install pyts

In [None]:
from pyts.image import GramianAngularField

## 7.1 Transform train dataset to image using GADF

In [None]:
# Encode each scaled training series as a Gramian Angular Difference Field
# (method='difference') and show the shape of the resulting array.
transformer = GramianAngularField(method='difference')
X_train_GAF = transformer.fit(X_train).transform(X_train)
X_train_GAF.shape

In [None]:
# Value range of the GADF arrays before rescaling.

X_train_GAF.min(), X_train_GAF.max()

In [None]:
# Display the raw GADF matrix of the first training sample.
X_train_GAF[0]

In [None]:
# Rescale to the integer range 0-255 (assumes the GADF values lie in [-1, 1]).

unit_scaled = (X_train_GAF + 1) / 2  # shift and halve: [-1, 1] -> [0, 1]
X_train_GAF_L = (unit_scaled * 255).astype(int)

In [None]:
# Value range of the rescaled integer arrays.

X_train_GAF_L.min(), X_train_GAF_L.max()

In [None]:
# Show the GADF image of the first training sample (original float array).

imshow(X_train_GAF[0])

In [None]:
# Show the GADF image of the first training sample (array rescaled to 0-255).

imshow(X_train_GAF_L[0])

## 7.2 Transform validation dataset to image using GADF

In [None]:
# Apply the same GADF transformer to the validation features and show the
# shape of the resulting array.
X_valid_GAF = transformer.transform(X_valid)
X_valid_GAF.shape

In [None]:
# Value range of the validation GADF arrays before rescaling.

X_valid_GAF.min(), X_valid_GAF.max()

In [None]:
# Display the raw GADF matrix of the first validation sample.
X_valid_GAF[0]

In [None]:
# Rescale to the integer range 0-255 (assumes the GADF values lie in [-1, 1]).

unit_scaled = (X_valid_GAF + 1) / 2  # shift and halve: [-1, 1] -> [0, 1]
X_valid_GAF_L = (unit_scaled * 255).astype(int)

In [None]:
# Value range of the rescaled validation arrays.

X_valid_GAF_L.min(), X_valid_GAF_L.max()

In [None]:
# Show the GADF image of the first validation sample (original float array).

imshow(X_valid_GAF[0])

In [None]:
# Show the GADF image of the first validation sample (array rescaled to 0-255).

imshow(X_valid_GAF_L[0])

# 8. Save images under class name subfolders

In [None]:
from os.path import join

In [None]:
# Imported explicitly so this cell does not depend on `os` leaking in
# through a star import.
import os

root_folder = 'gaf_images'

# Save train set images as <root_folder>/train/<class_name>/<index>.png

train_root = os.path.join(root_folder, 'train')

for i, (label, image) in enumerate(zip(y_train, X_train_GAF_L)):

    # One sub-folder per class label.
    class_folder = os.path.join(train_root, str(label))

    # exist_ok=True makes the cell safe to re-run and avoids a separate
    # exists-check before creating the folder.
    os.makedirs(class_folder, exist_ok=True)

    # The file name is the sample's position in the train set.
    # NOTE(review): plt.imsave appears to scale colours per image unless
    # vmin/vmax are passed -- confirm whether vmin=0, vmax=255 is wanted.
    plt.imsave(os.path.join(class_folder, f'{i}.png'), image)

In [None]:
# Save valid set images as <root_folder>/valid/<class_name>/<index>.png

# Imported explicitly so this cell does not depend on `os` leaking in
# through a star import.
import os

valid_root = os.path.join(root_folder, 'valid')

for i, (label, image) in enumerate(zip(y_valid, X_valid_GAF_L)):

    # One sub-folder per class label.
    class_folder = os.path.join(valid_root, str(label))

    # exist_ok=True makes the cell safe to re-run and avoids a separate
    # exists-check before creating the folder.
    os.makedirs(class_folder, exist_ok=True)

    # The file name is the sample's position in the valid set.
    # NOTE(review): plt.imsave appears to scale colours per image unless
    # vmin/vmax are passed -- confirm whether vmin=0, vmax=255 is wanted.
    plt.imsave(os.path.join(class_folder, f'{i}.png'), image)

# 9. Prepare data for CNN

In [None]:
# Settings passed to the image data loaders.

my_random_seed = 42  # passed as `seed`
my_batch_size = 8    # passed as `bs` (batch size)

In [None]:
from fastai.vision.data import ImageDataLoaders

In [None]:
# Build the data loaders from the class sub-folders found under
# <root_folder>/train and <root_folder>/valid.
# NOTE(review): `seed` presumably only matters when a random split
# (valid_pct) is requested -- confirm against the fastai documentation.
data = ImageDataLoaders.from_folder(
    root_folder,
    train='train',
    valid='valid',
    seed=my_random_seed,
    bs=my_batch_size,
)

In [None]:
# Number of items in the training and validation datasets.
len(data.train_ds), len(data.valid_ds)

In [None]:
# Show up to 8 example images from a batch, arranged in 2 rows.

data.show_batch(max_n=8, nrows=2)

# 10. Train CNN model

In [None]:
"""
CNN classification 코드 입력
Early stopping, patience=10으로
epoch은 50으로
"""

# 11. Confusion matrix and images with top losses

In [None]:
"""
Confusion matrix 그리기
"""