In [12]:
from glob import glob
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm

In [13]:
## Path configuration
import csv  # not used by this cell any more; retained in case later cells rely on the name

test_csv = './archive/fashion-mnist_test.csv'
label_txt = './private_test_dataset/label.txt'
private_test_img_dir_path = './private_test_dataset/data'
# NOTE(review): the data-loading cell reads './private/private_data.csv', not this
# path -- confirm whether the file is moved by hand or one of the two paths is stale.
csv_file_path = './private_data.csv'

# Only the column names of the reference test CSV are reused for the output file.
test = pd.read_csv(test_csv)
df = pd.DataFrame(columns=list(test.columns))

with open(label_txt, 'r') as file1:
    labels = file1.readlines()

# Second whitespace-separated token of every non-blank line of label.txt.
# Blank lines are skipped so a stray empty line no longer raises IndexError.
# NOTE(review): `l` is parsed but never written anywhere; every CSV row below
# gets the placeholder label -1 instead -- confirm this is intended.
l = np.array([label.strip().split()[1] for label in labels if label.strip()])

# Lexicographic order of the file paths defines the row order of the CSV.
img_path = sorted(glob(private_test_img_dir_path + '/*'))

# One row per image: placeholder label -1 followed by the flattened pixel values.
df_list = []
for path in img_path:
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports an unreadable/undecodable file by returning None;
        # fail with the offending path instead of an AttributeError on .reshape.
        raise ValueError(f'cv2.imread could not read image: {path}')
    df_list.append([-1, *img.reshape(-1).tolist()])

# Build the frame once and let pandas write header and rows together. This keeps
# line endings consistent (csv.writer defaults to '\r\n' while the pandas header
# used the platform default) and raises if a row length does not match the header.
private_df = pd.DataFrame(df_list, columns=df.columns)
private_df.to_csv(csv_file_path, index=False)

In [14]:
# Training split: `label` is the target column, the remaining columns go to train_X.
training_data = pd.read_csv('./archive/fashion-mnist_train.csv')
train_X = training_data.drop(columns='label')
train_y = training_data['label']

# Public and private test splits, read from their own CSV files.
public_test_data = pd.read_csv('./public/public_data.csv')
private_test_data = pd.read_csv('./private/private_data.csv')

# Separate each test frame into (X, y) the same way as the training frame.
public_test_X, public_test_y = (
    public_test_data.drop(columns='label'),
    public_test_data['label'],
)
private_test_X, private_test_y = (
    private_test_data.drop(columns='label'),
    private_test_data['label'],
)

In [15]:
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [16]:
# Random transformations applied to every image the generator yields.
datagen = ImageDataGenerator(
    rotation_range=20,  # random rotation within [-20, 20] degrees
    width_shift_range=0.1,  # random horizontal shift
    height_shift_range=0.1,  # random vertical shift
    shear_range=0.2,  # random shear
    zoom_range=0.2,  # random zoom in/out
    fill_mode='constant',
    cval=0  # value used to fill pixels created by the transformation
)

# Rebuild 28x28 single-channel (grayscale) images from the flat pixel rows.
# train_X is a DataFrame on a fresh run, but the augmentation cell rebinds the
# name to a flattened ndarray; np.asarray accepts both, so this cell can be
# re-run afterwards instead of failing on the missing `.values` attribute.
reshaped_train_X = np.asarray(train_X).reshape(-1, 28, 28, 1)


In [17]:
aug_X = []
aug_y = []

# Number of augmented *batches* to draw -- not a per-sample multiplier.
# With 32 images per batch this adds at most 60 * 32 = 1920 augmented images.
augmentation_factor = 60
augmentation_batch_size = 32
# Fixed seed so the sampled batches and their random transforms repeat across runs.
augmentation_seed = 42

augmented_batches = datagen.flow(
    reshaped_train_X,
    train_y,
    batch_size=augmentation_batch_size,
    seed=augmentation_seed,
)
# zip bounds the loop to augmentation_factor batches, replacing the manual break.
for _, (X_batch, y_batch) in zip(range(augmentation_factor), augmented_batches):
    aug_X.append(X_batch)
    aug_y.append(y_batch)

# Stack the collected batches and flatten each image back to a single row.
aug_X = np.concatenate(aug_X)
aug_y = np.concatenate(aug_y)
aug_X = aug_X.reshape(aug_X.shape[0], -1)

# Flatten the original training images the same way.
# NOTE: this rebinds train_X from the loaded DataFrame to an ndarray.
train_X = reshaped_train_X.reshape(reshaped_train_X.shape[0], -1)

# Original samples first, augmented samples appended after them.
X_pre = np.concatenate([train_X, aug_X])
y_pre = np.concatenate([train_y, aug_y])

In [None]:
def _write_predictions(path, preds):
    """Write one '<5-digit zero-padded index> <label>' line per prediction to `path`."""
    with open(path, 'w') as file:
        for image, label in enumerate(preds):
            file.write(f'{image:05d} {label}\n')


# random_state pins the result when scikit-learn selects a randomized SVD solver.
pca = PCA(n_components=380, random_state=42)

# Flatten once and reuse for both fit and transform.
X_pre_flat = X_pre.reshape(len(X_pre), -1)
pca.fit(X_pre_flat)
pca_train_X = pca.transform(X_pre_flat)

# Projections get their own names (matching pca_train_X) instead of overwriting
# public_test_X / private_test_X, so re-running this cell no longer fails on a
# missing `.values` attribute or feeds already-projected data back into the PCA.
pca_public_test_X = pca.transform(
    np.asarray(public_test_X).reshape(len(public_test_X), -1)
)
pca_private_test_X = pca.transform(
    np.asarray(private_test_X).reshape(len(private_test_X), -1)
)

# Train the model on the full (original + augmented) training set.
svc = SVC(gamma='scale', kernel='rbf', C=8)
svc.fit(pca_train_X, y_pre)

# Predict on both test sets.
public_preds = svc.predict(pca_public_test_X)
private_preds = svc.predict(pca_private_test_X)

# Row-index -> predicted-label mappings, kept under their original names.
public_preds_dict = dict(enumerate(public_preds))
private_preds_dict = dict(enumerate(private_preds))

_write_predictions('./public/pca380_aug60_public_testResult.txt', public_preds)
_write_predictions('./private/pca380_aug60_private_testResult.txt', private_preds)
