# Import packages

In [2]:
from sklearn.decomposition import PCA
import numpy as np
import glob
from PIL import Image

# Load dataset

In [3]:
# Collect the PNG paths in lexicographic order so sample i is always the same file.
image_files = sorted(glob.glob('./data/*.png'))
if not image_files:
    # Fail early with a clear message; otherwise the empty array only blows up
    # later in reshape/PCA with an unrelated-looking error.
    raise FileNotFoundError("No PNG images found in './data/'")

# Load every image as a 2-D grayscale array.
images = []
for image_file in image_files:
    # The context manager closes the underlying file handle deterministically.
    with Image.open(image_file) as image:
        # 'L': 8-bit pixels, black and white
        images.append(np.array(image.convert('L')))

# Stack the per-image arrays into one array (first axis = sample index).
# NOTE(review): this assumes all images share the same size — confirm.
test = np.array(images)

In [4]:
# Work on an independent copy so the raw image array `test` stays untouched.
X_test = np.array(test, copy=True)

# Preprocessing: scaling pixel values and flattening images

In [5]:
# Cast to float32 first, then divide by 255 to rescale the pixel values
# (presumably 8-bit grayscale, i.e. 0..255 -> 0..1).
X_test = X_test.astype(np.float32)
X_test = X_test / 255.0

In [6]:
# Collapse every image into a single feature row: (n_samples, n_pixels).
X_test_flattened = X_test.reshape((len(X_test), -1))

# Dimensionality Reduction using PCA

In [7]:
# Fixed seed passed to PCA as random_state, for reproducible runs.
seed = 42

# NOTE(review): this fits a brand-new PCA on the test images. The SVM models
# loaded below were presumably trained on features projected by a PCA fitted on
# the training set; a PCA fitted here yields a different basis, so the features
# may not match what the models expect. Consider persisting the training PCA and
# calling only .transform() here — confirm against the training notebook.
# NOTE(review): n_components=150 needs at least 150 samples and 150 pixels per
# image, otherwise scikit-learn raises a ValueError — confirm the data size.
pca = PCA(n_components=150, random_state=seed)
X_test_pca = pca.fit_transform(X_test_flattened)  # Fit PCA on the test data and project it onto 150 components

# 1차적으로 10개 분류

In [9]:
from joblib import dump, load
# Load the first-stage model from disk (presumably the 10-class SVM, judging by
# the file name — confirm). joblib files are pickle-based: only load trusted files.
best_model = load('./svm_model_10.joblib')

# 10개 분류 후 그중 확률이 높은 상위 2개를 또 이진분류

In [10]:
# Class-probability estimates from the first-stage model, one row per sample,
# computed on the PCA-transformed test data.
y_test_proba = best_model.predict_proba(X_test_pca)

# For each sample, the column positions of the two highest probabilities
# (most probable first); negating the matrix makes argsort descending.
# NOTE(review): these are column indices of predict_proba, which follow
# best_model.classes_; they equal the class labels only if the labels are
# 0..k-1 — confirm.
top2_pred_indices = np.argsort(-y_test_proba, axis=1)[:, :2]

# Preview at most the first 10 samples. Slicing (instead of range(10)) avoids an
# IndexError when fewer than 10 images were loaded.
for i, top2 in enumerate(top2_pred_indices[:10]):
    print(f"Sample {i+1} top 2 predicted labels: {top2}")

Sample 1 top 2 predicted labels: [2 4]
Sample 2 top 2 predicted labels: [0 2]
Sample 3 top 2 predicted labels: [1 3]
Sample 4 top 2 predicted labels: [1 3]
Sample 5 top 2 predicted labels: [8 7]
Sample 6 top 2 predicted labels: [1 3]
Sample 7 top 2 predicted labels: [0 6]
Sample 8 top 2 predicted labels: [9 7]
Sample 9 top 2 predicted labels: [8 5]
Sample 10 top 2 predicted labels: [3 6]


In [11]:
import pickle

# Load the second-stage models; the object is later indexed by a sorted
# (label_a, label_b) tuple.
# NOTE: pickle can execute arbitrary code on load — only use a trusted file.
with open('./svm_models_2.pkl', 'rb') as model_file:
    pair_models = pickle.load(model_file)

In [12]:
# Array that receives the final prediction for every sample.
test_pred_final = np.zeros(len(X_test))

# Second stage: each sample is re-classified by the binary model trained for its
# two most probable labels. The models are keyed by the pair in ascending order,
# so sort each row first.
sorted_pairs = np.sort(top2_pred_indices, axis=1)

# Predict once per distinct label pair instead of once per sample: same result,
# but far fewer predict() calls.
for pair in np.unique(sorted_pairs, axis=0):
    in_pair = np.all(sorted_pairs == pair, axis=1)
    # Raises KeyError if no model was stored for this pair.
    model = pair_models[tuple(pair)]
    test_pred_final[in_pair] = model.predict(X_test_pca[in_pair])

# testResult.txt 형식으로 변환

In [13]:
# Cast the final predictions to integers so they are written without a decimal part.
test_pred_final = test_pred_final.astype(int)

# Write one line per sample: zero-padded 5-digit sample index, a space, the label.
# NOTE(review): the index is the position in the sorted file list, not parsed
# from the file name — confirm that this matches the expected numbering.
with open('./testResult.txt', 'w') as out_file:
    out_file.writelines(
        f"{str(idx).zfill(5)} {label}\n"
        for idx, label in enumerate(test_pred_final)
    )