# In [4]:
import pandas as pd
import random
import os
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

class CFG:
    """Namespace for run-wide configuration constants."""

    # Seed value handed to seed_everything() at start-up.
    SEED: int = 42
    
def seed_everything(seed: int) -> None:
    """Seed the random number generators this script relies on.

    Seeds Python's ``random`` module and NumPy's global generator, and
    exports the seed as the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Value used for every generator.
    """
    hash_seed = str(seed)
    random.seed(seed)
    # NOTE: per the CPython docs, PYTHONHASHSEED is read at interpreter
    # start-up, so this export is only picked up by child processes.
    os.environ.update({'PYTHONHASHSEED': hash_seed})
    np.random.seed(seed)
# Fix the random seeds before anything else runs.
seed_everything(CFG.SEED)

# Load the training and test tables from the working directory.
train, test = (
    pd.read_csv(csv_path) for csv_path in ('./train.csv', './test.csv')
)

def get_x_y(df):
    """Split a raw frame into model inputs and, when present, labels.

    The ``id`` column is always dropped from the returned features.

    Args:
        df: DataFrame containing an ``id`` column and optionally a
            ``class`` column.

    Returns:
        ``(features, labels)`` when ``df`` has a ``class`` column;
        otherwise only the features DataFrame.
    """
    # Unlabelled frame: nothing to split off, return the features alone.
    if 'class' not in df.columns:
        return df.drop(columns=['id'])

    features = df.drop(columns=['id', 'class'])
    labels = df['class']
    return features, labels

# --- Split features / labels ------------------------------------------------
# get_x_y returns (features, labels) when a 'class' column is present and the
# features alone otherwise; this assumes test.csv carries no 'class' column.
train_x, train_y = get_x_y(train)
test_x = get_x_y(test)

# --- Encode the target and the SNP columns ----------------------------------
class_le = preprocessing.LabelEncoder()
snp_le = preprocessing.LabelEncoder()
snp_col = [f'SNP_{str(x).zfill(2)}' for x in range(1, 16)]  # SNP_01 .. SNP_15

# A single encoder is fitted on the pooled values of all SNP columns so that a
# given value maps to the same integer in every column.
snp_data = [value for col in snp_col for value in train_x[col].values]

train_y = class_le.fit_transform(train_y)
snp_le.fit(snp_data)

# NOTE: the encoder only knows values seen in the training data; transform()
# raises ValueError if the test data contains a value that was never fitted.
for col in snp_col:
    train_x[col] = snp_le.transform(train_x[col])
    test_x[col] = snp_le.transform(test_x[col])

# --- Scale features to [0, 1] -----------------------------------------------
# The scaler is fitted on the training data only and the SAME fitted scaler is
# applied to the test data. Refitting on the test set would map train and test
# onto different scales and distort the distances the KNN model relies on.
# Fresh DataFrames are built instead of assigning through ``df[:]`` so the
# float results never have to be written into integer-typed columns.
scaler = MinMaxScaler()
train_x = pd.DataFrame(
    scaler.fit_transform(train_x), columns=train_x.columns, index=train_x.index
)
test_x = pd.DataFrame(
    scaler.transform(test_x), columns=test_x.columns, index=test_x.index
)

# --- Fit the model and predict ----------------------------------------------
classifier = KNeighborsClassifier(
    n_neighbors=16,
    weights='distance',
    algorithm='brute',
    leaf_size=30,
    p=2,
    metric='minkowski',
    metric_params=None,
    n_jobs=None,
)
classifier.fit(train_x, train_y)

guesses = classifier.predict(test_x)

# --- Write the submission file ----------------------------------------------
submit = pd.read_csv('./sample_submission.csv')
# Map the integer predictions back to the original class labels.
submit['class'] = class_le.inverse_transform(guesses)
# Make sure the output directory exists so to_csv does not fail on a fresh
# checkout.
os.makedirs('./answer', exist_ok=True)
submit.to_csv('./answer/submit_para_nor.csv', index=False)