In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import random as r
import seaborn as sns
import matplotlib.pyplot as plt

from time import time
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score

In [2]:
def breaker() -> None:
    """Print a row of 50 asterisks with an empty line before and after it."""
    separator = "*" * 50
    print(f"\n{separator}\n")

In [3]:
# Target labels are assembled from two CSV files and stacked in file order:
# first the `classes` column of data.csv, then the `Label` column of submission.csv.
# NOTE(review): the second file is a submission.csv, so these look like predicted
# (pseudo) labels rather than ground truth — confirm.
df = pd.read_csv("../input/4ac-dataframe/data.csv")
y_1 = df["classes"].copy().values

# `df` is deliberately rebound; only the extracted label arrays are kept.
df = pd.read_csv("../input/4ac-swinfeatures-ml-infer/submission.csv")
y_2 = df["Label"].copy().values

# 1-D label vector; its order must match the row order of the feature arrays
# it is later paired with.
y = np.concatenate([y_1, y_2])

In [4]:
class CFG(object):
    """Run configuration: seed, fold count, verbosity flag and I/O locations.

    Args:
        seed: Random seed stored on the instance as ``seed``.
        n_splits: Number of cross-validation folds, stored as ``n_splits``.
        show_info: Verbosity flag, stored as ``show_info``.

    Attributes:
        features_path: Directory holding the input feature files (fixed path).
        model_save_path: Directory models are written to (``"models"``,
            relative to the current working directory).

    Side effects:
        Ensures ``model_save_path`` exists when the instance is created.

    Raises:
        FileExistsError: If ``model_save_path`` exists but is not a directory.
    """

    def __init__(self,
                 seed: int = 42,
                 n_splits: int = 5,
                 show_info: bool = False,
                 ):

        self.seed = seed
        self.n_splits = n_splits
        self.show_info = show_info
        self.features_path = "../input/4actrsfsna384"
        self.model_save_path = "models"
        # exist_ok=True avoids the check-then-create race of
        # `if not os.path.exists(...)` and keeps re-running the cell idempotent.
        os.makedirs(self.model_save_path, exist_ok=True)

# Shared configuration instance: seed 42, verbose output enabled, n_splits left
# at the CFG default of 5. Constructing it also creates the model output directory.
cfg = CFG(seed=42, show_info=True)

In [5]:
class Pipelines(object):
    """Build a single-step sklearn ``Pipeline`` around one supported classifier.

    Supported ``model_name`` keys:
        ``"lgr"`` LogisticRegression (max_iter=1000), ``"knc"`` KNeighborsClassifier,
        ``"svc"`` SVC, ``"rfc"`` RandomForestClassifier, ``"xgb"`` XGBClassifier.

    Args:
        model_name: Key selecting the classifier.
        seed: Passed as ``random_state`` to every classifier except
            KNeighborsClassifier, which is constructed with no arguments.

    Attributes:
        model_name: The key that was passed in.
        model: ``Pipeline`` whose only step is named ``"classifier"``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported keys.
            (Previously the instance was created without a ``model`` attribute
            and the failure surfaced later as an ``AttributeError``.)
    """

    def __init__(self, model_name: str, seed: int):
        self.model_name = model_name

        # Factories are lazy so only the requested estimator is instantiated.
        factories = {
            "lgr": lambda: LogisticRegression(random_state=seed, max_iter=1000),
            "knc": lambda: KNeighborsClassifier(),
            "svc": lambda: SVC(random_state=seed),
            "rfc": lambda: RandomForestClassifier(random_state=seed),
            "xgb": lambda: XGBClassifier(random_state=seed),
        }

        if model_name not in factories:
            raise ValueError(
                f"Unknown model_name {model_name!r}; expected one of {sorted(factories)}"
            )

        self.model = Pipeline(
            steps=[
                ("classifier", factories[model_name]()),
            ]
        )

In [6]:
# Classifier keys understood by `Pipelines`, in evaluation order.
names: list = [
    "lgr",
    "knc",
    "rfc",
    "xgb",
    "svc"
]

# Base names of the feature arrays loaded from cfg.features_path (`<name>.npy`).
model_names: list = [
    "swin_base_patch4_window12_384_in22k",
    "swin_large_patch4_window12_384_in22k",
]

overall_best_acc: float = 0.0
# Initialised up front so the final report cannot raise NameError when no fold
# ever scores above 0.0.
overall_best_name = None

# With an integer random_state every .split() call yields the same folds, so a
# single splitter can be shared by all feature sets and classifiers.
splitter = StratifiedKFold(n_splits=cfg.n_splits, random_state=cfg.seed, shuffle=True)

for model_name in model_names:
    X = np.load(os.path.join(cfg.features_path, f"{model_name}.npy"))
    if cfg.show_info:
        breaker()
        print(f"{model_name.upper()}")

    for name in names:
        # Best fold for this (feature set, classifier) pair.
        best_name = None
        best_acc: float = 0.0
        if cfg.show_info: breaker()

        for fold, (tr_idx, va_idx) in enumerate(splitter.split(X, y), start=1):
            X_train, X_valid, y_train, y_valid = X[tr_idx], X[va_idx], y[tr_idx], y[va_idx]

            # A fresh pipeline per fold so no fitted state leaks between folds.
            my_pipeline = Pipelines(name, cfg.seed)
            my_pipeline.model.fit(X_train, y_train)

            y_pred = my_pipeline.model.predict(X_valid)
            # accuracy_score expects (y_true, y_pred); the score itself is the
            # same in either order, but this matches the documented signature.
            acc = accuracy_score(y_valid, y_pred)
            if cfg.show_info:
                print(f"{my_pipeline.model_name}, {fold}, Accuracy : {acc:.5f}")

            run_tag = f"{model_name}_{name}_{fold}"

            # Strict '>' keeps the earliest run on ties.
            if acc > overall_best_acc:
                overall_best_acc = acc
                overall_best_name = run_tag

            if acc > best_acc:
                best_acc = acc
                best_name = run_tag

            # Every fold's fitted pipeline is saved, not only the best one.
            with open(os.path.join(cfg.model_save_path, f"{run_tag}.pkl"), "wb") as fp:
                pickle.dump(my_pipeline.model, fp)

        # Printed regardless of cfg.show_info.
        print(f"\nBest : {best_name}")


if cfg.show_info:
    breaker()
    print(f"Overall Best Model : {overall_best_name}")

breaker()


**************************************************

SWIN_BASE_PATCH4_WINDOW12_384_IN22K

**************************************************

lgr, 1, Accuracy : 0.99717
lgr, 2, Accuracy : 0.99717
lgr, 3, Accuracy : 0.99433
lgr, 4, Accuracy : 0.99575
lgr, 5, Accuracy : 0.99858

Best : swin_base_patch4_window12_384_in22k_lgr_5

**************************************************

knc, 1, Accuracy : 0.99717
knc, 2, Accuracy : 0.99433
knc, 3, Accuracy : 0.99008
knc, 4, Accuracy : 0.99717
knc, 5, Accuracy : 0.99291

Best : swin_base_patch4_window12_384_in22k_knc_1

**************************************************

rfc, 1, Accuracy : 0.99575
rfc, 2, Accuracy : 0.99433
rfc, 3, Accuracy : 0.98442
rfc, 4, Accuracy : 0.99292
rfc, 5, Accuracy : 0.98865

Best : swin_base_patch4_window12_384_in22k_rfc_1

**************************************************

xgb, 1, Accuracy : 0.99150
xgb, 2, Accuracy : 0.98725
xgb, 3, Accuracy : 0.98867
xgb, 4, Accuracy : 0.99292
xgb, 5, Accuracy : 0.98723

Best : s