In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import random as r
import seaborn as sns
import matplotlib.pyplot as plt

from time import time
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold

from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, ExtraTreesClassifier, GradientBoostingClassifier, RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

from sklearn.metrics import accuracy_score

In [2]:
def breaker() -> None: print("\n" + 50*"*" + "\n")

In [3]:
class CFG(object):
    def __init__(self,
                 seed: int = 42,
                 n_splits: int = 5,
                 show_info: bool = False,
                 ):

        self.seed = seed
        self.n_splits = n_splits
        self.show_info = show_info
        self.features_path = "../input/fic-swin-features"
        self.model_save_path = "models"
        if not os.path.exists(self.model_save_path): os.makedirs(self.model_save_path)

cfg = CFG(seed=42, show_info=True)

In [4]:
y = pd.read_csv("../input/fic-dataframe/train.csv")
y = y["labels"].copy().values

In [5]:
class Model(object):
    def __init__(self, model_name: str, seed: int):
        self.model_name = model_name

        if self.model_name == "lgr":
            self.model = Pipeline(
                steps=[
                    ("classifier", LogisticRegression(random_state=seed)),
                ]
            )
        
        elif self.model_name == "knc":
            self.model = Pipeline(
                steps=[
                    ("classifier", KNeighborsClassifier()),
                ]
            )
        
        elif self.model_name == "svc":
            self.model = Pipeline(
                steps=[
                    ("classifier", SVC()),
                ]
            )
        
        elif self.model_name == "dtc":
            self.model = Pipeline(
                steps=[
                    ("classifier", DecisionTreeClassifier(random_state=seed)),
                ]
            )

        elif self.model_name == "etc":
            self.model = Pipeline(
                steps=[
                    ("classifier", ExtraTreeClassifier(random_state=seed)),
                ]
            )
        
        elif self.model_name == "rfc":
            self.model = Pipeline(
                steps=[
                    ("classifier", RandomForestClassifier(random_state=seed)),
                ]
            )
        
        elif self.model_name == "gbc":
            self.model = Pipeline(
                steps=[
                    ("classifier", GradientBoostingClassifier(random_state=seed)),
                ]
            )
        
        elif self.model_name == "abc":
            self.model = Pipeline(
                steps=[
                    ("classifier", AdaBoostClassifier(random_state=seed)),
                ]
            )
        
        elif self.model_name == "etcs":
            self.model = Pipeline(
                steps=[
                    ("classifier", ExtraTreesClassifier(random_state=seed)),
                ]
            )
        
        elif self.model_name == "gnb":
            self.model = Pipeline(
                steps=[
                    ("classifier", GaussianNB()),
                ]
            )
        
        elif self.model_name == "xgbc":
            self.model = Pipeline(
                steps=[
                    ("classifier", XGBClassifier(random_state=seed)),
                ]
            )
        
        elif self.model_name == "lgbmc":
            self.model = Pipeline(
                steps=[
                    ("classifier", LGBMClassifier(random_state=seed)),
                ]
            )

In [6]:
names: list = ["lgr", "knc", "svc", "dtc", "etc", "rfc", "etcs", "gnb"]

model_names: list = [
    "swin_large_patch4_window12_384_in22k",
    "swin_base_patch4_window12_384_in22k",
]

overall_best_acc: float = 0.0
for model_name in model_names:
    X = np.load(os.path.join(cfg.features_path, f"tr_{model_name}.npy"))
    if cfg.show_info: 
        breaker()
        print(f"{model_name.upper()}")
    for name in names:
        fold: int = 1
        best_name: str = None
        best_acc: float = 0.0
        if cfg.show_info: breaker()
        for tr_idx, va_idx in StratifiedKFold(n_splits=cfg.n_splits, random_state=cfg.seed, shuffle=True).split(X, y):
            X_train, X_valid, y_train, y_valid = X[tr_idx], X[va_idx], y[tr_idx], y[va_idx]
            my_pipeline = Model(name, cfg.seed)
            my_pipeline.model.fit(X_train, y_train)

            y_pred = my_pipeline.model.predict(X_valid)
            if cfg.show_info:
                acc = accuracy_score(y_pred, y_valid)
                print(f"{my_pipeline.model_name}, {fold}, Accuracy : {acc:.5f}")
            else:
                acc = accuracy_score(y_pred, y_valid)

            if acc > overall_best_acc:
                overall_best_acc = acc
                overall_best_name = f"{model_name}_{name}_{fold}"

            if acc > best_acc:
                best_acc = acc
                best_name = f"{model_name}_{name}_{fold}"

            with open(os.path.join(cfg.model_save_path, f"{model_name}_{name}_{fold}.pkl"), "wb") as fp: pickle.dump(my_pipeline.model, fp)
            fold += 1

        print(f"\nBest : {best_name}")


if cfg.show_info: 
    breaker()
    print(f"Overall Best Model : {overall_best_name}")

breaker()


**************************************************

SWIN_LARGE_PATCH4_WINDOW12_384_IN22K

**************************************************



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lgr, 1, Accuracy : 0.98060


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lgr, 2, Accuracy : 0.96766


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lgr, 3, Accuracy : 0.97283


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lgr, 4, Accuracy : 0.98189


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lgr, 5, Accuracy : 0.98060

Best : swin_large_patch4_window12_384_in22k_lgr_4

**************************************************

knc, 1, Accuracy : 0.97542
knc, 2, Accuracy : 0.96636
knc, 3, Accuracy : 0.96378
knc, 4, Accuracy : 0.97154
knc, 5, Accuracy : 0.97283

Best : swin_large_patch4_window12_384_in22k_knc_1

**************************************************

svc, 1, Accuracy : 0.97154
svc, 2, Accuracy : 0.96636
svc, 3, Accuracy : 0.97283
svc, 4, Accuracy : 0.97671
svc, 5, Accuracy : 0.97930

Best : swin_large_patch4_window12_384_in22k_svc_5

**************************************************

dtc, 1, Accuracy : 0.80207
dtc, 2, Accuracy : 0.83571
dtc, 3, Accuracy : 0.82665
dtc, 4, Accuracy : 0.83441
dtc, 5, Accuracy : 0.80595

Best : swin_large_patch4_window12_384_in22k_dtc_2

**************************************************

etc, 1, Accuracy : 0.65977
etc, 2, Accuracy : 0.66753
etc, 3, Accuracy : 0.66365
etc, 4, Accuracy : 0.67400
etc, 5, Accuracy : 0.67529

Best : swin_larg

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lgr, 1, Accuracy : 0.97413


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lgr, 2, Accuracy : 0.96895


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lgr, 3, Accuracy : 0.96248


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lgr, 4, Accuracy : 0.97801


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


lgr, 5, Accuracy : 0.96895

Best : swin_base_patch4_window12_384_in22k_lgr_4

**************************************************

knc, 1, Accuracy : 0.95343
knc, 2, Accuracy : 0.94955
knc, 3, Accuracy : 0.94955
knc, 4, Accuracy : 0.96248
knc, 5, Accuracy : 0.96895

Best : swin_base_patch4_window12_384_in22k_knc_5

**************************************************

svc, 1, Accuracy : 0.94955
svc, 2, Accuracy : 0.94179
svc, 3, Accuracy : 0.94567
svc, 4, Accuracy : 0.95990
svc, 5, Accuracy : 0.96119

Best : swin_base_patch4_window12_384_in22k_svc_5

**************************************************

dtc, 1, Accuracy : 0.80595
dtc, 2, Accuracy : 0.82536
dtc, 3, Accuracy : 0.80983
dtc, 4, Accuracy : 0.83571
dtc, 5, Accuracy : 0.84476

Best : swin_base_patch4_window12_384_in22k_dtc_5

**************************************************

etc, 1, Accuracy : 0.70246
etc, 2, Accuracy : 0.71151
etc, 3, Accuracy : 0.70893
etc, 4, Accuracy : 0.76067
etc, 5, Accuracy : 0.72186

Best : swin_base_pat

In [7]:
ts_features = np.load(os.path.join(cfg.features_path, f"ts_{model_names[0]}.npy"))
model = pickle.load(open(os.path.join(cfg.model_save_path, f"{overall_best_name}.pkl"), "rb"))
y_pred = model.predict(ts_features)

ss_df = pd.read_csv("../input/5-flowers-image-classification/Sample_submission.csv")
ss_df.label = y_pred.astype("uint8")
ss_df.to_csv("submission.csv", index=False)