In [1]:
import pandas as pd
from nebula.data.yg_ar.setup_data_image_hard import read_data
from nebula.common import to_scale_one, write_pickle, read_pickle
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
import os
import os.path as osp
import numpy as np

  warn(f"Failed to load image Python extension: {e}")


In [6]:
def create_label_map(labels):
    """Assign a consecutive integer id to every distinct label.

    Args:
        labels: Iterable of hashable, mutually sortable label values
            (duplicates are allowed).

    Returns:
        dict mapping each distinct label to an integer id. Ids start at 0 and
        follow the sorted order of the distinct labels.
    """
    # Deduplicate first, then sort so the numbering does not depend on set
    # iteration order.
    ordered_labels = sorted(set(labels))
    return {name: index for index, name in enumerate(ordered_labels)}

In [10]:
# Load the pickled dataset bundle and unpack its four frames.
# Presumably: the full frame followed by its train / test / validation splits — TODO confirm
# against the script that wrote this pickle.
# NOTE(review): absolute, machine-specific path; consider deriving it from a configurable data dir.
df_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/image_medium_ds_point2_scale.pkl"
df, train_df, test_df, valid_df = read_pickle(df_path)

In [11]:
# Build one label -> integer id map per target column. The maps are derived from the
# full frame `df`, and the same maps are applied to both the train and test splits below.
label_map_a = create_label_map(df["label_a"])
label_map_at = create_label_map(df["label_at"])

In [12]:
# Training inputs: the "image" column as a plain Python list.
# Assumes each entry is already a flat numeric feature vector — TODO confirm.
train_x = train_df["image"].to_list()

In [13]:
# Training targets: label values encoded to integer ids with the maps built above.
# A label missing from its map would become NaN here (Series.map semantics).
train_y_a = train_df["label_a"].map(label_map_a).to_list()
train_y_at = train_df["label_at"].map(label_map_at).to_list()

In [14]:
# Test inputs: the "image" column of the test split as a plain Python list.
test_x = test_df["image"].to_list()

In [15]:
# Test targets, encoded with the same label maps as the training targets.
test_y_a = test_df["label_a"].map(label_map_a).to_list()
test_y_at = test_df["label_at"].map(label_map_at).to_list()

In [16]:
def train_svm(data_x, data_y):
    """Fit a support-vector classifier and return the fitted estimator.

    Args:
        data_x: Training samples, passed straight to ``SVC.fit``.
        data_y: Training targets, passed straight to ``SVC.fit``.

    Returns:
        The fitted ``svm.SVC`` instance.
    """
    # max_iter=50 is passed to the SVC constructor; every other setting is the
    # library default. ``fit`` returns the estimator itself.
    return svm.SVC(max_iter=50).fit(data_x, data_y)


def train_logistic(data_x, data_y):
    """Fit a logistic-regression classifier and return the fitted estimator.

    Args:
        data_x: Training samples, passed straight to ``LogisticRegression.fit``.
        data_y: Training targets, passed straight to ``LogisticRegression.fit``.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    # random_state=0 is fixed; all other settings are the library defaults.
    # ``fit`` returns the estimator itself.
    return LogisticRegression(random_state=0).fit(data_x, data_y)


def train_gbt(data_x, data_y):
    """Fit a gradient-boosting classifier and return the fitted estimator.

    Args:
        data_x: Training samples, passed straight to ``GradientBoostingClassifier.fit``.
        data_y: Training targets, passed straight to ``GradientBoostingClassifier.fit``.

    Returns:
        The fitted ``GradientBoostingClassifier`` instance.
    """
    # Hyper-parameters gathered in one place; verbose=1 makes fit() print progress.
    gbt_settings = dict(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=3,
        random_state=0,
        verbose=1,
        n_iter_no_change=2,
    )
    # ``fit`` returns the estimator itself.
    return GradientBoostingClassifier(**gbt_settings).fit(data_x, data_y)


def evaluate(model, test_x, test_y):
    """Predict on ``test_x`` and score the predictions against ``test_y``.

    Args:
        model: Any object exposing ``predict(test_x)``.
        test_x: Inputs handed to ``model.predict`` unchanged.
        test_y: Expected labels, one per prediction (list or array).

    Returns:
        Tuple ``(predictions, accuracy)``: the predictions as a numpy array and
        the fraction of predictions equal to their label (NaN when there are
        no predictions).

    Raises:
        ValueError: If predictions and labels do not have the same shape.
    """
    # Coerce both sides to arrays: comparing a plain list with a list yields a
    # single bool (which has no ``.sum()``), not an element-wise result.
    predictions = np.asarray(model.predict(test_x))
    labels = np.asarray(test_y)

    # Refuse mismatched shapes instead of letting numpy broadcast them into a
    # meaningless comparison matrix.
    if predictions.shape != labels.shape:
        raise ValueError(
            f"predictions have shape {predictions.shape} but labels have shape {labels.shape}"
        )

    if len(predictions) == 0:
        # Nothing to score; avoid the 0/0 RuntimeWarning and report NaN explicitly.
        return predictions, float("nan")

    correct = predictions == labels
    accuracy = correct.sum() / len(predictions)
    return predictions, accuracy


def create_dirs_to_file(path):
    """Create the parent directory of ``path`` (and any missing ancestors).

    Args:
        path: Path of a file that is about to be written. The file itself is
            not created.

    Does nothing when ``path`` has no directory component (a bare filename),
    and is safe to call when the directory already exists.
    """
    # dirname understands the platform's separators, unlike splitting on "/".
    parent = osp.dirname(path)
    if parent:
        # exist_ok avoids the check-then-create race and makes repeat calls harmless.
        os.makedirs(parent, exist_ok=True)


def load_or_train(train_x, train_y, test_x, test_y, train_func, label_map, path):
    """Return cached results from ``path`` or train, evaluate and cache them.

    Args:
        train_x, train_y: Training inputs and encoded targets for ``train_func``.
        test_x, test_y: Test inputs and encoded targets used for evaluation.
        train_func: Callable ``(train_x, train_y) -> fitted model``.
        label_map: dict mapping label name -> integer id used to encode the targets.
        path: Pickle file used as the cache.

    Returns:
        7-tuple ``(trained_model, predictions, accuracy, df, df_incorrect,
        df_correct, label_map)``.

    Note:
        When ``path`` exists, its content is returned as-is and none of the data
        arguments are used. A warning is emitted if the cached label map differs
        from ``label_map``, because the cached ids then do not line up with the
        current encoding.
    """
    if osp.exists(path):
        cached = read_pickle(path)

        # The label map is the last element of the 7-tuple written below.
        cached_label_map = (
            cached[-1] if isinstance(cached, tuple) and len(cached) == 7 else None
        )
        if isinstance(cached_label_map, dict) and cached_label_map != label_map:
            import warnings

            warnings.warn(
                f"Cached results at {path} were built with a different label map; "
                "delete the file to retrain with the current one.",
                stacklevel=2,
            )
        return cached

    create_dirs_to_file(path)

    trained_model = train_func(train_x, train_y)
    predictions, accuracy = evaluate(trained_model, test_x, test_y)

    df, df_incorrect, df_correct = format_results(predictions, test_y, label_map)

    results = (trained_model, predictions, accuracy, df, df_incorrect, df_correct, label_map)
    write_pickle(path, results)

    return results


def format_results(predictions, labels, label_map):
    """Tabulate predictions against labels and split them by correctness.

    Args:
        predictions: Predicted integer ids, one per sample.
        labels: Expected integer ids, aligned with ``predictions``.
        label_map: dict mapping label name -> integer id.

    Returns:
        Tuple ``(df, df_incorrect, df_correct)``. ``df`` has the columns
        ``prediction``, ``label``, ``check`` (prediction equals label),
        ``prediction_name`` and ``label_name``; the other two frames are the
        rows of ``df`` where ``check`` is False / True respectively.
    """
    # Invert the map so integer ids can be decoded back to label names.
    id_to_name = dict((idx, name) for name, idx in label_map.items())

    results = pd.DataFrame(data={"prediction": predictions, "label": labels})
    results["check"] = results["prediction"] == results["label"]
    results["prediction_name"] = results["prediction"].map(id_to_name)
    results["label_name"] = results["label"].map(id_to_name)

    is_hit = results["check"]
    misses = results[~is_hit]
    hits = results[is_hit]
    return results, misses, hits

In [17]:
# SVM on the `label_a` targets: reuse the pickled result tuple at save_path when it
# exists, otherwise train, evaluate on the test split and cache the results there.
# NOTE(review): the recorded output below decodes id 4 as "childs" and 8 as "camel",
# while the gradient-boosting output decodes 4 as "lotus" and 0 as "camel" — the cached
# pickles appear to have been written with different label maps; verify before comparing.
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models_medium/svm_a.pkl"
(
    trained_svm_a, 
    predictions_svm_a, 
    accuracy_svm_a, 
    df_svm_a, 
    df_incorrect_svm_a, 
    df_correct_svm_a,
    label_map_svm_a
)= load_or_train(
    train_x, 
    train_y_a, 
    test_x, 
    test_y_a, 
    train_svm, 
    label_map_a, 
    save_path
)
# Test accuracy followed by the first few misclassified rows.
print(accuracy_svm_a)
print(df_incorrect_svm_a.head())

0.3925925925925926
   prediction  label  check prediction_name label_name
2           6      4  False        triangle     childs
5           2      4  False           lotus     childs
7           6      4  False        triangle     childs
8           8      4  False           camel     childs
9           3      4  False      upward_dog     childs


In [18]:
# SVM on the `label_at` targets: same load-or-train flow, with its own cache file.
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models_medium/svm_at.pkl"
(
    trained_svm_at, 
    predictions_svm_at, 
    accuracy_svm_at, 
    df_svm_at,
    df_incorrect_svm_at, 
    df_correct_svm_at,
    label_map_svm_at
)= load_or_train(
    train_x, 
    train_y_at, 
    test_x, 
    test_y_at, 
    train_svm, 
    label_map_at, 
    save_path
)
# Test accuracy followed by the first few misclassified rows.
print(accuracy_svm_at)
print(df_incorrect_svm_at.head())

0.08842592592592592
   prediction  label  check prediction_name label_name
0          32      0  False         lotus_4   childs_2
1          28      0  False         lotus_3   childs_2
2           8      0  False        childs_3   childs_2
3           6      0  False         chair_4   childs_2
5          14      0  False   thunderbolt_4   childs_2


In [19]:
# Logistic regression on the `label_a` targets: load the cached result tuple from
# save_path if present, otherwise train, evaluate and cache.
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models_medium/logistic_a.pkl"
(
    trained_logistic_a, 
    predictions_logistic_a, 
    accuracy_logistic_a, 
    df_logistic_a,
    df_incorrect_logistic_a, 
    df_correct_logistic_a,
    label_map_logistic_a
)= load_or_train(
    train_x, 
    train_y_a, 
    test_x, 
    test_y_a, 
    train_logistic, 
    label_map_a, 
    save_path
)
# Test accuracy followed by the first few misclassified rows.
print(accuracy_logistic_a)
print(df_incorrect_logistic_a.head())

0.2111111111111111
   prediction  label  check prediction_name label_name
1           2      4  False           lotus     childs
2           6      4  False        triangle     childs
5           0      4  False           chair     childs
7           2      4  False           lotus     childs
8           5      4  False     warrior_III     childs


In [20]:
# Logistic regression on the `label_at` targets: same load-or-train flow, own cache file.
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models_medium/logistic_at.pkl"
(
    trained_logistic_at, 
    predictions_logistic_at, 
    accuracy_logistic_at, 
    df_logistic_at,
    df_incorrect_logistic_at, 
    df_correct_logistic_at,
    label_map_logistic_at
)= load_or_train(
    train_x, 
    train_y_at, 
    test_x, 
    test_y_at, 
    train_logistic, 
    label_map_at, 
    save_path
)
# Test accuracy followed by the first few misclassified rows.
print(accuracy_logistic_at)
print(df_incorrect_logistic_at.head())

0.059722222222222225
   prediction  label  check prediction_name label_name
0          12      0  False        childs_1   childs_2
1           5      0  False    upward_dog_3   childs_2
2          12      0  False        childs_1   childs_2
3          21      0  False    upward_dog_4   childs_2
5          31      0  False         chair_3   childs_2


In [22]:
# Gradient boosting on the `label_a` targets: load the cached result tuple from
# save_path if present, otherwise train, evaluate and cache.
# NOTE(review): the recorded output below decodes id 4 as "lotus", whereas the recorded
# SVM / logistic outputs decode id 4 as "childs" — the caches were apparently produced
# with different label maps, so per-class ids are not comparable across models; verify.
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models_medium/gbt01_a.pkl"
(
    trained_gbt_a, 
    predictions_gbt_a, 
    accuracy_gbt_a, 
    df_gbt_a, 
    df_incorrect_gbt_a, 
    df_correct_gbt_a,
    label_map_gbt_a
)= load_or_train(
    train_x, 
    train_y_a, 
    test_x, 
    test_y_a, 
    train_gbt, 
    label_map_a, 
    save_path
)
# Test accuracy followed by the first few misclassified rows.
print(accuracy_gbt_a)
print(df_incorrect_gbt_a.head())

0.6078703703703704
    prediction  label  check    prediction_name label_name
2            2      4  False             childs      lotus
3            3      4  False  lord_of_the_dance      lotus
7            2      4  False             childs      lotus
8            5      4  False        thunderbolt      lotus
12           0      4  False              camel      lotus


In [23]:
# Gradient boosting on the `label_at` targets: same load-or-train flow, own cache file.
# NOTE(review): ids in the recorded output below (e.g. 18 -> "lotus_3") differ from the
# recorded SVM output for the same target (28 -> "lotus_3"); check the cached label maps.
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models_medium/gbt01_at.pkl"
(
    trained_gbt_at, 
    predictions_gbt_at, 
    accuracy_gbt_at, 
    df_gbt_at, 
    df_incorrect_gbt_at, 
    df_correct_gbt_at,
    label_map_gbt_at
)= load_or_train(
    train_x, 
    train_y_at, 
    test_x, 
    test_y_at, 
    train_gbt, 
    label_map_at, 
    save_path
)
# Test accuracy followed by the first few misclassified rows.
print(accuracy_gbt_at)
print(df_incorrect_gbt_at.head())

0.1388888888888889
   prediction  label  check prediction_name label_name
0          29     18  False    upward_dog_2    lotus_3
1          28     18  False    upward_dog_1    lotus_3
2          37     18  False    warrior_II_2    lotus_3
3           5     18  False         chair_2    lotus_3
4          20     18  False   thunderbolt_1    lotus_3
