In [79]:
import pandas as pd
from nebula.data.yg_ar.setup_data_image import read_data
from nebula.common import to_scale_one, write_pickle, read_pickle
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
import os
import os.path as osp

In [80]:
def create_label_map(labels):
    label_set = set()
    for lt in labels:
        label_set.add(lt)

    label_map = {}
    count = 0
    for l in label_set:
        label_map[l] = count
        count += 1
        
    return label_map

In [81]:
df_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/image_df.pkl"
random_seed = 1
df, train_df, test_df, valid_df = read_data(df_path, random_seed)

In [82]:
df.head()

Unnamed: 0,image,orientation,xangle,yangle,pants,cloth,hair,action_type,label,label_t,label_th,label_thc,label_thcp
0,"[[116, 116, 116, 116, 116, 116, 116, 116, 116,...",pp,x00,y00,0,0,0,1,camel,camel_1,camel_1_0,camel_1_0_0,camel_1_0_0_0
1,"[[133, 133, 134, 134, 134, 134, 134, 134, 134,...",pp,x05,y00,0,0,0,1,camel,camel_1,camel_1_0,camel_1_0_0,camel_1_0_0_0
2,"[[116, 116, 116, 116, 116, 116, 116, 116, 116,...",np,x00,y05,0,0,0,1,camel,camel_1,camel_1_0,camel_1_0_0,camel_1_0_0_0
3,"[[116, 116, 116, 116, 116, 116, 116, 116, 116,...",pp,x00,y05,0,0,0,1,camel,camel_1,camel_1_0,camel_1_0_0,camel_1_0_0_0
4,"[[134, 134, 134, 134, 134, 134, 134, 134, 134,...",np,x05,y05,0,0,0,1,camel,camel_1,camel_1_0,camel_1_0_0,camel_1_0_0_0


In [83]:
label_map = create_label_map(df["label"])
label_t_map = create_label_map(df["label_t"])
label_th_map = create_label_map(df["label_th"])
label_thc_map = create_label_map(df["label_thc"])
label_thcp_map = create_label_map(df["label_thcp"])

In [84]:
label_map

{'warrior_III': 0,
 'chair': 1,
 'warrior_II': 2,
 'lotus': 3,
 'lord_of_the_dance': 4,
 'triangle': 5,
 'upward_dog': 6,
 'camel': 7,
 'thunderbolt': 8,
 'childs': 9}

In [85]:
label_t_map

{'triangle_1': 0,
 'upward_dog_1': 1,
 'chair_4': 2,
 'warrior_II_2': 3,
 'lord_of_the_dance_1': 4,
 'childs_3': 5,
 'lotus_4': 6,
 'triangle_3': 7,
 'lotus_2': 8,
 'warrior_III_4': 9,
 'lord_of_the_dance_4': 10,
 'upward_dog_2': 11,
 'childs_1': 12,
 'warrior_II_1': 13,
 'childs_2': 14,
 'warrior_III_2': 15,
 'chair_2': 16,
 'warrior_II_4': 17,
 'triangle_2': 18,
 'camel_1': 19,
 'thunderbolt_2': 20,
 'thunderbolt_3': 21,
 'lotus_1': 22,
 'chair_3': 23,
 'thunderbolt_4': 24,
 'camel_4': 25,
 'camel_2': 26,
 'warrior_II_3': 27,
 'warrior_III_3': 28,
 'childs_4': 29,
 'chair_1': 30,
 'camel_3': 31,
 'thunderbolt_1': 32,
 'triangle_4': 33,
 'lotus_3': 34,
 'warrior_III_1': 35,
 'upward_dog_3': 36,
 'lord_of_the_dance_3': 37,
 'lord_of_the_dance_2': 38,
 'upward_dog_4': 39}

In [86]:
train_x = train_df["image"].apply(lambda x: to_scale_one(x, scale=0.2).flatten()).to_list()

In [87]:
train_y = train_df["label"].map(label_map).to_list()
train_y_t = train_df["label_t"].map(label_t_map).to_list()
train_y_th = train_df["label_th"].map(label_th_map).to_list()
train_y_thc = train_df["label_thc"].map(label_thc_map).to_list()
train_y_thcp = train_df["label_thcp"].map(label_thcp_map).to_list()

In [88]:
test_x = test_df["image"].apply(lambda x: to_scale_one(x, scale=0.2).flatten()).to_list()

In [89]:
test_y = test_df["label"].map(label_map).to_list()
test_y_t = test_df["label_t"].map(label_t_map).to_list()
test_y_th = test_df["label_th"].map(label_th_map).to_list()
test_y_thc = test_df["label_thc"].map(label_thc_map).to_list()
test_y_thcp = test_df["label_thcp"].map(label_thcp_map).to_list()

In [90]:
def train_svm(data_x, data_y):
    clf = svm.SVC()
    clf.fit(data_x, data_y)
    return clf


def train_logistic(data_x, data_y):
    clf = LogisticRegression(random_state=0)
    clf.fit(data_x, data_y)
    return clf


def train_gbt_n10_m1(data_x, data_y):
    clf = GradientBoostingClassifier(
        n_estimators=10,
        learning_rate=1,
        max_depth=1,
        random_state=0
    )
    clf.fit(data_x, data_y)
    return clf


def evaluate(model, test_x, test_y):
    res = model.predict(test_x)
    correct = res == test_y
    accuracy = correct.sum() / len(res)
    return res, accuracy


def load_or_train(train_x, train_y, test_x, test_y, train_func, label_map, path):
    
    if osp.exists(path):
        return read_pickle(path)
    
    trained_model = train_func(train_x, train_y)
    predictions, accuracy = evaluate(trained_model, test_x, test_y)
    
    df, df_incorrect, df_correct = format_results(predictions, test_y, label_map)
    
    write_pickle(path, (trained_model, predictions, accuracy, df, df_incorrect, df_correct, label_map)) 
    
    return trained_model, predictions, accuracy, df, df_incorrect, df_correct, label_map


def format_results(predictions, labels, label_map):
    df = pd.DataFrame(
        data={
            "prediction": predictions,
            "label": labels
        }
    )
    df["check"] = df["prediction"] == df["label"]

    label_map_reverse = {v:k for k, v in label_map.items()}

    df["prediction_name"] = df.prediction.map(label_map_reverse)
    df["label_name"] = df.label.map(label_map_reverse)

    df_incorrect = df[~df.check]
    df_correct = df[df.check]

    return df, df_incorrect, df_correct

In [91]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/gbt.pkl"
(
    trained_gbt, 
    predictions_gbt, 
    accuracy_gbt, 
    df_gbt, 
    df_incorrect_gbt, 
    df_correct_gbt,
    label_map_gbt
)= load_or_train(
    train_x, 
    train_y, 
    test_x, 
    test_y, 
    train_gbt_n10_m1, 
    label_map, 
    save_path
)
print(accuracy_gbt)
print(df_incorrect_gbt.head())

0.997872340425532
      prediction  label  check prediction_name   label_name
48             1      0  False           chair  warrior_III
148            7      0  False           camel  warrior_III
176            7      0  False           camel  warrior_III
978            9      2  False          childs   warrior_II
1202           1      2  False           chair   warrior_II


In [92]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/gbt_t.pkl"
(
    trained_gbt_t, 
    predictions_gbt_t, 
    accuracy_gbt_t, 
    df_gbt_t, 
    df_incorrect_gbt_t, 
    df_correct_gbt_t,
    label_map_gbt_t
)= load_or_train(
    train_x, 
    train_y_t, 
    test_x, 
    test_y_t, 
    train_gbt_n10_m1, 
    label_t_map, 
    save_path
)
print(accuracy_gbt_t)
print(df_incorrect_gbt_t.head())

0.007659574468085106
   prediction  label  check prediction_name     label_name
0          29     35  False        childs_4  warrior_III_1
1          29     35  False        childs_4  warrior_III_1
2          29     28  False        childs_4  warrior_III_3
3          29     15  False        childs_4  warrior_III_2
4          29      9  False        childs_4  warrior_III_4


In [93]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/gbt_th.pkl"
(
    trained_gbt_th, 
    predictions_gbt_th, 
    accuracy_gbt_th, 
    df_gbt_th, 
    df_incorrect_gbt_th, 
    df_correct_gbt_th,
    label_map_gbt_th
)= load_or_train(
    train_x, 
    train_y_th, 
    test_x, 
    test_y_th, 
    train_gbt_n10_m1, 
    label_th_map, 
    save_path
)
print(accuracy_gbt_th)
print(df_incorrect_gbt_th.head())

0.010851063829787235
   prediction  label  check prediction_name       label_name
0          63     72  False  warrior_II_4_2  warrior_III_1_0
1          63     55  False  warrior_II_4_2  warrior_III_1_1
2          63    114  False  warrior_II_4_2  warrior_III_3_2
3          63    101  False  warrior_II_4_2  warrior_III_2_0
4          63    127  False  warrior_II_4_2  warrior_III_4_0


In [78]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/gbt_thc.pkl"
(
    trained_gbt_thc, 
    predictions_gbt_thc, 
    accuracy_gbt_thc, 
    df_gbt_thc, 
    df_incorrect_gbt_thc, 
    df_correct_gbt_thc,
    label_map_gbt_thc
)= load_or_train(
    train_x, 
    train_y_thc, 
    test_x, 
    test_y_thc, 
    train_gbt_n10_m1, 
    label_thc_map, 
    save_path
)
print(accuracy_gbt_thc)
print(df_incorrect_gbt_thc.head())

In [None]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/gbt_thcp.pkl"
(
    trained_gbt_thcp, 
    predictions_gbt_thcp, 
    accuracy_gbt_thcp, 
    df_gbt_thcp, 
    df_incorrect_gbt_thcp, 
    df_correct_gbt_thcp,
    label_map_gbt_thcp
)= load_or_train(
    train_x, 
    train_y_thcp, 
    test_x, 
    test_y_thcp, 
    train_gbt_n10_m1, 
    label_thcp_map, 
    save_path
)
print(accuracy_gbt_thcp)
print(df_incorrect_gbt_thcp.head())

In [94]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/svm.pkl"
(
    trained_svm, 
    predictions_svm, 
    accuracy_svm, 
    df_svm, 
    df_incorrect_svm, 
    df_correct_svm,
    label_map_svm
)= load_or_train(
    train_x, 
    train_y, 
    test_x, 
    test_y, 
    train_svm, 
    label_map, 
    save_path
)
print(accuracy_svm)
print(df_incorrect_svm.head())

1.0
Empty DataFrame
Columns: [prediction, label, check, prediction_name, label_name]
Index: []


In [95]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/svm_t.pkl"
(
    trained_svm_t, 
    predictions_svm_t, 
    accuracy_svm_t, 
    df_svm_t,
    df_incorrect_svm_t, 
    df_correct_svm_t,
    label_map_svm_t
)= load_or_train(
    train_x, 
    train_y_t, 
    test_x, 
    test_y_t, 
    train_svm, 
    label_t_map, 
    save_path
)
print(accuracy_svm_t)
print(df_incorrect_svm_t.head())

1.0
Empty DataFrame
Columns: [prediction, label, check, prediction_name, label_name]
Index: []


In [96]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/svm_th.pkl"
(
    trained_svm_th, 
    predictions_svm_th, 
    accuracy_svm_th, 
    df_svm_th,
    df_incorrect_svm_th, 
    df_correct_svm_th,
    label_map_svm_th
)= load_or_train(
    train_x, 
    train_y_th, 
    test_x, 
    test_y_th, 
    train_svm, 
    label_th_map, 
    save_path
)
print(accuracy_svm_th)
print(df_incorrect_svm_th.head())

0.9751063829787234
      prediction  label  check prediction_name      label_name
952           63    102  False  warrior_II_4_2  warrior_II_4_1
993          149      9  False  warrior_II_3_2  warrior_II_3_1
1085         149      9  False  warrior_II_3_2  warrior_II_3_1
1177          63    102  False  warrior_II_4_2  warrior_II_4_1
1268          63    102  False  warrior_II_4_2  warrior_II_4_1


In [97]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/svm_thc.pkl"
(
    trained_svm_thc, 
    predictions_svm_thc, 
    accuracy_svm_thc, 
    df_svm_thc,
    df_incorrect_svm_thc, 
    df_correct_svm_thc,
    label_map_svm_thc
)= load_or_train(
    train_x, 
    train_y_thc, 
    test_x, 
    test_y_thc, 
    train_svm, 
    label_thc_map, 
    save_path
)
print(accuracy_svm_thc)
print(df_incorrect_svm_thc.head())

0.7012765957446808
    prediction  label  check    prediction_name         label_name
0          132    262  False  warrior_III_1_1_3  warrior_III_1_0_3
16         254    582  False  warrior_III_2_0_2  warrior_III_2_0_3
17          97    358  False  warrior_III_2_1_0  warrior_III_2_2_0
27          97    358  False  warrior_III_2_1_0  warrior_III_2_2_0
30         533    262  False  warrior_III_1_0_2  warrior_III_1_0_3


In [98]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/svm_thcp.pkl"
(
    trained_svm_thcp, 
    predictions_svm_thcp, 
    accuracy_svm_thcp, 
    df_svm_thcp,
    df_incorrect_svm_thcp, 
    df_correct_svm_thcp,
    label_map_svm_thcp
)= load_or_train(
    train_x, 
    train_y_thcp, 
    test_x, 
    test_y_thcp, 
    train_svm, 
    label_thcp_map, 
    save_path
)
print(accuracy_svm_thcp)
print(df_incorrect_svm_thcp.head())

0.11808510638297873
   prediction  label  check      prediction_name           label_name
0         836   1188  False  warrior_III_1_1_3_1  warrior_III_1_0_3_1
1        2111   1274  False  warrior_III_1_0_0_3  warrior_III_1_1_0_3
2        1016   2221  False  warrior_III_3_1_1_0  warrior_III_3_2_1_0
3         510    980  False  warrior_III_2_2_3_0  warrior_III_2_0_3_0
4        2034   2058  False  warrior_III_4_0_1_1  warrior_III_4_0_0_1


In [99]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/logistic.pkl"
(
    trained_logistic, 
    predictions_logistic, 
    accuracy_logistic, 
    df_logistic,
    df_incorrect_logistic, 
    df_correct_logistic,
    label_map_logistic
)= load_or_train(
    train_x, 
    train_y, 
    test_x, 
    test_y, 
    train_logistic, 
    label_map, 
    save_path
)
print(accuracy_logistic)
print(df_incorrect_logistic.head())

1.0
Empty DataFrame
Columns: [prediction, label, check, prediction_name, label_name]
Index: []


In [100]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/logistic_t.pkl"
(
    trained_logistic_t, 
    predictions_logistic_t, 
    accuracy_logistic_t, 
    df_logistic_t,
    df_incorrect_logistic_t, 
    df_correct_logistic_t,
    label_map_logistic_t
)= load_or_train(
    train_x, 
    train_y_t, 
    test_x, 
    test_y_t, 
    train_logistic, 
    label_t_map, 
    save_path
)
print(accuracy_logistic_t)
print(df_incorrect_logistic_t.head())

0.9980851063829788
      prediction  label  check prediction_name     label_name
2625           0      7  False      triangle_1     triangle_3
2740           0      7  False      triangle_1     triangle_3
4149          21     24  False   thunderbolt_3  thunderbolt_4
4297           5     12  False        childs_3       childs_1
4452           5     12  False        childs_3       childs_1


In [101]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/logistic_th.pkl"
(
    trained_logistic_th, 
    predictions_logistic_th, 
    accuracy_logistic_th, 
    df_logistic_th,
    df_incorrect_logistic_th, 
    df_correct_logistic_th,
    label_map_logistic_th
)= load_or_train(
    train_x, 
    train_y_th, 
    test_x, 
    test_y_th, 
    train_logistic, 
    label_th_map, 
    save_path
)
print(accuracy_logistic_th)
print(df_incorrect_logistic_th.head())

0.9631914893617022
      prediction  label  check prediction_name label_name
779           92    151  False       chair_1_2  chair_3_2
1414         156     61  False       lotus_3_2  lotus_4_0
1422         116    130  False       lotus_4_2  lotus_4_1
1428          43    135  False       lotus_2_1  lotus_3_1
1436         135    156  False       lotus_3_1  lotus_3_2


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [102]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/logistic_thc.pkl"
(
    trained_logistic_thc, 
    predictions_logistic_thc, 
    accuracy_logistic_thc, 
    df_logistic_thc,
    df_incorrect_logistic_thc, 
    df_correct_logistic_thc,
    label_map_logistic_thc
)= load_or_train(
    train_x, 
    train_y_thc, 
    test_x, 
    test_y_thc, 
    train_logistic, 
    label_thc_map, 
    save_path
)
print(accuracy_logistic_thc)
print(df_incorrect_logistic_thc.head())

0.9131914893617021
    prediction  label  check    prediction_name         label_name
16         254    582  False  warrior_III_2_0_2  warrior_III_2_0_3
51         335    132  False  warrior_III_1_1_2  warrior_III_1_1_3
52         396    117  False  warrior_III_2_2_2  warrior_III_2_2_3
53         638    380  False  warrior_III_3_0_3  warrior_III_3_1_3
56         335    132  False  warrior_III_1_1_2  warrior_III_1_1_3


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [103]:
save_path = "C:/Users/aphri/Documents/t0002/pycharm/data/yg_ar/classic_models/logistic_thcp.pkl"
(
    trained_logistic_thcp, 
    predictions_logistic_thcp, 
    accuracy_logistic_thcp, 
    df_logistic_thcp,
    df_incorrect_logistic_thcp, 
    df_correct_logistic_thcp,
    label_map_logistic_thcp
)= load_or_train(
    train_x, 
    train_y_thcp, 
    test_x, 
    test_y_thcp, 
    train_logistic, 
    label_thcp_map, 
    save_path
)
print(accuracy_logistic_thcp)
print(df_incorrect_logistic_thcp.head())

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.5974468085106382
    prediction  label  check      prediction_name           label_name
0          836   1188  False  warrior_III_1_1_3_1  warrior_III_1_0_3_1
5         2409   1978  False  warrior_III_3_1_1_2  warrior_III_3_2_1_2
6         1551   1998  False  warrior_III_2_0_2_3  warrior_III_2_0_3_3
9         1998   1551  False  warrior_III_2_0_3_3  warrior_III_2_0_2_3
13        1825   2221  False  warrior_III_3_1_1_3  warrior_III_3_2_1_0
