In [None]:
import json
from collections import Counter
from pathlib import Path

import numpy as np
import pandas as pd
import pendulum
from sklearn import linear_model, svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import auc, classification_report, f1_score, roc_curve
from sklearn.model_selection import (GridSearchCV, cross_val_score,
                                     cross_validate, train_test_split)
# from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler, Normalizer
from sklearn.svm import SVC
from tqdm.notebook import tqdm

In [None]:
# Load the full table (indexed by "uid"), select the target columns (Y) and
# build the feature matrix (X) consumed by the model-comparison cell.
RAW_CSV = "csv"
# Kept separate from RAW_CSV on purpose: writing the feature subset back to the
# input path would replace the raw table, and the next run of this cell would
# then fail because the target columns are no longer in the file.
FEATURES_CSV = "X_features.csv"

all_df = pd.read_csv(RAW_CSV, index_col="uid")
Y = all_df[["C3DV_Intrinsic_goal", "C3DV_Extrinsic_goal"]]
# Alternative target set that also includes the C2DV columns:
# Y = all_df[["C2DV_Intrinsic_goal", "C2DV_Extrinsic_goal", "C3DV_Intrinsic_goal", "C3DV_Extrinsic_goal"]]

# NOTE(review): features are picked by column *position*, so these slices
# silently select different columns if the layout of the input file changes.
# What columns 94:99 and 149: contain is not visible here - TODO confirm.
X_e = all_df.iloc[:, 94:99]
X = all_df.iloc[:, 149:]
X = pd.concat([X, X_e], axis=1)
# Drop every feature whose name starts with "wc".
X = X.drop(columns=[c for c in X.columns if c.startswith("wc")])
# print(",".join([_x for _x in X.columns]))
X.to_csv(FEATURES_CSV)

# Scaling option 1 (disabled): per-feature min-max scaling.
# scaler = MinMaxScaler()
# X = scaler.fit_transform(X)

# Scaling option 2 (active): normalize the data.
# NOTE(review): sklearn's Normalizer rescales each *sample* (row) to unit norm,
# not each feature column - confirm this is the intended kind of normalization.
scaler = Normalizer()
X = scaler.fit_transform(X)  # X is a NumPy array from here on, no longer a DataFrame
X.shape

In [None]:
# Compare several classifiers on every target column of Y using 10-fold
# cross-validation, and display one table per model (rows: target columns,
# columns: mean test accuracy / micro-F1 / ROC-AUC, expressed in percent).

# Fixed seed so that the stochastic estimators (SVC probability calibration,
# random forest, MLP) produce the same scores on every Restart & Run All.
SEED = 42

models = {
    "LR": linear_model.LogisticRegression(C=1, solver="newton-cg"),
    "K-Neighbors": KNeighborsClassifier(),
    "SVM": svm.SVC(C=1, probability=True, random_state=SEED),
    "Random Forest": RandomForestClassifier(random_state=SEED),
    "NN": MLPClassifier((128, 64, 32), max_iter=500, random_state=SEED),
}

# Scorer names passed to cross_validate; results come back under "test_<name>".
metrics = ('accuracy', 'f1_micro', 'roc_auc_ovr')

for model_name, clf in models.items():
    rsts = {}
    print("*" * 50)
    print(f'Model: {model_name}')

    # DataFrame.iteritems() was removed in pandas 2.0; items() yields the same
    # (column name, Series) pairs and exists in older versions as well.
    for col_name, y in Y.items():
        # Only columns whose name ends in "goal" are treated as targets.
        if not col_name.endswith("goal"):
            continue
        y = np.array(y)

        scores = cross_validate(clf, X, y, cv=10, scoring=metrics)

        # Average each metric over the 10 folds.
        rsts[col_name] = {
            met: scores["test_" + met].mean() for met in metrics
        }

    # Convert to percentages and put one target column per row.
    rsts = pd.DataFrame(rsts) * 100
    rsts = rsts.T
    display(rsts)
    # rsts.to_csv(f"rsts-model={model_name}.csv", float_format="%.4f")