In [1]:
import copy
import os

import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC

In [2]:
def load_data(data_dir='Test Kompas'):
    """Load the pickled train/validation/test features and labels.

    Paths are built with ``os.path.join`` instead of hard-coded backslashes,
    so the loader works on every platform and no longer relies on invalid
    string escapes such as ``'\\X'`` and ``'\\y'``.

    Parameters
    ----------
    data_dir : str, optional
        Directory that contains the six ``*.pkl`` files. Defaults to
        ``'Test Kompas'``, the location used originally.

    Returns
    -------
    tuple
        ``(X_train_vect, X_valid_vect, X_test_vect, y_train, y_valid, y_test)``
        in exactly that order.
    """
    # File stems, listed in the order they are returned.
    artifact_names = (
        'X_train_vect',
        'X_valid_vect',
        'X_test_vect',
        'y_train',
        'y_valid',
        'y_test',
    )

    # NOTE: joblib.load unpickles its input; only point this at trusted files.
    return tuple(
        joblib.load(os.path.join(data_dir, name + '.pkl'))
        for name in artifact_names
    )

In [3]:
# Bind the loaded splits to notebook-level names; train_model() reads the
# train and validation ones as globals.
X_train_vect, X_valid_vect, X_test_vect, y_train, y_valid, y_test = load_data()

In [4]:
def create_model_param():
    """Return the hyperparameter search grids, keyed by estimator class name.

    Each value is a ``param_grid`` dictionary (parameter name -> list of
    candidate values). A fresh dictionary is built on every call.
    """
    return {
        # Logistic regression: regularisation strength and iteration budget
        'LogisticRegression': {
            'penalty': ['l2'],
            'C': [0.01, 0.1],
            'max_iter': [100, 300, 500],
        },
        # Support vector classifier: regularisation, kernel and gamma setting
        'SVC': {
            'C': [0.1, 1],
            'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
            'gamma': ['scale', 'auto'],
        },
        # Multinomial naive Bayes: smoothing parameter
        'MultinomialNB': {
            'alpha': [0.1, 0.5, 1.0],
        },
    }

In [5]:
# Display the search grids as a quick sanity check.
create_model_param()

{'LogisticRegression': {'penalty': ['l2'],
  'C': [0.01, 0.1],
  'max_iter': [100, 300, 500]},
 'SVC': {'C': [0.1, 1],
  'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
  'gamma': ['scale', 'auto']},
 'MultinomialNB': {'alpha': [0.1, 0.5, 1.0]}}

In [6]:
def create_model_object():
    """Instantiate the candidate estimators with their default settings.

    Returns
    -------
    list of dict
        One record per estimator, in the order LogisticRegression,
        MultinomialNB, SVC. Each record has the keys ``'model_name'`` (the
        estimator's class name) and ``'model_object'`` (the unfitted instance).
    """
    # Unfitted estimators, in the order they should be tuned
    candidates = (LogisticRegression(), MultinomialNB(), SVC())

    # Pair every estimator with its class name
    return [
        {'model_name': type(candidate).__name__, 'model_object': candidate}
        for candidate in candidates
    ]

In [7]:
# Display the candidate estimator records as a quick sanity check.
create_model_object()

[{'model_name': 'LogisticRegression', 'model_object': LogisticRegression()},
 {'model_name': 'MultinomialNB', 'model_object': MultinomialNB()},
 {'model_name': 'SVC', 'model_object': SVC()}]

In [8]:
def train_model():
    """Grid-search every candidate estimator and persist the outcome.

    Reads the notebook-level variables ``X_train_vect``, ``y_train``,
    ``X_valid_vect`` and ``y_valid``. For each estimator a 5-fold
    ``GridSearchCV`` scored on accuracy is fitted on the training data and
    then evaluated on both the training and validation sets.

    Side effects: writes ``list_of_param.pkl``, ``list_of_model.pkl`` and
    ``list_of_tuned_model.pkl`` to the current working directory.

    Returns
    -------
    tuple
        ``(list_of_param, list_of_model, list_of_tuned_model)`` where the last
        item maps model name -> dict with keys ``'model'``, ``'train_auc'``,
        ``'valid_auc'`` and ``'best_params'``. Note that, despite their names,
        the ``'*_auc'`` entries hold ``accuracy_score`` values.
    """
    # Search grids and untrained estimators
    list_of_param = create_model_param()
    list_of_model = create_model_object()

    # Results collected per model name
    list_of_tuned_model = {}

    for entry in list_of_model:
        model_name = entry['model_name']
        # Work on a deep copy, leaving the object stored in list_of_model untouched
        estimator = copy.deepcopy(entry['model_object'])
        grid = list_of_param[model_name]

        print('Training model :', model_name)

        search = GridSearchCV(
            estimator=estimator,
            param_grid=grid,
            cv=5,
            n_jobs=1,
            verbose=10,
            scoring='accuracy',
        )
        search.fit(X_train_vect, y_train)

        # Predict on both splits with the fitted search object
        train_predictions = search.predict(X_train_vect)
        valid_predictions = search.predict(X_valid_vect)

        # Accuracy on each split (stored under the legacy '*_auc' keys)
        train_accuracy = accuracy_score(y_train, train_predictions)
        valid_accuracy = accuracy_score(y_valid, valid_predictions)

        list_of_tuned_model[model_name] = {
            'model': search,
            'train_auc': train_accuracy,
            'valid_auc': valid_accuracy,
            'best_params': search.best_params_,
        }

        print("Done training")
        print("")

    # Persist grids, base estimators and tuned results
    artifacts = (
        (list_of_param, 'list_of_param.pkl'),
        (list_of_model, 'list_of_model.pkl'),
        (list_of_tuned_model, 'list_of_tuned_model.pkl'),
    )
    for payload, filename in artifacts:
        joblib.dump(payload, filename)

    return list_of_param, list_of_model, list_of_tuned_model

In [9]:
# Run every grid search; this also writes the three result pickles to disk.
list_of_param, list_of_model, list_of_tuned_model = train_model()

Training model : LogisticRegression
Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV 1/5; 1/6] START C=0.01, max_iter=100, penalty=l2............................




[CV 1/5; 1/6] END C=0.01, max_iter=100, penalty=l2;, score=0.322 total time=   1.1s
[CV 2/5; 1/6] START C=0.01, max_iter=100, penalty=l2............................
[CV 2/5; 1/6] END C=0.01, max_iter=100, penalty=l2;, score=0.312 total time=   1.2s
[CV 3/5; 1/6] START C=0.01, max_iter=100, penalty=l2............................
[CV 3/5; 1/6] END C=0.01, max_iter=100, penalty=l2;, score=0.317 total time=   1.2s
[CV 4/5; 1/6] START C=0.01, max_iter=100, penalty=l2............................
[CV 4/5; 1/6] END C=0.01, max_iter=100, penalty=l2;, score=0.342 total time=   1.1s
[CV 5/5; 1/6] START C=0.01, max_iter=100, penalty=l2............................
[CV 5/5; 1/6] END C=0.01, max_iter=100, penalty=l2;, score=0.361 total time=   1.1s
[CV 1/5; 2/6] START C=0.01, max_iter=300, penalty=l2............................
[CV 1/5; 2/6] END C=0.01, max_iter=300, penalty=l2;, score=0.322 total time=   1.2s
[CV 2/5; 2/6] START C=0.01, max_iter=300, penalty=l2............................
[CV 2/5; 2



[CV 1/5; 3/3] END ....................alpha=1.0;, score=0.438 total time=   0.0s
[CV 2/5; 3/3] START alpha=1.0...................................................
[CV 2/5; 3/3] END ....................alpha=1.0;, score=0.428 total time=   0.0s
[CV 3/5; 3/3] START alpha=1.0...................................................
[CV 3/5; 3/3] END ....................alpha=1.0;, score=0.423 total time=   0.0s
[CV 4/5; 3/3] START alpha=1.0...................................................
[CV 4/5; 3/3] END ....................alpha=1.0;, score=0.433 total time=   0.0s
[CV 5/5; 3/3] START alpha=1.0...................................................
[CV 5/5; 3/3] END ....................alpha=1.0;, score=0.439 total time=   0.0s
Done training

Training model : SVC
Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5; 1/16] START C=0.1, gamma=scale, kernel=linear..........................




[CV 1/5; 1/16] END C=0.1, gamma=scale, kernel=linear;, score=0.312 total time=   3.3s
[CV 2/5; 1/16] START C=0.1, gamma=scale, kernel=linear..........................
[CV 2/5; 1/16] END C=0.1, gamma=scale, kernel=linear;, score=0.334 total time=   3.3s
[CV 3/5; 1/16] START C=0.1, gamma=scale, kernel=linear..........................
[CV 3/5; 1/16] END C=0.1, gamma=scale, kernel=linear;, score=0.301 total time=   3.3s
[CV 4/5; 1/16] START C=0.1, gamma=scale, kernel=linear..........................
[CV 4/5; 1/16] END C=0.1, gamma=scale, kernel=linear;, score=0.310 total time=   3.3s
[CV 5/5; 1/16] START C=0.1, gamma=scale, kernel=linear..........................
[CV 5/5; 1/16] END C=0.1, gamma=scale, kernel=linear;, score=0.370 total time=   3.3s
[CV 1/5; 2/16] START C=0.1, gamma=scale, kernel=poly............................
[CV 1/5; 2/16] END C=0.1, gamma=scale, kernel=poly;, score=0.212 total time=   4.8s
[CV 2/5; 2/16] START C=0.1, gamma=scale, kernel=poly............................


In [10]:
def get_best_model():
    """Pick the tuned model with the highest validation score and persist it.

    Loads ``list_of_tuned_model.pkl``, selects the entry whose ``'valid_auc'``
    value is largest (the first such entry wins on ties), writes that entry's
    ``'model'`` object to ``best_model.pkl`` and prints a short summary.

    Returns
    -------
    object
        The ``'model'`` object of the winning entry.

    Raises
    ------
    ValueError
        If the loaded collection is empty. Previously this case silently
        pickled ``None`` as the "best model".
    """
    # Load tuned models (model name -> result dict)
    list_of_tuned_model = joblib.load('list_of_tuned_model.pkl')

    if not list_of_tuned_model:
        raise ValueError(
            "'list_of_tuned_model.pkl' contains no tuned models to choose from"
        )

    # max() returns the first maximal item, matching a strict '>' scan,
    # and needs no magic sentinel as a starting score.
    best_model_name, best_entry = max(
        list_of_tuned_model.items(),
        key=lambda item: item[1]['valid_auc'],
    )
    best_model = best_entry['model']
    best_performance = best_entry['valid_auc']
    best_model_param = best_entry['best_params']

    # Dump the best model
    joblib.dump(best_model, 'best_model.pkl')

    # Print summary
    print('=============================================')
    print('Best model        :', best_model_name)
    print('Metric score      :', best_performance)
    print('Best model params :', best_model_param)
    print('=============================================')

    return best_model



In [11]:
# Select the best tuned model, save it to best_model.pkl and print the summary.
get_best_model()

Best model        : SVC
Metric score      : 0.7025
Best model params : {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
