# Import libraries

In [7]:
import os, pickle, json

import pandas as pd

from sklearn.metrics import mean_squared_error, mean_absolute_error

from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import  AdaBoostRegressor, BaggingRegressor

# Load test set

In [4]:
# Read the processed test-set CSV from disk.
df = pd.read_csv('../dataset/cleaned/test_processed_df.csv')
print('load data finished', '-'*30, sep='\n')

# reset_index() turns the row index into an 'index' column; that column is
# removed again below, together with the target, when building x.
df = df.reset_index()

# y is the regression target; x is every remaining column.
y = df['completion-time-in-minutes']
x = df.drop(columns=['completion-time-in-minutes', 'index'])

print('prepare data finished', '-'*30, sep='\n')
print('total test set:', len(x))

load data finished
------------------------------
prepare data finished
------------------------------


# Evaluation

To evaluate the model, mean squared error (MSE) and mean absolute error (MAE) are measured.

In [5]:
model_dir = '../model/'

def evaluate(model_name):
    """Load a saved model and print its error metrics on the test set.

    Unpickles ``model.pkl`` from ``<model_dir>/<model_name>/``, prints the
    contents of ``best_params.json`` from the same directory when that file
    exists, then predicts on the notebook-level ``x`` and prints the mean
    squared error and mean absolute error against ``y`` (rounded to two
    decimals).

    Parameters
    ----------
    model_name : str
        Name of the sub-directory of ``model_dir`` that holds the model files.

    Returns
    -------
    None
        All results are printed.

    Raises
    ------
    FileNotFoundError
        If ``model.pkl`` does not exist for ``model_name``.
    """
    print('evaluating model', model_name)

    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file, so only load model files that this project produced itself.
    with open(os.path.join(model_dir, model_name, 'model.pkl'), 'rb') as f:
        model = pickle.load(f)

    # best_params.json is optional (e.g. plain linear regression has no tuned
    # parameters). Only a *missing* file is silently ignored; an unreadable or
    # corrupt file is reported instead of being swallowed.
    params_path = os.path.join(model_dir, model_name, 'best_params.json')
    best_params = None
    try:
        with open(params_path, 'r') as f:
            best_params = json.load(f)
    except FileNotFoundError:
        pass
    except (OSError, json.JSONDecodeError) as err:
        print('could not read best params:', err)

    if isinstance(best_params, dict):
        print('best params')
        for k, v in best_params.items():
            print('  {}:{}'.format(k, v))

    pred = model.predict(x)

    mse = mean_squared_error(y, pred)
    mae = mean_absolute_error(y, pred)

    print('mean squared error:', round(mse, 2))
    print('mean absolute error:', round(mae, 2))
    print('-'*30)

In [None]:
# Report test-set MSE / MAE for the saved LinearRegression model.
evaluate(model_name='LinearRegression')

In [8]:
# Report test-set MSE / MAE for the saved Lasso model.
evaluate(model_name='Lasso')

evaluating model Lasso
best params
  alpha:1
  max_iter:100
mean squared error: 2592.24
mean absolute error: 2.69
------------------------------
