In [1]:
from ml.data.get_data import get_data
from ml.preprocess.preprocessor import Preprocessor
from ml.evaluate.generic_model import GenericModel
from sklearn import svm
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from ml.evaluate.model_evaluator import ModelEvaluator
from ml.model.score import RMSE, MAE
from ml.preprocess.train_test_splitter import TrainTestSplitter
from ml.evaluate.model_evaluator import ScoreCriteria

ModuleNotFoundError: No module named 'ml.preprocess'

In [2]:
# Load the dataset through the project's `get_data` helper; the result is
# what the preprocessing cell receives as its `data` argument.
dataset = get_data()

In [3]:
# Build the preprocessor over the loaded dataset, passing 'SalePrice' as the
# class column, then run it to obtain the processed frame used for splitting.
set_preprocessor: Preprocessor = Preprocessor(
    data=dataset,
    class_column='SalePrice',
)
processed_df = set_preprocessor.preprocess()

In [4]:
# Split the processed frame; `split` is unpacked into exactly five parts.
# NOTE(review): test_size=0.2 presumably is the held-out fraction -- confirm
# against TrainTestSplitter.split.
splitter = TrainTestSplitter(
    df=processed_df,
    class_column='SalePrice',
)
(
    X_train,
    X_valid,
    Y_train,
    Y_valid,
    X_test,
) = splitter.split(test_size=0.2)

In [5]:
# Seed for the random forest so that a fresh "Restart & Run All" builds the
# same trees, yields the same scores and therefore selects the same model.
RANDOM_STATE = 42

# Candidate regressors keyed by a short name; each estimator is wrapped in the
# project's GenericModel so the evaluation cell can treat them uniformly.
models = {
    'SVR': GenericModel(svm.SVR()),
    'RFR': GenericModel(
        RandomForestRegressor(n_estimators=10, random_state=RANDOM_STATE)
    ),
    'LR': GenericModel(LinearRegression()),
}

In [6]:
evaluator: ModelEvaluator = ModelEvaluator()

# Fit every candidate on the training split, then score it on the validation
# split with each metric. `scores` maps model name -> {metric name: score}.
scores = {}
for model_name, model in models.items():
    model.fit(X_train, Y_train)
    # Metrics are evaluated in the listed order: RMSE first, then MAE.
    model_scores = {
        metric_name: evaluator.evaluate(model, X_valid, Y_valid, metric)
        for metric_name, metric in (('RMSE', RMSE), ('MAE', MAE))
    }
    scores[model_name] = model_scores

# TODO: make choose_best_model accept a Score object instead of a string
best_model_name: str = evaluator.choose_best_model(
    scores,
    ScoreCriteria.MIN,
    'RMSE',
)

In [7]:
# Look the winner up by subscription rather than `.get`: an unexpected name
# then raises KeyError here instead of silently handing None to save_model.
best_model = models[best_model_name]

# Persist the selected model wrapper to the given path.
evaluator.save_model(best_model, 'model/final_model.pkl')