In [32]:
# load the train and test data
# train the algorithm
# save the metrics and params
import os
import warnings
import sys
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import ElasticNet
from src.get_data import read_params
import argparse
import joblib
import json


def eval_metrics(actual, pred):
    """Score predictions against the observed values.

    Args:
        actual: Observed target values.
        pred: Predicted values to compare against ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: the square root of the mean squared
        error, the mean absolute error, and the R^2 score.
    """
    return (
        np.sqrt(mean_squared_error(actual, pred)),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )


In [40]:
# Read every pipeline setting from the shared parameter file.
config = read_params("params.yaml")

# File locations: the train/test CSV paths listed under "split_data" and the
# directory the fitted model is written to.
train_data_path = config["split_data"]["train_path"]
test_data_path = config["split_data"]["test_path"]
model_dir = config["model_dir"]

# Seed plus the ElasticNet hyperparameters.
random_state = config["base"]["random_state"]
elasticnet_params = config["estimators"]["ElasticNet"]["params"]
alpha = elasticnet_params["alpha"]
l1_ratio = elasticnet_params["l1_ratio"]

# Keep the target both as a bare column name (for selecting it) and as a
# one-item list (for dropping it from the feature frame).
target_col = config["base"]["target_col"]
target = [target_col]

# Load the training split.
train = pd.read_csv(train_data_path, sep=",")

In [41]:
# Load the held-out test split from the path configured in params.yaml.
test = pd.read_csv(test_data_path, sep=",")

In [42]:
# Sanity check: show the name of the second column (position 1) of the training frame.
train.columns[1]

'volatile acidity'

In [None]:


# Separate features from the target for both splits.
train_x = train.drop(columns=target)
test_x = test.drop(columns=target)
train_y = train[target_col]
test_y = test[target_col]

# Fit the ElasticNet regressor with the configured hyperparameters and seed.
lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=random_state)
lr.fit(train_x, train_y)

# Score the model on the held-out split.
predicted_qualities = lr.predict(test_x)
rmse, mae, r2 = eval_metrics(test_y, predicted_qualities)

print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
print("  RMSE: %s" % rmse)
print("  MAE: %s" % mae)
print("  R2: %s" % r2)

# Persist the fitted model, creating the output directory on first use.
model_path = os.path.join(model_dir, "model.joblib")
os.makedirs(model_dir, exist_ok=True)

joblib.dump(lr, model_path)