In [1]:
import pandas as pd
import numpy as np
import joblib

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, Binarizer
from sklearn.linear_model import Lasso
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import Pipeline

from feature_engine.imputation import (AddMissingIndicator, MeanMedianImputer, CategoricalImputer)
from feature_engine.encoding import (RareLabelEncoder, OrdinalEncoder)
from feature_engine.transformation import LogTransformer
from feature_engine.selection import DropFeatures
from feature_engine.wrappers import SklearnTransformerWrapper

# 1) Cargamos y dividimos la data en train y test

In [2]:
# Load the raw dataset from the CSV file that sits next to the notebook.
data = pd.read_csv('dataset.csv')

# Separate the 'G3' column (the prediction target) from the remaining columns
# and hold out 15% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
xTrain, xTest, yTrain, yTest = train_test_split(
    data.drop(columns=['G3']),
    data['G3'],
    test_size=0.15,
    random_state=2021,
)

# 2) Configuración del pipeline de machine learning

In [4]:
# Column groups used to configure the preprocessing steps of the pipeline.
# Each list holds dataset column names; the order inside a list is preserved.

# Passed to the 'categorical_binary_encoder' pipeline step.
CATEGORICAL_BINARY = [
    'school', 'sex', 'address', 'famsize', 'Pstatus',
    'schoolsup', 'famsup', 'paid', 'activities', 'nursery',
    'higher', 'internet', 'romantic',
]

# Not referenced by any pipeline step visible in this notebook section.
CATEGORICAL_ORDINAL = [
    'Medu', 'Fedu', 'traveltime', 'studytime', 'failures',
    'famrel', 'freetime', 'goout', 'Dalc', 'Walc', 'health',
]

# Passed to the 'categorical_encoder' pipeline step.
CATEGORICAL_NOMINAL = [
    'Mjob', 'Fjob', 'reason', 'guardian',
]

# Passed to the 'log' pipeline step.
NUMERICALS_LOG_VARS = [
    'age', 'absences',
]

In [None]:
# Columns fed to the pipeline: the concatenation of every declared variable
# group. The encoder and log-transform steps below reference three of these
# groups by name, so those columns must be present in the training frame.
# NOTE(review): FEATURES was not defined anywhere before this cell, so the
# column selection at the bottom raised NameError on a fresh kernel
# (Restart & Run All). If a narrower, hand-picked feature list was intended,
# replace this definition with it -- TODO confirm.
FEATURES = (
    CATEGORICAL_BINARY
    + CATEGORICAL_ORDINAL
    + CATEGORICAL_NOMINAL
    + NUMERICALS_LOG_VARS
)

# End-to-end pipeline: numeric transformation -> categorical encoding ->
# scaling -> Lasso regression. Step names are part of the pipeline's interface
# (named_steps / set_params), so they are kept exactly as they were.
gradePipeline = Pipeline([
    #==== NUMERICAL VARIABLE TRANSFORMATION ====#
    #- Logarithmic transformation
    # NOTE(review): LogTransformer rejects zero or negative values; confirm
    # that 'absences' contains no zeros in dataset.csv, otherwise fitting fails.
    ('log', LogTransformer(variables=NUMERICALS_LOG_VARS)),

    #==== VARIABLE ENCODING ====#
    #- Binary categorical variables
    # encoding_method='ordered' derives the integer codes from the target, so
    # the pipeline must be fitted with y (e.g. gradePipeline.fit(xTrain, yTrain)).
    ('categorical_binary_encoder', OrdinalEncoder(encoding_method='ordered', variables=CATEGORICAL_BINARY)),

    #- Nominal categorical variables
    ('categorical_encoder', OrdinalEncoder(encoding_method='ordered', variables=CATEGORICAL_NOMINAL)),

    #==== Scaling ====#
    ('scaler', MinMaxScaler()),

    #==== Model training with Lasso ====#
    ('Lasso', Lasso(alpha=0.01, random_state=2021)),
])

# Keep only the modelling columns in the training frame. Re-running this cell
# is safe: selecting the same columns again yields the same frame.
xTrain = xTrain[FEATURES]
    