In [1]:
# Presumably a random seed for reproducibility; no cell visible in this notebook
# section reads it -- TODO confirm whether it is still needed.
seed = 161
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Composicion de pipelines
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import MinMaxScaler

# Regresion lineal
from sklearn.linear_model import LinearRegression

# Importar/ Exportar modelos
from joblib import dump, load

# Metricas
from sklearn.metrics import mean_squared_error as mse

# q-q plots
import scipy.stats as stats

In [2]:
# Name of the target column used by the later cells.
var_obj = 'Admission Points'

# Load the training data; the first CSV column is used as the DataFrame index.
file_name = './Data/university_admission_train.csv'
df_original = pd.read_csv(
    file_name,
    sep=',',
    encoding='utf-8',
    index_col=0,
)

In [3]:
# Work on a copy so df_original stays untouched, and discard the rows
# whose target value is missing (they cannot be used for training).
df_prep = (
    df_original
    .copy()
    .dropna(subset=[var_obj])
)

In [4]:
# Columns used as features for the regression
selected_cols = ['CGPA', 'University Rating', 'Research']

# Transformer applied to the numeric features: MinMaxScaler() with its default settings
numeric_transformer = Pipeline(steps=[('scaler', MinMaxScaler())])

# ColumnTransformer concatenates the output of every entry in `transformers`,
# so listing selected_cols once as 'passthrough' and once through the scaler
# handed the regression each feature twice (raw + rescaled): a perfectly
# collinear design matrix with non-identifiable coefficients.
# The column list of the 'num' entry already performs the selection, and the
# columns not listed are dropped by ColumnTransformer's default remainder.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, selected_cols)])
pre = [('initial', preprocessor),]
model = [('model', LinearRegression())]

# Build the pipeline from the preprocessing steps (pre) followed by the estimator (model)
pipeline = Pipeline(pre+model)

In [5]:
 # Split the prepared frame into the target (y) and the explanatory variables (X)
y = df_prep[var_obj]
X = df_prep.drop(columns=var_obj)

# Train the whole pipeline (preprocessing + regression) and keep the fitted result
pipeline = pipeline.fit(X, y)

In [6]:
# Persist the fitted pipeline with joblib
filename = 'modelo.joblib'
model_path = "LAB4 - API/assets/" + filename
# dump() stays the last expression of the cell so the list of written files is displayed
dump(pipeline, model_path)

['LAB4 - API/assets/modelo.joblib']