In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
# Read the raw test split from disk and preview its first five rows.
data_test = pd.read_csv(filepath_or_buffer='../data/raw/test.csv')
data_test.head(n=5)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [5]:
# Deserialize the feature-engineering settings stored as a pickle artifact.
# NOTE: unpickling can execute arbitrary code; only load artifacts from a trusted source.
with open('../artifacts/feature_eng_configs.pkl', mode='rb') as config_file:
    feature_eng_configs = pickle.loads(config_file.read())

# Display the loaded settings dictionary.
feature_eng_configs

{'age_imputed_value': 29,
 'embarked_imputed_value': 'S',
 'codificador_embarked': Embarked
 S    646
 C    168
 Q     77
 Name: count, dtype: int64}

In [8]:
# Show the value-count table stored under 'codificador_embarked'; a later cell
# maps the Embarked column through it (frequency encoding).
feature_eng_configs['codificador_embarked']

Embarked
S    646
C    168
Q     77
Name: count, dtype: int64

In [9]:
# Apply the feature engineering to the test set using the values stored in
# `feature_eng_configs` (presumably computed on the training data — confirm).
#
# The whole transformation is a single chained expression, so `data_test` is
# rebound once. `assign` overwrites existing columns in place, which keeps the
# column order unchanged.
data_test = (
    data_test
    # Identifier-like columns are not used as features; Cabin is dropped
    # because it has too many missing values.
    .drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])
    .assign(
        # Impute missing ages with the stored value.
        Age=lambda df: df['Age'].fillna(feature_eng_configs['age_imputed_value']),
        # Binary indicator for Sex (1 = male, 0 = otherwise). This equals
        # get_dummies(..., drop_first=True) on female/male data, but does not
        # depend on which categories happen to be present in the scored data
        # (get_dummies yields zero or several columns in that case and the
        # column assignment fails).
        Sex=lambda df: (df['Sex'] == 'male').astype(int),
        # Impute missing Embarked values, then frequency-encode them with the
        # stored value-count table.
        Embarked=lambda df: (
            df['Embarked']
            .fillna(feature_eng_configs['embarked_imputed_value'])
            .map(feature_eng_configs['codificador_embarked'])
        ),
    )
)

data_test.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,3,1,34.5,0,0,7.8292,77
1,3,0,47.0,1,0,7.0,646
2,2,1,62.0,0,0,9.6875,77
3,3,1,27.0,0,0,8.6625,646
4,3,0,22.0,1,1,12.2875,646


Estandarizamos las variables antes de predecir.

In [None]:
# Deserialize the scaler artifact that is applied to the features before predicting.
# NOTE: unpickling can execute arbitrary code; only load artifacts from a trusted source.
with open('../artifacts/std_scaler.pkl', mode='rb') as scaler_file:
    std_scaler = pickle.loads(scaler_file.read())
std_scaler

In [22]:
# Remove, in place, every row that contains at least one missing value
# (inspect `data_test.isnull().mean()` to see the per-column share of nulls).
# NOTE(review): on a scoring set this silently reduces the number of rows that
# receive a prediction — confirm this is intended.
data_test.dropna(axis='index', how='any', inplace=True)

In [23]:
# Transform the prepared features with the previously loaded scaler object.
# NOTE(review): presumably a fitted scikit-learn scaler, in which case the columns of
# data_test must match those used when it was fit — confirm.
X_data_test_std = std_scaler.transform(data_test)

In [None]:
# Deserialize the trained model artifact.
# NOTE: unpickling can execute arbitrary code; only load artifacts from a trusted source.
# NOTE(review): the file name looks misspelled ("regresssino"); it is kept as-is
# because it must match the artifact on disk — confirm.
with open('../models/logistic_regresssino_v1.pkl', mode='rb') as model_file:
    modelo = pickle.loads(model_file.read())
modelo

In [None]:
# Predict a class label for every standardized test row.
# Fixed: the method name was missing after `modelo.` (a syntax error); `predict`
# matches the 0/1 label array shown in this cell's saved output.
# Assumes `modelo` exposes a scikit-learn style `predict` method — confirm.
model_predicts = modelo.predict(X_data_test_std)
model_predicts

array([0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1,
       0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,