In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

In [2]:
dataset = pd.read_csv("../data/raw/train.csv")
#dataset = pd.read_csv("train.csv")
dataset.head()

Unnamed: 0,CustomerID,Age,Gender,Income,VisitFrequency,AverageSpend,PreferredCuisine,TimeOfVisit,GroupSize,DiningOccasion,MealType,OnlineReservation,DeliveryOrder,LoyaltyProgramMember,WaitTime,ServiceRating,FoodRating,AmbianceRating,HighSatisfaction
0,1457,34,Female,138842,Weekly,162.954929,Indian,Dinner,8,Celebration,Dine-in,1,0,1,4.228618,1,5,3,1
1,1371,54,Male,98671,Rarely,66.918873,Indian,Lunch,4,Casual,Takeaway,0,0,0,50.247186,1,2,2,0
2,1505,47,Male,122351,Monthly,94.12767,Indian,Lunch,7,Casual,Takeaway,0,1,0,10.174873,3,3,5,0
3,1011,38,Male,78868,Weekly,92.705568,Mexican,Dinner,4,Business,Takeaway,0,0,1,14.237746,4,1,4,0
4,1016,50,Female,128686,Monthly,166.931144,American,Dinner,8,Business,Dine-in,0,0,0,56.319628,4,3,4,0


In [3]:
# configuración del Pipeline
TARGET = 'HighSatisfaction'
VARS_TO_DROP = ['CustomerID','PreferredCuisine','Gender','TimeOfVisit','AverageSpend','Age', TARGET]
#CONTINUE_VARS_TO_IMPUTATION = ['Age', 'Fare']
#CATEGORICAL_VARS_TO_IMPUTATION = ['Embarked', 'Sex']
OHE_VAR_TO_ENCODE = ['MealType']
FREQENC_VARS_TO_ENCODE = ['VisitFrequency','DiningOccasion']

In [4]:
x_features = dataset.drop(labels=VARS_TO_DROP, axis=1)
y_target = dataset[TARGET]
x_train, x_test, y_train, y_test = train_test_split(x_features, y_target, test_size=0.3, shuffle=True, random_state=2025)

In [5]:
from sklearn.pipeline import Pipeline
from feature_engine.encoding import OneHotEncoder
from feature_engine.encoding import CountFrequencyEncoder 

from sklearn.preprocessing import StandardScaler

In [6]:
restaurant_predict_model = Pipeline([
    
    #Codificación de variabales categóricas 
    ('categorical_encoding_ohe', OneHotEncoder(variables=OHE_VAR_TO_ENCODE, drop_last=True)),
    ('caregorical_encoding_freq_enc', CountFrequencyEncoder(encoding_method='count', variables=FREQENC_VARS_TO_ENCODE)),

    # estandarización de variables.
    ('feature_scaling', StandardScaler())
])

In [7]:
# configuramos pipeline
restaurant_predict_model.fit(x_train)

In [8]:
x_fatures_processed = restaurant_predict_model.transform(x_train)
df_fatures_process = pd.DataFrame(x_fatures_processed, columns=x_train.columns)
df_fatures_process[TARGET] = y_train.reset_index()['HighSatisfaction']

#guardamos los datos para entrenar los modelos.
df_fatures_process.to_csv('../data/processed/features_for_model.csv', index=False)
#df_fatures_process.to_csv('features_for_model.csv', index=False)
df_fatures_process.head()

Unnamed: 0,Income,VisitFrequency,GroupSize,DiningOccasion,MealType,OnlineReservation,DeliveryOrder,LoyaltyProgramMember,WaitTime,ServiceRating,FoodRating,AmbianceRating,HighSatisfaction
0,-0.811886,0.998348,1.64311,1.227147,1.271725,-0.965987,-1.190903,-1.476591,1.274025,0.519774,-1.427837,1.188975,0
1,-1.389716,-0.640047,1.64311,1.227147,-0.786334,-0.965987,0.839699,0.089912,1.274025,-1.554973,-0.064317,-0.84106,0
2,0.202765,0.998348,1.64311,1.227147,1.271725,1.035211,-1.190903,1.934359,1.274025,0.519774,-0.746077,-0.84106,1
3,1.523823,0.998348,-1.485101,-0.871187,-0.786334,-0.965987,0.839699,0.242351,1.274025,0.519774,-0.746077,-0.84106,0
4,0.02131,0.998348,1.64311,-0.871187,-0.786334,1.035211,0.839699,0.79589,0.57671,1.211356,-0.064317,1.188975,0


In [9]:
import pickle

x_test[TARGET] = y_test
x_test.to_csv('../data/processed/test_dataset.csv', index=False)

with open('../artifacts/pipeline.pkl', 'wb') as f:
    pickle.dump(restaurant_predict_model, f)

In [10]:
y_test

615     1
1353    0
994     1
273     1
1154    0
       ..
1025    1
505     1
1153    0
839     1
888     0
Name: HighSatisfaction, Length: 546, dtype: int64

In [11]:
x_test

Unnamed: 0,Income,VisitFrequency,GroupSize,DiningOccasion,MealType,OnlineReservation,DeliveryOrder,LoyaltyProgramMember,WaitTime,ServiceRating,FoodRating,AmbianceRating,HighSatisfaction
615,119159,Monthly,4,Casual,Dine-in,0,1,0,3.925785,3,3,3,1
1353,84953,Weekly,3,Celebration,Takeaway,0,1,0,49.826118,5,5,2,0
994,68238,Weekly,1,Business,Takeaway,1,0,1,23.252418,2,4,4,1
273,103678,Rarely,6,Celebration,Dine-in,1,0,0,20.138905,3,1,4,1
1154,123048,Rarely,5,Celebration,Takeaway,0,0,0,25.638408,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1025,145234,Monthly,5,Casual,Dine-in,1,1,1,12.221203,5,5,1,1
505,117887,Weekly,4,Casual,Takeaway,1,0,0,55.462106,3,1,1,1
1153,48397,Weekly,8,Celebration,Takeaway,0,0,1,5.115625,4,2,2,0
839,47731,Rarely,2,Celebration,Dine-in,1,1,1,6.428325,4,4,1,1
