In [124]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import recall_score,precision_score,roc_auc_score,accuracy_score
import pickle

In [36]:
data = pd.read_csv('restaurant_customer_satisfaction.csv')

In [37]:
data.tail()

Unnamed: 0,CustomerID,Age,Gender,Income,VisitFrequency,AverageSpend,PreferredCuisine,TimeOfVisit,GroupSize,DiningOccasion,MealType,OnlineReservation,DeliveryOrder,LoyaltyProgramMember,WaitTime,ServiceRating,FoodRating,AmbianceRating,HighSatisfaction
1495,2149,39,Male,114857,Monthly,163.015254,American,Lunch,2,Business,Dine-in,0,1,1,7.206275,1,2,1,0
1496,2150,37,Female,133506,Weekly,190.991911,Italian,Lunch,4,Casual,Takeaway,0,0,0,37.863952,5,2,2,0
1497,2151,46,Male,119159,Monthly,150.088604,American,Lunch,4,Casual,Dine-in,0,1,0,3.925785,3,3,3,1
1498,2152,24,Male,27970,Weekly,196.363626,Italian,Dinner,6,Casual,Dine-in,1,1,0,24.228038,2,2,4,1
1499,2153,51,Male,148333,Weekly,171.119498,Chinese,Breakfast,4,Casual,Dine-in,1,1,1,39.402163,4,5,4,1


In [38]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 19 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   CustomerID            1500 non-null   int64  
 1   Age                   1500 non-null   int64  
 2   Gender                1500 non-null   object 
 3   Income                1500 non-null   int64  
 4   VisitFrequency        1500 non-null   object 
 5   AverageSpend          1500 non-null   float64
 6   PreferredCuisine      1500 non-null   object 
 7   TimeOfVisit           1500 non-null   object 
 8   GroupSize             1500 non-null   int64  
 9   DiningOccasion        1500 non-null   object 
 10  MealType              1500 non-null   object 
 11  OnlineReservation     1500 non-null   int64  
 12  DeliveryOrder         1500 non-null   int64  
 13  LoyaltyProgramMember  1500 non-null   int64  
 14  WaitTime              1500 non-null   float64
 15  ServiceRating        

In [39]:
data.describe()

Unnamed: 0,CustomerID,Age,Income,AverageSpend,GroupSize,OnlineReservation,DeliveryOrder,LoyaltyProgramMember,WaitTime,ServiceRating,FoodRating,AmbianceRating,HighSatisfaction
count,1500.0,1500.0,1500.0,1500.0,1500.0,1500.0,1500.0,1500.0,1500.0,1500.0,1500.0,1500.0,1500.0
mean,1403.5,43.832,85921.89,105.659004,5.035333,0.296667,0.405333,0.48,30.16355,3.044,2.997333,2.987333,0.134
std,433.157015,14.967157,38183.051749,52.381849,2.558864,0.456941,0.49112,0.499766,17.214184,1.423405,1.41892,1.450716,0.340766
min,654.0,18.0,20012.0,10.306127,1.0,0.0,0.0,0.0,0.00138,1.0,1.0,1.0,0.0
25%,1028.75,31.75,52444.0,62.287907,3.0,0.0,0.0,0.0,15.235423,2.0,2.0,2.0,0.0
50%,1403.5,44.0,85811.0,104.626408,5.0,0.0,0.0,0.0,30.044055,3.0,3.0,3.0,0.0
75%,1778.25,57.0,119159.25,148.64933,7.0,1.0,1.0,1.0,45.285649,4.0,4.0,4.0,0.0
max,2153.0,69.0,149875.0,199.973527,9.0,1.0,1.0,1.0,59.970762,5.0,5.0,5.0,1.0


In [40]:
data.drop(['CustomerID'],axis=1,inplace=True)

In [41]:
data.isna().sum()

Age                     0
Gender                  0
Income                  0
VisitFrequency          0
AverageSpend            0
PreferredCuisine        0
TimeOfVisit             0
GroupSize               0
DiningOccasion          0
MealType                0
OnlineReservation       0
DeliveryOrder           0
LoyaltyProgramMember    0
WaitTime                0
ServiceRating           0
FoodRating              0
AmbianceRating          0
HighSatisfaction        0
dtype: int64

In [106]:
X = data.drop('HighSatisfaction', axis=1)
y = data['HighSatisfaction']

In [91]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [138]:
num = X_train.select_dtypes(include=['number']).columns
cat = X_train.select_dtypes(include=['object']).columns

In [139]:
num_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])
cat_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

In [140]:
preprocess_steps = ColumnTransformer(transformers=[
    ('num', num_transformer, num),
    ('cat', cat_transformer, cat)
])

In [141]:
model = Pipeline(steps=[
    ('preprocess', preprocess_steps),
    ('classifier', LogisticRegression()),
])

In [142]:
model.fit(X_train,y_train)

In [123]:
with open('pipeline_1.pkl','wb') as file:
    pickle.dump(model,file)

In [None]:
X_test.csv('testing.csv')

In [117]:
cls = {
    'RandomForest':RandomForestClassifier(),
    'Logistic':LogisticRegression(),
    'SVM': SVC()
}

In [120]:
for name,model in cls.items():
    print('==============================================================')
    pipe = Pipeline(steps=[
    ('preprocess', preprocess_steps),
    ('models', model)
    ])
    pipe.fit(X_train,y_train)
    print('fitting for',name,'classifier')
    y_pred = pipe.predict(X_test)
    precision= precision_score(y_test,y_pred)
    RS= recall_score(y_test,y_pred)
    AUC = roc_auc_score(y_test,y_pred)
    accuracy = accuracy_score(y_test,y_pred)
    print(f'{name}_precision = {precision}')
    print(f'{name}_RS = {RS}')
    print(f'{name}_AUC = {AUC}')
    print(f'{name}_accuracy = {accuracy}')
    print('-------------------------------------------------------------------------------------')

fitting for RandomForest classifier
RandomForest_precision = 1.0
RandomForest_RS = 0.0975609756097561
RandomForest_AUC = 0.5487804878048781
RandomForest_accuracy = 0.8766666666666667
-------------------------------------------------------------------------------------
fitting for Logistic classifier
Logistic_precision = 0.75
Logistic_RS = 0.5121951219512195
Logistic_AUC = 0.7425840474620964
Logistic_accuracy = 0.91
-------------------------------------------------------------------------------------
fitting for SVM classifier
SVM_precision = 0.9333333333333333
SVM_RS = 0.34146341463414637
SVM_AUC = 0.6688012053865713
SVM_accuracy = 0.9066666666666666
-------------------------------------------------------------------------------------
