In [13]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer


In [40]:
from data import restaurant_data

classes = ['Yes', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'No', 'No', 'Yes']
col_labels= ['Alternate', 'Bar', 'Friday', 'Hungry', 'Patron', 'Price', 'Raining', 'Reservation', 'Type', 'Estimated_Time']

dataFrame = pd.DataFrame(restaurant_data).transpose()
df=dataFrame.copy()
df.columns=col_labels
df

Unnamed: 0,Alternate,Bar,Friday,Hungry,Patron,Price,Raining,Reservation,Type,Estimated_Time
0,Yes,No,No,Yes,Some,$$$,No,Yes,French,0-10
1,Yes,No,No,Yes,Full,$,No,No,Thai,30-60
2,No,Yes,No,No,Some,$,No,No,Burger,0-10
3,Yes,No,Yes,Yes,Full,$,Yes,No,Thai,10-30
4,Yes,No,Yes,No,Full,$$$,No,Yes,French,>60
5,No,Yes,No,Yes,Some,$$,Yes,Yes,Italian,0-10
6,No,Yes,No,No,,$,Yes,No,Burger,0-10
7,No,No,No,Yes,Some,$$,Yes,Yes,Thai,0-10
8,No,Yes,Yes,No,Full,$,Yes,No,Burger,>60
9,Yes,Yes,Yes,Yes,Full,$$$,No,Yes,Italian,10-30


In [44]:
replacement_mapping = {
    "010": "short",
    "1030": "medium",
    "3060": "long",
    ">60": "extra_long"
    }

def remove_hyphens(strings_list):
    return [s.replace('-', '') for s in strings_list]

class FeatureEncoder(BaseEstimator, TransformerMixin):
       
    def __init__(self, dict_values):
        self.dict_values= dict_values
        
    def fit(self, X, y=None):
        return self

    def transform(self, X): 
        # X - dataFrame
        dictionary_values = self.dict_values
        for key in dictionary_values:
            if key == "Estimated_Time":
                estimated_time_column= X[key]
               
                processed_list = remove_hyphens(strings_list=list(estimated_time_column.values))                
                X[key] =  pd.Series(processed_list).replace(replacement_mapping)
                continue
            X[key] = X[key].replace(
                dictionary_values[key]
            )   
        return X   
    
def generate_yes_no_dict():
    return {
        "Yes":1,
        "No":0
    }

dict_values = {
    "Alternate": generate_yes_no_dict(),
    "Bar": generate_yes_no_dict(),
    "Friday": generate_yes_no_dict(),
    "Hungry": generate_yes_no_dict(),
    "Price": {
        "$":100,
        "$$":200,
        "$$$":300 },
    
    "Raining":generate_yes_no_dict(),
    "Reservation": generate_yes_no_dict(),
    "Type": generate_yes_no_dict(),
    "Estimated_Time": replacement_mapping
}

encoder = FeatureEncoder(dict_values=dict_values)
transformed_data_frame = encoder.fit_transform(df)
transformed_data_frame.sample(10)



Unnamed: 0,Alternate,Bar,Friday,Hungry,Patron,Price,Raining,Reservation,Type,Estimated_Time
7,0,0,0,1,Some,200,1,1,Thai,short
9,1,1,1,1,Full,300,0,1,Italian,medium
3,1,0,1,1,Full,100,1,0,Thai,medium
8,0,1,1,0,Full,100,1,0,Burger,extra_long
1,1,0,0,1,Full,100,0,0,Thai,long
4,1,0,1,0,Full,300,0,1,French,extra_long
2,0,1,0,0,Some,100,0,0,Burger,short
0,1,0,0,1,Some,300,0,1,French,short
6,0,1,0,0,,100,1,0,Burger,short
5,0,1,0,1,Some,200,1,1,Italian,short


In [21]:
import pandas as pd

data = ["0-10", "30-60", "0-10", "10-30", ">60", "0-10", "0-10", "0-10", ">60", "10-30", "0-10", "30-60"]

series = pd.Series(data)
replacement_mapping = {
    "0-10": "short",
    "10-30": "medium",
    "30-60": "long",
    ">60": "extra_long"
}
series = series.replace(replacement_mapping)
print(series)

0          short
1           long
2          short
3         medium
4     extra_long
5          short
6          short
7          short
8     extra_long
9         medium
10         short
11          long
dtype: object


In [11]:
# Sample data with a categorical column
data = {'Category': ['A', 'B', 'C', 'A', 'C', 'B']}
df = pd.DataFrame(data)

# Perform one-hot encoding using Pandas' get_dummies function
one_hot_encoded = pd.get_dummies(df, columns=['Category'])

# Display the one-hot encoded DataFrame
print(one_hot_encoded)

   Category_A  Category_B  Category_C
0        True       False       False
1       False        True       False
2       False       False        True
3        True       False       False
4       False       False        True
5       False        True       False


In [9]:
def predict_churn(customer_details, model):
    new_instance_df = pd.DataFrame([customer_details])
    print(new_instance_df.shape)
    print(new_instance_df)
    return model.predict(new_instance_df)[0]


dataFrame = df.drop("CustomerID", axis=1)
columns_of_interest = ['NumberOfDeviceRegistered', 'PreferedOrderCat', 'Tenure', 'Gender', 'OrderCount', 'Churn']

data_frame_of_interest = df[columns_of_interest].dropna(subset=['OrderCount', 'Tenure'])

#Pipelines are used for transforming some specific single COLUMN or the whole dataframe , else specified by the column transformer for some specific columns to be transformed.The df.dropna() removes rows  that have NAN values in the specified columns.

numerical_transformer = Pipeline([
    ('scaler', StandardScaler())
])

super_encoder = Pipeline([
    ('encoder', FeatureEncoder())
])

categorical_transformer = Pipeline([
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessing = ColumnTransformer(
    transformers=[        
        ('customFeatureEncoder', super_encoder),
        ('num', numerical_transformer, ['NumberOfDeviceRegistered', 'Tenure', 'OrderCount']),
        ('cat', categorical_transformer, ['PreferedOrderCat', 'Gender'])
    ]
)

lr_pipe = Pipeline([
    ('preprocessing', preprocessing),
    ('lr_estimator', LogisticRegression())
])

X = data_frame_of_interest.drop(['Churn'], axis=1)
Y = data_frame_of_interest['Churn']
X_train, X_test, Y_train, Y_test = train_test_split(
    X, 
    Y, 
    test_size=0.2,
    random_state=42,
    random_sate = 42,
    shuffle=True
)
lr_pipe.fit(X_train, Y_train)

y_pred = lr_pipe.predict(X_test)
accuracy = accuracy_score(Y_test, y_pred)
print('Accuracy:', accuracy)

instance = {
    "NumberOfDeviceRegistered": 2,
    "PreferedOrderCat": "Mobiles",
    "Tenure": 10,
    "Gender": "Female",
    "OrderCount": 3
}

prediction_for_one_customer = predict_churn(instance, lr_pipe)
lr_pipe.named_steps["logistic_regression"].predict(instance)
print("sth very cool here :: ", lr_pipe.named_steps["logistic_regression"].predict(instance))
print("Prediction for one Customer: {}".format(prediction_for_one_customer))
    

KeyError: "['CustomerID'] not found in axis"