# This notebook is used to export CSV files with different views of the data

In [20]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import pandas as pd
import sqlite3
from pathlib import Path


In [21]:
# Path to the SQLite database, relative to the current working directory.
dbPath = Path("../data/data.sqlite")
if not dbPath.exists():
    # sqlite3.connect() silently creates an empty database when the file is
    # missing, so fail early instead. FileNotFoundError is a subclass of
    # Exception, so existing `except Exception` handlers keep working; the
    # resolved path shows which location was actually checked.
    raise FileNotFoundError(f"Database file does not exist: {dbPath.resolve()}")
# Connection used by the queries in the following cells.
con = sqlite3.connect(dbPath)

In [22]:
# Label lists for the categorical columns. The position of a label inside its
# list is significant (the lists are used through `list.index`), so the order
# of the entries must not be changed.
customerTypeMap = [
    "Student",
    "Teenager",
    "Adult",
    "Senior",
]
customerLocationMap = [
    "Munich District One",
    "Munich District Three",
    "Munich District Two",
    "Munich District Four",
    "Munich District Five",
]
distributionChannelMap = [
    "Feedera SE",
    "Deliver Now Holding",
    "Deliveruu Inc.",
    "Orderly SE",
    "BestOrder Inc.",
    "TownExpress Inc.",
    "Heropizza Lmtd.",
]
weekdayMap = [
    "Sunday",
    "Tuesday",
    "Friday",
    "Saturday",
    "Wednesday",
    "Monday",
    "Thursday",
]
# The first entry is the empty string, which therefore has position 0.
costfactorMap = [
    "",
    "Chef 2",
    "Chef 1",
    "Ingredients",
    "Delivery Scooters",
    "Phone Bill",
    "Delivery Guy 2",
    "Distribution channel fees",
    "Waiter",
    "Delivery Guy 1",
]
sizeMap = [
    "Medium",
    "Small",
    "Large",
]
typeMap = [
    "Funghi",
    "Salami",
    "Calzone",
    "Speciale",
    "Magherita",
    "Paprika",
    "Veggie",
]

In [23]:
# Read the whole Pizza_Case table and remove the `_CASE_KEY` and `Customer_ID`
# columns. A single non-mutating drop replaces the two `inplace=True` calls.
df = pd.read_sql("SELECT * FROM Pizza_Case", con).drop(
    columns=["_CASE_KEY", "Customer_ID"]
)

# Column name -> list of its labels. Every value in the column is replaced by
# the position of that value in the list (`list.index`); a value that is not
# in the list raises ValueError.
columnLabels = {
    "CustomerType": customerTypeMap,
    "CustomerLocation": customerLocationMap,
    "DistributionChannel": distributionChannelMap,
    "Weekday": weekdayMap,
    "CostFactor": costfactorMap,
    "PizzaSize": sizeMap,
    "PizzaType": typeMap,
}
for column, labels in columnLabels.items():
    df[column] = df[column].map(labels.index)

df

Unnamed: 0,Variant,Revenue,Costs,PizzaType,PizzaSize,CustomerSatisfaction,CostFactor,Weekday,Daytime,DistributionChannel,CustomerLocation,CustomerType
0,1,17,13,0,0,3,1,0,14,0,0,0
1,1,19,10,1,1,3,1,0,13,1,0,0
2,1,9,16,2,2,0,2,0,12,0,0,1
3,1,36,56,2,2,1,2,0,18,2,0,2
4,1,5,35,1,1,5,3,0,14,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1993,9,11,2,3,1,5,2,3,18,4,4,0
1994,9,54,23,0,0,3,4,2,11,0,0,0
1995,9,29,2,4,0,4,9,2,18,6,0,1
1996,9,12,12,0,2,3,2,1,21,2,1,0


In [24]:
# Target is the CustomerSatisfaction column; every other column is a feature.
y = df['CustomerSatisfaction']
X = df.drop(columns='CustomerSatisfaction')

# Hold out 20% of the rows for testing, with a fixed seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a seeded decision tree on the training split (fit() returns the estimator).
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Score the fitted tree on the held-out split and report it.
score = clf.score(X_test, y_test)
print('Accuracy:', score)

# Report each feature name next to its importance in the fitted tree.
importances = clf.feature_importances_
for position, feature in enumerate(X.columns):
    importance = importances[position]
    print(feature, importance)

Accuracy: 0.24
Variant 0.11276751387778108
Revenue 0.14600285907413374
Costs 0.1359836513938124
PizzaType 0.07795224656896195
PizzaSize 0.04587315287726308
CostFactor 0.1022087646608644
Weekday 0.08068408251555198
Daytime 0.10017396160951009
DistributionChannel 0.0723498272240702
CustomerLocation 0.06771655165153569
CustomerType 0.05828738854651542
