In [4]:
import pandas as pd
import ast
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the raw menu data.
data = pd.read_csv('restaurant_menu_optimization_data.csv')

# Ingredients is stored as the string representation of a Python list;
# parse each cell into an actual list.
data['Ingredients'] = data['Ingredients'].apply(ast.literal_eval)

# One-hot encode MenuCategory (one indicator column per category value).
one_hot_encoder = OneHotEncoder()
menu_category_encoded = one_hot_encoder.fit_transform(data[['MenuCategory']]).toarray()

# Wrap the indicator matrix in a DataFrame that shares `data`'s index.
# The column-wise concat pairs rows by index label, so reusing the index keeps
# every indicator row attached to its source row even if `data` is not 0..n-1.
menu_category_df = pd.DataFrame(
    menu_category_encoded,
    columns=one_hot_encoder.get_feature_names_out(['MenuCategory']),
    index=data.index,
)
data_encoded = pd.concat([data, menu_category_df], axis=1)

# Replace the Profitability labels with integer codes.
label_encoder = LabelEncoder()
data_encoded['Profitability'] = label_encoder.fit_transform(data_encoded['Profitability'])

# Standardize the Price column (overwrites Price with its scaled values).
scaler = StandardScaler()
data_encoded['Price'] = scaler.fit_transform(data_encoded[['Price']])

# Bucket the standardized Price into three categories.
# An integer `bins` makes pd.cut split the observed price range into that many
# equal-width intervals, so every row receives a label. No explicit edge list
# is used: fixed edges that stop at 1 would leave higher prices uncategorized.
labels = ['Low', 'Medium', 'High']
data_encoded['PriceCategory'] = pd.cut(data_encoded['Price'], bins=len(labels), labels=labels)

# Remove columns from the encoded frame: MenuCategory is now represented by its
# one-hot columns; MenuItem, Ingredients and RestaurantID are dropped unencoded.
data_encoded = data_encoded.drop(columns=['MenuCategory', 'MenuItem', 'Ingredients', 'RestaurantID'])

# Display the first few rows of the encoded data
print(data_encoded.head())

# Separate features and target: the features are every column except the
# standardized Price and the PriceCategory label derived from it.
X = data_encoded.drop(['Price', 'PriceCategory'], axis=1)
y = data_encoded.loc[:, 'PriceCategory']

# Split into training and test data (20% held out, fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Decision tree: initialize the model and train it on the training split.
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# 5-fold cross-validated accuracy over the full X / y, summarized as mean and std.
dt_scores = cross_val_score(dt_classifier, X, y, scoring='accuracy', cv=5)
dt_accuracy, dt_std = dt_scores.mean(), dt_scores.std()

# Evaluate the fitted tree on the held-out test data.
dt_y_pred = dt_classifier.predict(X_test)
dt_report = classification_report(y_test, dt_y_pred, output_dict=True)
dt_test_accuracy = accuracy_score(y_test, dt_y_pred)

      Price  Profitability  MenuCategory_Appetizers  MenuCategory_Beverages  \
0 -1.385057              1                      0.0                     1.0   
1 -0.229045              2                      1.0                     0.0   
2  0.788029              0                      0.0                     0.0   
3  2.256986              0                      0.0                     0.0   
4  0.662581              2                      0.0                     0.0   

   MenuCategory_Desserts  MenuCategory_Main Course PriceCategory  
0                    0.0                       0.0           Low  
1                    0.0                       0.0           Low  
2                    1.0                       0.0        Medium  
3                    0.0                       1.0          High  
4                    0.0                       1.0        Medium  


In [5]:
import pickle

# Persist the trained decision tree to disk.
# The context manager closes the file handle (flushing the pickle to disk) as
# soon as the dump finishes, and also if it raises, instead of leaving an open
# handle for the garbage collector.
# NOTE: only unpickle this file from a trusted location; pickle.load can
# execute arbitrary code.
filename = 'resto_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(dt_classifier, model_file)