In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Load the dataset
jj = pd.read_csv('jj_translated.csv')

# Display the first few rows of the dataframe to understand its structure
print(jj.head())

# Define the relevant features and target columns
features = ['Date', 'Day of Week']  # Adjust based on your dataset

         Date Day of Week Payment Type Customer ID Approval  \
0  2023-01-01      Sunday         BANK      1K0E00      PAY   
1  2023-01-01      Sunday         BANK      1K0E00      PAY   
2  2023-01-02      Monday       CREDIT      0D1GG0      PAY   
3  2023-01-02      Monday       CREDIT      0D1GG0      PAY   
4  2023-01-02      Monday       CREDIT      0D1GG0      PAY   

                      Order  Price      Cost    Commision  Total VAT  \
0         Strawberry Joyful   6500  2611.610  1964.000000      221.0   
1              Kiwi Delight   6500  2264.615  1964.000000      221.0   
2  Seasonal Cup Fruit (9oz)   7000  1000.000  1612.333333      169.5   
3  Seasonal Cup Fruit (9oz)   7000  1000.000  1612.333333      169.5   
4  Seasonal Cup Fruit (9oz)   7000  1000.000  1612.333333      169.5   

     Total Fee      Revenue  
0  2185.000000  -481.610000  
1  2185.000000  -134.615000  
2  1781.833333  2436.333333  
3  1781.833333  2436.333333  
4  1781.833333  2436.333333  


In [15]:
# Convert the 'Date' column to datetime format
jj['Date'] = pd.to_datetime(jj['Date'], errors='coerce')

# Drop rows where 'Date' could not be converted (if any)
jj = jj.dropna(subset=['Date'])

# Extract the day of the week from the 'Date' column if 'Day of Week' is not already present
if 'Day of Week' not in jj.columns:
    jj['Day of Week'] = jj['Date'].dt.day_name()

# Define the features as 'Date' and 'Day of Week'
features = ['Date']  # We'll add 'Day of Week' dummy variables later

# Convert 'Date' to numerical values (e.g., timestamp) and 'Day of Week' to dummy/indicator variables
jj['Date'] = jj['Date'].map(pd.Timestamp.toordinal)
#jj = pd.get_dummies(jj, columns=['Day of Week'], drop_first=True, errors='ignore')

# Identify unique menu items
menu_items = jj['Order'].unique()

# Dictionary to store models for each menu item
models = {}

# Loop through each menu item and train a model
for menu_item in menu_items:
    print(f'Training model for: {menu_item}')
    
    # Filter data for the specific menu item
    jj_item = jj[jj['Order'] == menu_item].copy()
    
    # Ensure there are sufficient data points
    if jj_item.shape[0] < 10:
        print(f'Not enough data to train model for {menu_item}. Skipping...')
        continue
    
    # Separate the features (X) and the target variable (y)
    X = jj_item[features + list(jj.columns[jj.columns.str.startswith('Day of Week_')])]
    y = jj_item['Price']  # The price for this specific menu item
    
    # Split the data into training and testing sets (80% train, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Train the linear regression model
    model = LinearRegression()
    model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred = model.predict(X_test)
    
    # Print actual vs. predicted prices
    print(f"\nActual vs Predicted Prices for {menu_item}:")
    for actual, predicted in zip(y_test, y_pred):
        print(f"Actual: {actual}, Predicted: {predicted}")
    
    # Save the model in the dictionary
    models[menu_item] = model

Training model for: Strawberry Joyful
Not enough data to train model for Strawberry Joyful. Skipping...
Training model for: Kiwi Delight
Not enough data to train model for Kiwi Delight. Skipping...
Training model for: Seasonal Cup Fruit (9oz)
Not enough data to train model for Seasonal Cup Fruit (9oz). Skipping...
Training model for: Apple Kale Juice

Actual vs Predicted Prices for Apple Kale Juice:
Actual: 7000, Predicted: 7000.0
Actual: 7000, Predicted: 7000.0
Actual: 7000, Predicted: 7000.0
Actual: 7000, Predicted: 7000.0
Actual: 7000, Predicted: 7000.0
Training model for: Green Grape Kale Juice
Not enough data to train model for Green Grape Kale Juice. Skipping...
Training model for: Tomato Juice
Not enough data to train model for Tomato Juice. Skipping...
Training model for: Lemon Juice
Not enough data to train model for Lemon Juice. Skipping...
Training model for: Grapefruit Lemon Juice
Not enough data to train model for Grapefruit Lemon Juice. Skipping...
Training model for: Str

In [17]:
menu_items = jj['Order'].unique()
print(menu_items)

['Strawberry Joyful' 'Kiwi Delight' 'Seasonal Cup Fruit (9oz)'
 'Apple Kale Juice' 'Green Grape Kale Juice' 'Tomato Juice' 'Lemon Juice'
 'Grapefruit Lemon Juice' 'Strawberry Greek Yogurt'
 'Strawberry Lemon Breeze' 'Apple Lemon Kale Celery Juice' 'Orange Juice'
 'Mango Coco' 'Pineapple Lassi Yogurt Smoothie' 'Pear Juice' 'Peach Juice'
 'Mango Juice' 'Melon Juice' 'Apple Carrot Juice' 'Pomegranate Juice'
 'Winter Signature Vin Chaud' 'ABC Juice' 'Orange Pineapple Juice'
 'Ssangwhacha' 'Apple Cabbage Juice' 'Nutella Banana Greek Yogurt'
 'Strawberry Milk' 'My Special Greek Yogurt' 'Seasonal Fruit (17oz)'
 'Avocado Banana Bliss' 'Apple Pineapple Kale Juice' 'Pineapple Juice'
 'Pineapple Sunshine' 'Croque-Monsieur' 'Apple Beet Juice'
 'My Special Juice' 'Strawberry Lassi Yogurt Smoothie' 'Passion Island'
 'Berry Banana Greek Yogurt' 'Green Grape Juice' 'Watermelon Juice'
 'Dark Chocolate Banana Dream' 'Strawberry Juice' 'Americano'
 'Peanut Butter Jam Toast' 'Berry Blast' 'Apple Juice' 'S

In [21]:
for menu_item in menu_items:
    jj_item = jj[jj['Order'] == menu_item].copy()
    print(jj_item.shape[0])

7
1
5
22
2
7
7
1
1
8
18
13
9
3
15
4
3
6
12
2
1
21
2
4
1
1
6
6
2
5
1
8
6
2
1
5
3
1
3
2
8
4
9
5
5
4
3
1
1
3
1
1
2
1
2
1
1
1
2
1
1
3
2
2
1
1
1
2
1


In [23]:
# Dictionary to store models for each menu item
models = {}

# Loop through each menu item and train a model
for menu_item in menu_items:
    print(f'Training model for: {menu_item}')

    # Filter data for the specific menu item
    jj_item = jj[jj['Order'] == menu_item].copy()
    
    # Ensure there are sufficient data points
    if jj_item.shape[0] < 5:
        print(f'Not enough data to train model for {menu_item}. Skipping...')
        continue
    
    # Separate the features (X) and the target variable (y)
    X = jj_item[features]
    y = jj_item['Price']  # The price for this specific menu item
    
    # Split the data into training and testing sets (80% train, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Train the linear regression model
    model = LinearRegression()
    model.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred = model.predict(X_test)
    
    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    print(f'Mean Squared Error for {menu_item}: {mse}')
    print(f'R^2 Score for {menu_item}: {r2}')
    
    # Save the model in the dictionary
    models[menu_item] = model
    
    # Optionally, save each model to a file
    #model_path = f'/mnt/data/{menu_item.replace(" ", "_")}_price_model.pkl'
    #joblib.dump(model, model_path)
    #print(f'Trained model for {menu_item} saved to {model_path}')

# Example: Access the model for 'Strawberry Joyful' later
# strawberry_model = models.get('Strawberry Joyful')

# You now have models for each menu item saved and ready for use in your AI system.

Training model for: Strawberry Joyful
Mean Squared Error for Strawberry Joyful: 0.0
R^2 Score for Strawberry Joyful: 1.0
Training model for: Kiwi Delight
Not enough data to train model for Kiwi Delight. Skipping...
Training model for: Seasonal Cup Fruit (9oz)
Mean Squared Error for Seasonal Cup Fruit (9oz): 0.0
R^2 Score for Seasonal Cup Fruit (9oz): nan
Training model for: Apple Kale Juice
Mean Squared Error for Apple Kale Juice: 0.0
R^2 Score for Apple Kale Juice: 1.0
Training model for: Green Grape Kale Juice
Not enough data to train model for Green Grape Kale Juice. Skipping...
Training model for: Tomato Juice
Mean Squared Error for Tomato Juice: 0.0
R^2 Score for Tomato Juice: 1.0
Training model for: Lemon Juice
Mean Squared Error for Lemon Juice: 0.0
R^2 Score for Lemon Juice: 1.0
Training model for: Grapefruit Lemon Juice
Not enough data to train model for Grapefruit Lemon Juice. Skipping...
Training model for: Strawberry Greek Yogurt
Not enough data to train model for Strawberr



In [25]:
for menu_item in menu_items:
    jj_item = jj[jj['Order'] == menu_item].copy()
    
    if jj_item.shape[0] < 10:
        print(f'Not enough data to train model for {menu_item}. Skipping...')
    else:
    # Separate the features (X) and the target variable (y)
        X = jj_item[features]
        y = jj_item['Price']  # The price for this specific menu item
    
    # Split the data into training and testing sets (80% train, 20% test)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Train the linear regression model
        model = LinearRegression()
        model.fit(X_train, y_train)
    
    # Make predictions on the test set
        y_pred = model.predict(X_test)
    
    # Print actual vs. predicted prices
        print("Actual vs Predicted Prices for Each Menu:")
        for actual, predicted in zip(y_test, y_pred):
            print(f"Actual: {actual}, Predicted: {predicted}")

Not enough data to train model for Strawberry Joyful. Skipping...
Not enough data to train model for Kiwi Delight. Skipping...
Not enough data to train model for Seasonal Cup Fruit (9oz). Skipping...
Actual vs Predicted Prices for Each Menu:
Actual: 7000, Predicted: 7000.0
Actual: 7000, Predicted: 7000.0
Actual: 7000, Predicted: 7000.0
Actual: 7000, Predicted: 7000.0
Actual: 7000, Predicted: 7000.0
Not enough data to train model for Green Grape Kale Juice. Skipping...
Not enough data to train model for Tomato Juice. Skipping...
Not enough data to train model for Lemon Juice. Skipping...
Not enough data to train model for Grapefruit Lemon Juice. Skipping...
Not enough data to train model for Strawberry Greek Yogurt. Skipping...
Not enough data to train model for Strawberry Lemon Breeze. Skipping...
Actual vs Predicted Prices for Each Menu:
Actual: 7000, Predicted: 7000.0
Actual: 7000, Predicted: 7000.0
Actual: 7000, Predicted: 7000.0
Actual: 7000, Predicted: 7000.0
Actual vs Predicted P

In [51]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Load the dataset again
jj = pd.read_csv('jj_translated.csv')

# Select relevant features for the model
features = jj[['Day of Week', 'Payment Type', 'Price', 'Cost', 'Commision', 'Total VAT', 'Total Fee']]
target = jj['Revenue']

# Convert categorical variables to numerical format using one-hot encoding
categorical_features = features[['Day of Week', 'Payment Type']]
one_hot_encoder = OneHotEncoder(sparse=False, drop='first')  # Corrected argument 'sparse'
encoded_features = one_hot_encoder.fit_transform(categorical_features)

# Combine encoded features with the rest of the numerical features
numeric_features = features.drop(['Day of Week', 'Payment Type'], axis=1).values
X = np.hstack((numeric_features, encoded_features))

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, target, test_size=0.2, random_state=42)

# Display the shapes of the resulting datasets
(X_train.shape, X_test.shape, y_train.shape, y_test.shape)



((237, 14), (60, 14), (237,), (60,))

In [53]:

# Step 2: Initialize and train the Linear Regression model
linear_regression_model = LinearRegression()
linear_regression_model.fit(X_train, y_train)

# Step 3: Predict revenue for different prices
# Generate a range of possible prices
price_range = np.arange(4000, 13000, 100)  # Prices from 4000 to 13000 in steps of 100

# Prepare the base input template: Keep other features at their average or typical values
average_cost = features['Cost'].mean()
average_commission = features['Commision'].mean()
average_vat = features['Total VAT'].mean()
average_fee = features['Total Fee'].mean()

# Encoding for categorical features (e.g., 'Monday' and 'BANK')
encoded_features = one_hot_encoder.transform([['Monday', 'BANK']])  # Example of fixed values

predicted_revenues = []
predicted_profits = []

for price in price_range:
    # Combine fixed average values with the varying price
    input_features = np.hstack(([[price, average_cost, average_commission, average_vat, average_fee]], encoded_features))
    
    # Predict revenue
    predicted_revenue = linear_regression_model.predict(input_features)
    
    # Calculate corresponding profit
    predicted_profit = predicted_revenue[0] - average_cost  # Simplified profit calculation
    
    predicted_revenues.append(predicted_revenue[0])
    predicted_profits.append(predicted_profit)

# Step 5: Find the optimal price
optimal_price_index = np.argmax(predicted_profits)  # Find index of maximum profit
optimal_price = price_range[optimal_price_index]

print(f"Optimal Price: {optimal_price}")

Optimal Price: 12900


