In [4]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import joblib

In [5]:
# Read the menu nutrition table from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='Mc.Donalds_menu.csv')

# Extract grams from Serving Size
def extract_grams_extended(serving):
    """Parse a serving-size description into a single integer quantity.

    Units are tried in this order; the first one found wins:

    1. grams  (e.g. ``"4.8 oz (136 g)"``)  -> the gram figure
    2. fluid ounces (e.g. ``"16 fl oz cup"``) -> the figure multiplied by 29.57
    3. millilitres (e.g. ``"1 carton (236 ml)"``) -> the figure, passed
       through unconverted

    Decimal quantities such as ``"12.5 g"`` or ``"21.5 fl oz"`` are read in
    full (previously only the digits after the decimal point were captured).
    Matching is case-insensitive and the unit must end at a word boundary, so
    a number followed by an unrelated word starting with "g" is not mistaken
    for grams.

    Parameters
    ----------
    serving : Any
        The raw serving-size value; it is converted with ``str()`` first, so
        missing values (e.g. NaN) are accepted.

    Returns
    -------
    int or None
        The quantity truncated toward zero by ``int()``, or ``None`` when no
        supported unit is found.
    """
    text = str(serving)
    # An integer or decimal number, captured as group 1.
    number = r'(\d+(?:\.\d+)?)'
    # Multiplier applied to fluid ounces (approx. millilitres per US fl oz).
    fl_oz_factor = 29.57

    match_g = re.search(number + r'\s*g(?:rams?)?\b', text, flags=re.IGNORECASE)
    if match_g:
        return int(float(match_g.group(1)))

    match_oz = re.search(number + r'\s*fl\s*oz\b', text, flags=re.IGNORECASE)
    if match_oz:
        return int(float(match_oz.group(1)) * fl_oz_factor)

    match_ml = re.search(number + r'\s*ml\b', text, flags=re.IGNORECASE)
    if match_ml:
        return int(float(match_ml.group(1)))

    return None

# Parse every "Serving Size" string into a numeric column.
# (The column is added to df but is not among the model features selected below.)
df['Serving Size (g)'] = df['Serving Size'].apply(extract_grams_extended)

# Target and numeric predictors
target_col = 'Calories'
numeric_features = [
    'Calories from Fat',
    'Total Fat',
    'Total Fat (% Daily Value)',
    'Saturated Fat',
    'Saturated Fat (% Daily Value)',
    'Protein',
    'Carbohydrates',
    'Carbohydrates (% Daily Value)',
    'Sodium',
    'Sodium (% Daily Value)',
]

# Keep only the modelling columns and discard rows with any missing value
selected_columns = numeric_features + ['Category', target_col]
df_clean = df[selected_columns].dropna()

# Indicator columns for 'Category'; drop_first=True removes the first level,
# which therefore becomes the all-zeros baseline.
category_dummies = pd.get_dummies(df_clean['Category'], drop_first=True, dtype=int)
category_columns = list(category_dummies.columns)

# Feature matrix: numeric columns followed by the category indicators
feature_frame = pd.concat([df_clean[numeric_features], category_dummies], axis=1)
X = feature_frame.to_numpy()
y = df_clean[target_col].to_numpy()

# 70/30 train-test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the linear model and score the held-out rows
model = LinearRegression().fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

# Report split sizes and test-set metrics
print("\nModel with Category Feature")
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"R² Score: {r2_score(y_test, y_pred):.4f}")
print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f}")
print(f"MSE: {mean_squared_error(y_test, y_pred):.2f}")

# Persist the model, the scaler, and the dummy-column names for later inference
for artifact, filename in (
    (model, 'model_with_category.pkl'),
    (scaler, 'scaler_with_category.pkl'),
    (category_columns, 'category_columns.pkl'),
):
    joblib.dump(artifact, filename)

print("\nModel, scaler, and category columns saved successfully!")



Model with Category Feature
X_train shape: (182, 18)
X_test shape: (78, 18)
R² Score: 0.9996
MAE: 3.81
MSE: 26.35

Model, scaler, and category columns saved successfully!


  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_


In [None]:
import joblib
import pandas as pd  # imported here as well so this cell also runs on a fresh kernel

# Load saved model, scaler, and category columns.
# NOTE: joblib.load unpickles arbitrary Python objects; only load files you created yourself.
model = joblib.load('model_with_category.pkl')
scaler = joblib.load('scaler_with_category.pkl')
category_columns = joblib.load('category_columns.pkl')

# Numeric features, in the same order as the list used in the training cell.
# scaler.transform raises if the total column count differs from what it was fitted on.
numeric_features = [
    'Calories from Fat', 'Total Fat', 'Total Fat (% Daily Value)',
    'Saturated Fat', 'Saturated Fat (% Daily Value)', 'Protein',
    'Carbohydrates', 'Carbohydrates (% Daily Value)',
    'Sodium', 'Sodium (% Daily Value)'
]

# New data for prediction (one list entry per item to score)
new_data_dict = {
    'Calories from Fat': [300],
    'Total Fat': [33],
    'Total Fat (% Daily Value)': [51],
    'Saturated Fat': [12],
    'Saturated Fat (% Daily Value)': [60],
    'Protein': [25],
    'Carbohydrates': [40],
    'Carbohydrates (% Daily Value)': [13],
    'Sodium': [800],
    'Sodium (% Daily Value)': [35],
    'Category': ['Chicken & Fish']  # New item category
}

# Convert to DataFrame
new_df = pd.DataFrame(new_data_dict)

# One-hot encode Category and align it to the saved training columns:
# missing columns are added as 0, extra columns are dropped, order is fixed,
# and the dtype is int to match the training cell's `.astype(int)`.
# NOTE: a category absent from category_columns becomes all zeros, which is
# indistinguishable from the baseline category dropped by drop_first=True
# during training -- so a misspelled category is NOT reported as an error.
category_df = (
    pd.get_dummies(new_df['Category'])
    .reindex(columns=category_columns, fill_value=0)
    .astype(int)
)

# Combine numeric features and encoded category features
X_new = pd.concat([new_df[numeric_features], category_df], axis=1)

# Scale using saved scaler. The scaler was fitted on a plain NumPy array, so
# pass an array here too; handing it a DataFrame makes scikit-learn warn that
# X has feature names the estimator was fitted without.
X_new_scaled = scaler.transform(X_new.to_numpy())

# Predict calories
predicted_calories = model.predict(X_new_scaled)
print(f"Predicted Calories: {predicted_calories[0]:.2f}")

Predicted Calories: 555.37


