# Step 1: Set Up Google Colab

In [None]:
!pip install pandas scikit-learn matplotlib seaborn

# Step 2: Upload & Load Dataset

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at the mount point below.
mount_point = '/content/drive'
drive.mount(mount_point)


In [None]:
import pandas as pd

# Location of the ingredients CSV under the Google Drive mount
file_path = "/content/drive/My Drive/pulao_ingredients_500.csv"

# Read the CSV into a DataFrame
df = pd.read_csv(file_path)

# Preview the leading rows
loaded_preview = df.head()
print(loaded_preview)


# Step 3: Data Exploration & Preprocessing

In [None]:
# Show dataset information.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

# Check for missing values (count of nulls per column)
print(df.isnull().sum())

# Check basic statistics
print(df.describe())



# Step 4: Data Visualization

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Scatter of the stock-available column against the purchase-quantity column,
# drawn on an explicitly created 8x5 inch figure.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x="Stock_Available (kg/L)",
    y="Purchase_Quantity (kg/L)",
    ax=ax,
)
ax.set_xlabel("Stock Available (kg/L)")
ax.set_ylabel("Purchase Quantity (kg/L)")
ax.set_title("Stock vs Purchase Quantity")
plt.show()


# Step 5: Prepare Data for ML Model

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn an integer code for each distinct value in 'Ingredient', then
# overwrite the column with those codes.
encoder = LabelEncoder()
encoder.fit(df["Ingredient"])
df["Ingredient"] = encoder.transform(df["Ingredient"])

# Preview the frame after encoding
encoded_preview = df.head()
print(encoded_preview)



# Step 6: Train-Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Predictor columns and the column to be predicted
feature_columns = ['Stock_Available (kg/L)', 'Reorder_Level (kg/L)', 'Ingredient']
target_column = 'Purchase_Quantity (kg/L)'
X = df[feature_columns]
y = df[target_column]

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable between runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Report how many rows landed in each partition
print(f"Training samples: {len(X_train)}, Testing samples: {len(X_test)}")


# Step 7: Train ML Model

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Build a seeded 100-estimator random forest and fit it on the training split
# (fit() returns the estimator itself, so construction and fitting are chained).
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Generate predictions for the held-out test rows
y_pred_rf = rf_model.predict(X_test)

# Show the first five predictions
first_predictions = y_pred_rf[:5]
print("Predicted Purchase Quantities (Random Forest):", first_predictions)


# Step 8: Evaluate Model Performance

In [None]:
# The metric functions must be imported in this cell: nothing earlier in the
# notebook brings them into scope, so without this line the cell fails with
# NameError on a fresh kernel.
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Calculate error metrics by comparing the test targets with the
# Random Forest predictions
mae_rf = mean_absolute_error(y_test, y_pred_rf)
mse_rf = mean_squared_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)

print(f"Random Forest - Mean Absolute Error: {mae_rf}")
print(f"Random Forest - Mean Squared Error: {mse_rf}")
print(f"Random Forest - R² Score: {r2_rf}")


# Step 9: Save the Model for Web Integration

In [None]:
import joblib

# Save trained Random Forest model
rf_model_filename = "/content/pulao_inventory_rf_model.pkl"
joblib.dump(rf_model, rf_model_filename)

# The model was fitted on the integer codes that `encoder` produced for the
# 'Ingredient' column, so anything that loads the model also needs the same
# fitted encoder to turn ingredient names into the values the model expects.
# It is written to its own file so the model file's contents are unchanged.
encoder_filename = "/content/pulao_inventory_ingredient_encoder.pkl"
joblib.dump(encoder, encoder_filename)

print("Random Forest Model saved successfully!")
print(f"Ingredient encoder saved to {encoder_filename}")
