In [9]:
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [3]:
# Location of the inventory CSV. The '/content' prefix is presumably the
# Colab working directory — adjust when running elsewhere.
DATA_PATH = '/content/extended_sustainable_inventory_dataset.csv'

# Read the raw table and preview its first rows.
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,item_name,category,quantity_purchased,quantity_consumed,waste_generated,purchase_date,expiry_date,days_until_expiry
0,milk,dairy,19.93,19.37,0.56,2025-03-29,2025-04-06,8
1,cheese,dairy,7.07,5.83,1.24,2025-04-07,2025-04-29,22
2,carrot,vegetable,12.34,9.14,3.2,2025-04-02,2025-04-07,5
3,mutton,meat,10.47,8.47,2.0,2025-04-02,2025-04-23,21
4,spinach,vegetable,6.9,6.9,0.0,2025-03-25,2025-04-01,7


In [4]:
# Print each column's dtype and non-null count to check for missing values
# and to see which columns are non-numeric before building features.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   item_name           1500 non-null   object 
 1   category            1500 non-null   object 
 2   quantity_purchased  1500 non-null   float64
 3   quantity_consumed   1500 non-null   float64
 4   waste_generated     1500 non-null   float64
 5   purchase_date       1500 non-null   object 
 6   expiry_date         1500 non-null   object 
 7   days_until_expiry   1500 non-null   int64  
dtypes: float64(3), int64(1), object(4)
memory usage: 93.9+ KB


In [5]:
# Build the modelling frame under a NEW name instead of rebinding `df`.
# Rebinding made this cell fail with a KeyError when run a second time
# (the columns were already dropped) and silently changed what `df` meant
# for every cell that displayed it earlier.
model_df = (
    df
    # 1. Drop columns that are not used as features: the free-text item name
    #    and the two raw date strings (days_until_expiry already encodes the
    #    gap between them, e.g. 2025-03-29 -> 2025-04-06 is 8 days).
    .drop(columns=['item_name', 'purchase_date', 'expiry_date'])
    # 2. One-hot encode 'category'; drop_first removes one level so the
    #    dummy columns are not perfectly collinear.
    .pipe(pd.get_dummies, columns=['category'], drop_first=True)
)

# NOTE(review): possible target leakage. In every previewed row
# waste_generated == quantity_purchased - quantity_consumed
# (19.93 - 19.37 = 0.56, 7.07 - 5.83 = 1.24, ...), so with both quantities
# kept as features a linear model can reproduce the target exactly. If
# quantity_consumed is not known at prediction time it should be removed
# from X — confirm the intended use before trusting the scores below.

# 3. Define X and y
X = model_df.drop(columns=['waste_generated'])  # Features
y = model_df['waste_generated']                 # Target


In [6]:
# Fixed seed so the train/test partition — and therefore every metric
# reported further down — is identical on each Restart & Run All.
SPLIT_SEED = 42

# Train-test split: hold out 20% of the rows for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED
)

# Scale features. The scaler is fitted on the training rows only and then
# applied unchanged to the test rows, so no test statistics leak into
# preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
# Multiple linear regression baseline, trained on the standardised features.
# fit() returns the estimator itself, so construction and fitting chain.
mlr = LinearRegression().fit(X_train_scaled, y_train)

# Score the baseline on the held-out rows.
y_pred_mlr = mlr.predict(X_test_scaled)
mlr_test_mse = mean_squared_error(y_test, y_pred_mlr)
print("MLR MSE:", mlr_test_mse)

MLR MSE: 6.79455074321807e-30


In [10]:
# Seed Python's `random`, NumPy and TensorFlow in one call so weight
# initialisation and batch shuffling are repeatable between runs.
tf.keras.utils.set_random_seed(42)

# Small fully connected regressor: two ReLU hidden layers and one linear
# output unit. The input width is declared with an explicit Input object
# rather than `input_dim=` on the first Dense layer, which current Keras
# warns against for Sequential models.
nn = Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1),  # Regression output
])

nn.compile(optimizer='adam', loss='mse')

# 20% of the training rows are set aside by Keras for per-epoch validation.
# The History object is kept so the loss curves can be inspected later
# instead of being discarded as a bare cell repr.
history = nn.fit(
    X_train_scaled,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - loss: 11.8595 - val_loss: 6.2237
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 6.7694 - val_loss: 3.8725
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 3.9868 - val_loss: 3.3240
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 3.1365 - val_loss: 2.5747
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 2.3269 - val_loss: 1.8113
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.6995 - val_loss: 1.0284
Epoch 7/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.8055 - val_loss: 0.4451
Epoch 8/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.3182 - val_loss: 0.1410
Epoch 9/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x7e36acb38a10>

In [11]:
# Persist the fitted linear model together with the scaler. Both models in
# this notebook were trained on the scaled features, so the same scaler must
# be applied to any new data before calling either model's predict().
# (`joblib` is imported with the other dependencies in the first cell.)
joblib.dump(mlr, 'mlr_model.pkl')
joblib.dump(scaler, 'mlr_scaler.pkl')


['mlr_scaler.pkl']

In [12]:
# Write the trained network to a single file; the '.h5' extension selects
# the HDF5 format.
# NOTE(review): newer Keras releases recommend the native '.keras'
# extension instead — confirm which format downstream loaders expect
# before switching.
NN_MODEL_PATH = 'nn_model.h5'
nn.save(NN_MODEL_PATH)




In [17]:
# Evaluate the linear model on the held-out test rows.
#   y_test     : actual waste_generated values
#   y_pred_mlr : values predicted by the fitted linear model
# (The metric functions and NumPy are imported in the first cell.)
y_pred_mlr = mlr.predict(X_test_scaled)

mae = mean_absolute_error(y_test, y_pred_mlr)
mse = mean_squared_error(y_test, y_pred_mlr)
rmse = np.sqrt(mse)  # same units as the target, easier to read than MSE
r2 = r2_score(y_test, y_pred_mlr)

# NOTE(review): errors at the 1e-15 level and an R² of exactly 1.0 are
# floating-point noise, i.e. the target is an exact linear function of the
# features. Treat that as a sign of target leakage, not of model quality.
print("MLR MAE :", mae)
print("MLR MSE :", mse)
print("MLR RMSE:", rmse)
print("MLR R²  :", r2)


MLR MAE : 2.1293036778224917e-15
MLR MSE : 6.79455074321807e-30
MLR RMSE: 2.6066359053803564e-15
MLR R²  : 1.0


In [18]:
# Predict on the held-out rows. Keras returns one column per output unit,
# i.e. shape (n, 1); collapse it to a 1-D vector for the metric functions.
nn_test_output = nn.predict(X_test_scaled)
y_pred_nn = nn_test_output.ravel()

# Same four regression metrics as for the linear model.
mse = mean_squared_error(y_test, y_pred_nn)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred_nn)
r2 = r2_score(y_test, y_pred_nn)

# Report in the fixed order MAE, MSE, RMSE, R².
for metric_label, metric_value in (
    ("NN MAE :", mae),
    ("NN MSE :", mse),
    ("NN RMSE:", rmse),
    ("NN R²  :", r2),
):
    print(metric_label, metric_value)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
NN MAE : 0.038612376223007824
NN MSE : 0.0024720359841702943
NN RMSE: 0.04971957345121028
NN R²  : 0.9994891176909745
