In [74]:
import pandas as pd


In [75]:
df = pd.read_csv("food_safety_multitask_dataset.csv")

df.head()

Unnamed: 0,food_type,storage,time_since_prep,is_sealed,environment,safeForHours,confidence,riskLevel
0,raw,fridge,30.9,True,dry,7.7,0.7,medium
1,packaged,room_temp,27.6,False,humid,19.3,0.72,high
2,non_veg,fridge,23.1,False,dry,3.0,0.77,medium
3,cooked_veg,room_temp,34.0,True,humid,16.4,0.89,medium
4,non_veg,fridge,7.7,False,dry,21.4,0.88,low


In [76]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 360 entries, 0 to 359
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   food_type        360 non-null    object 
 1   storage          360 non-null    object 
 2   time_since_prep  360 non-null    float64
 3   is_sealed        360 non-null    bool   
 4   environment      360 non-null    object 
 5   safeForHours     360 non-null    float64
 6   confidence       360 non-null    float64
 7   riskLevel        360 non-null    object 
dtypes: bool(1), float64(3), object(4)
memory usage: 20.2+ KB


In [77]:
df['food_type'].value_counts()

food_type
raw           90
packaged      90
non_veg       90
cooked_veg    90
Name: count, dtype: int64

In [None]:
# Features
X = df.drop(columns=["riskLevel", "safeForHours"])

# Targets
y_class = df["riskLevel"]        # For classification
y_reg = df["safeForHours"]       # For regression


In [79]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

categorical = ["food_type", "storage", "environment", "is_sealed"]
numerical = ["time_since_prep", "confidence"]

preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical),
    ("num", StandardScaler(), numerical)
])


In [80]:
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Split
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X, y_class, test_size=0.2, random_state=42)

# Classification pipeline
clf_pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", RandomForestClassifier(random_state=42))
])

# Train
clf_pipeline.fit(X_train_c, y_train_c)

# Evaluate
y_pred_c = clf_pipeline.predict(X_test_c)
print("Classification Report:\n", classification_report(y_test_c, y_pred_c))


Classification Report:
               precision    recall  f1-score   support

        high       0.87      0.87      0.87        23
         low       0.83      0.89      0.86        27
      medium       0.70      0.64      0.67        22

    accuracy                           0.81        72
   macro avg       0.80      0.80      0.80        72
weighted avg       0.80      0.81      0.80        72



In [88]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error 
from sklearn.metrics import mean_squared_error

# Split
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(X, y_reg, test_size=0.2, random_state=42)

# Regression pipeline
reg_pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(random_state=42))
])

# Train
reg_pipeline.fit(X_train_r, y_train_r)

# Evaluate
y_pred_r = reg_pipeline.predict(X_test_r)
print(y_pred_r)
print("MAE:", mean_absolute_error(y_test_r, y_pred_r))


[12.299 12.724 16.175 15.086 14.308 16.007  9.443 11.247 14.377 15.394
 11.714 12.027 13.436 14.323 15.655 12.349 14.127 11.721 13.772 15.171
 15.222 12.515 10.044 15.429 14.595 14.287 16.843 11.633 14.74  13.779
 12.217 12.602 16.361 11.997 12.363 12.418 17.194 14.041  9.866 11.055
 18.509 13.696 10.042 11.96  17.695 16.121 16.695 13.215 13.556 14.996
 16.701 10.171 12.639 10.767 11.374 13.758 15.562  8.686 13.219  7.713
 11.682 12.678 11.712 14.795 16.051 15.143 12.944  7.644 10.206 12.281
 12.858 10.532]
MAE: 5.054652777777777


In [92]:
import joblib

# Save both models
joblib.dump(clf_pipeline, "riskLevel_model.pkl")
joblib.dump(reg_pipeline, "safeHours_model.pkl")


['safeHours_model.pkl']

In [103]:
# Load saved models
clf_model = joblib.load("riskLevel_model.pkl")
reg_model = joblib.load("safeHours_model.pkl")

# Sample input
sample = pd.DataFrame([{
    "food_type": "non_veg",
    "storage": "fridge",
    "time_since_prep": 6.5,
    "is_sealed": True,
    "environment": "dry",
    "confidence": 0.88
}])

# Predict
predicted_risk = clf_model.predict(sample)[0]
predicted_hours = reg_model.predict(sample)[0]

# print("🛑 Predicted Risk Level:", predicted_risk)
# print("⏱️ Predicted Safe For Hours:", round(predicted_hours, 2))

predicted_risk


'low'

In [107]:
import pandas as pd
import joblib
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import StringTensorType, FloatTensorType
from sklearn.model_selection import train_test_split

# ------------------ STEP 1: Load Data ------------------
df = pd.read_csv("food_safety_multitask_dataset.csv")

# ------------------ STEP 2: Define Feature and Target Columns ------------------
features = ["food_type", "storage", "time_since_prep", "is_sealed", "environment", "confidence"]
target_class = "riskLevel"
target_reg = "safeForHours"

X = df[features]
y_class = df[target_class]
y_reg = df[target_reg]

# ------------------ STEP 3: Split Data ------------------
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X, y_class, test_size=0.2, random_state=42)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X, y_reg, test_size=0.2, random_state=42)

# ------------------ STEP 4: Load Trained Pipelines ------------------
clf_pipeline = joblib.load("riskLevel_model.pkl")
reg_pipeline = joblib.load("safeHours_model.pkl")

# ------------------ STEP 5: Define Initial Types ------------------
initial_types = []
for col in features:
    if X[col].dtype == object:
        initial_types.append((col, StringTensorType([None, 1])))
    else:
        initial_types.append((col, FloatTensorType([None, 1])))

# ------------------ STEP 6: Export Classifier to ONNX (Fix ZipMap=True issue) ------------------
clf_options = {id(clf_pipeline): {"zipmap": False}}  # <- This is important for JS compatibility
clf_onnx = convert_sklearn(clf_pipeline, initial_types=initial_types, options=clf_options)

with open("risk_classifier.onnx", "wb") as f:
    f.write(clf_onnx.SerializeToString())
print("✅ Exported fixed classifier to 'risk_classifier.onnx'")

# ------------------ STEP 7: Export Regressor to ONNX (if needed) ------------------
reg_onnx = convert_sklearn(reg_pipeline, initial_types=initial_types)
with open("safe_hours_regressor.onnx", "wb") as f:
    f.write(reg_onnx.SerializeToString())
print("✅ Exported regressor to 'safe_hours_regressor.onnx'")


✅ Exported fixed classifier to 'risk_classifier.onnx'
✅ Exported regressor to 'safe_hours_regressor.onnx'
