In [None]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from google.colab import files
import joblib

In [None]:
# Load the Dataset
# files.upload() shows Colab's upload widget; the returned mapping is keyed by
# the uploaded file names, and the first key is read as the dataset CSV.
uploaded = files.upload()
if not uploaded:
    # Without this guard an empty mapping (presumably what a cancelled upload
    # yields -- TODO confirm) fails below with an uninformative IndexError.
    raise ValueError("No file was uploaded; re-run this cell and select the dataset CSV.")
data = pd.read_csv(list(uploaded.keys())[0])

In [None]:
# Data Preprocessing
# The 'Ingredients' column is the model input and 'Healthy status' is the label.
X, y = data['Ingredients'], data['Healthy status']

In [None]:
# TF-IDF Vectorizer with N-grams
# Fit the vocabulary and IDF weights on the training rows only, so the rows
# later held out for evaluation do not influence the learned features.
# The arguments below (test_size=0.2, random_state=42) deliberately mirror the
# train_test_split call in the split cell so the same rows are selected;
# keep the two calls in sync.
X_fit_text = train_test_split(X, test_size=0.2, random_state=42)[0]
vectorizer = TfidfVectorizer(ngram_range=(1, 2))
vectorizer.fit(X_fit_text)
# Transform every row with the train-fitted vectorizer; the split cell then
# partitions this matrix into its train and test parts.
X_tfidf = vectorizer.transform(X)

In [None]:
# Split data into training and testing sets
# 20% of the rows are held out for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_tfidf,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Train Logistic Regression Model
# class_weight='balanced' asks scikit-learn to weight classes inversely to
# their frequency in y_train; max_iter raises the solver's iteration cap.
logreg_options = dict(max_iter=1000, random_state=42, class_weight='balanced')
model = LogisticRegression(**logreg_options)
model.fit(X_train, y_train)

In [None]:
# Test the Model
# Predict on the held-out rows, then report per-class metrics and overall accuracy.
y_pred = model.predict(X_test)
report_text = classification_report(y_test, y_pred)
overall_accuracy = accuracy_score(y_test, y_pred)
print("Classification Report:\n", report_text)
print("Accuracy Score:", overall_accuracy)

In [None]:
# Save the Model and Vectorizer
# Both objects are persisted: the classifier can only score text that has been
# transformed by this same fitted vectorizer.
model_path = 'ingredient_classifier_model.pkl'
vectorizer_path = 'vectorizer.pkl'
joblib.dump(model, model_path)
joblib.dump(vectorizer, vectorizer_path)

In [None]:
# Function to classify each ingredient individually
def classify_individual_ingredients(ingredient_list):
    """Predict a health-status label for each ingredient string.

    Relies on the notebook-level ``vectorizer`` and ``model`` objects.

    Args:
        ingredient_list: Iterable of ingredient strings. A single bare string
            is treated as one ingredient instead of being iterated character
            by character.

    Returns:
        dict mapping each ingredient string to the label predicted for it.
        Repeated ingredients collapse into one key; empty input yields an
        empty dict.
    """
    if isinstance(ingredient_list, str):
        ingredient_list = [ingredient_list]
    # Materialize once so one-shot iterables work and the order used for
    # vectorizing matches the order used when pairing up predictions.
    ingredient_names = list(ingredient_list)
    if not ingredient_names:
        # Nothing to classify; also avoids asking the model to predict on zero rows.
        return {}
    # Vectorize and predict the whole batch with one call each rather than one
    # call per ingredient; each ingredient is still its own row of features.
    input_tfidf = vectorizer.transform(ingredient_names)
    predictions = model.predict(input_tfidf)
    return dict(zip(ingredient_names, predictions))

In [None]:
# Example usage: classify a hand-picked list of ingredient names
ingredients = [
    "Refined Palmolein Oil",
    "Banana",
    "Corn",
    "Food Acids (INS 330, INS 296)",
]
results = classify_individual_ingredients(ingredients)

# Print one "<ingredient> - <predicted status>" line per entry
for ingredient in results:
    status = results[ingredient]
    print(f"{ingredient} - {status}")

In [None]:
# Example usage: a second list, this time including a generic term and additives
ingredients = [
    "Refined Palmolein Oil",
    "Spices",
    "Acidity Regulator (INS 330)",
    "Food Acids (INS 330, INS 296)",
]
results = classify_individual_ingredients(ingredients)

# Print one "<ingredient> - <predicted status>" line per entry
for ingredient in results:
    status = results[ingredient]
    print(f"{ingredient} - {status}")

In [None]:
# Example Usage: Classify individual ingredients provided by the user
# ("Maltodextrin" previously carried a stray closing parenthesis, which leaked
# into the result key and the printed line.)
ingredients = ["Refined Palmolein Oil", "Maltodextrin", "Acidity Regulator (INS 330)", "Food Acids (INS 330, INS 296)"]
results = classify_individual_ingredients(ingredients)

# Display the results, one "<ingredient> - <predicted status>" line each
for ingredient, status in results.items():
    print(f"{ingredient} - {status}")