<a href="https://colab.research.google.com/github/nrodman/FoodRecommendation/blob/main/neuralnetworkrecipes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

from google.colab import drive
drive.mount('/content/drive')
data_dir = '/content/drive/My Drive/project1/'
dataset_path = data_dir + 'reduced.csv'
recipes_df = pd.read_csv(dataset_path)
print("Columns in the merged dataset:")
print(recipes_df.columns)
print("Merged Dataset:")
print(recipes_df.head())

# defining feature columns
feature_columns = ['calories', 'fatcontent', 'proteincontent']
# rows with a missing feature cannot be scaled or fed to the network, so they are dropped
recipes_df = recipes_df.dropna(subset=feature_columns)
features = recipes_df[feature_columns]
# NOTE(review): a missing rating compares False against 4 and is therefore labelled 0 here;
# confirm 'rating' has no NaN values in reduced.csv
target = (recipes_df['rating'] >= 4).astype(int)  # Binary classification: 1 for rating >= 4, else 0

# data splitting into training and test sets
# the split happens BEFORE scaling so that the test rows never influence the scaler statistics
X_train_raw, X_test_raw, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# standardizing the features: mean/std are learned from the training rows only,
# then the same transformation is applied to the held-out rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# scaled copy of the full feature table (train-fitted scaler), kept under its original name
features_scaled = scaler.transform(features)

# sequential neural network model
def build_nn_model(input_dim=None):
    """Build and compile a small feed-forward binary classifier.

    The network is Dense(32, relu) -> Dense(16, relu) -> Dense(1, sigmoid),
    compiled with Adam (learning rate 0.001), binary cross-entropy loss and
    the accuracy metric.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. When omitted, the width of the
        notebook-level ``X_train`` matrix is used, which is what the
        original zero-argument call did.

    Returns
    -------
    A compiled, untrained ``Sequential`` model.
    """
    if input_dim is None:
        # backward-compatible default: read the feature count from the global training matrix
        input_dim = X_train.shape[1]

    model = Sequential()
    model.add(Dense(32, input_dim=input_dim, activation='relu'))
    model.add(Dense(16, activation='relu'))
    # single sigmoid unit -> output is thresholded at 0.5 by the caller
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# training and evaluating the model
def _weighted_metrics(y_true, y_pred):
    """Return (accuracy, weighted F1, weighted precision, weighted recall) for hard labels."""
    # zero_division=0 states explicitly what sklearn's default does (score 0 for a class with
    # no predicted/true samples) without emitting the _warn_prf warning on every call
    return (
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, average='weighted', zero_division=0),
        precision_score(y_true, y_pred, average='weighted', zero_division=0),
        recall_score(y_true, y_pred, average='weighted', zero_division=0),
    )


def _predict_labels(model, X):
    """Threshold the model's sigmoid outputs at 0.5 and return a flat 0/1 integer array."""
    # predict() yields one column per output unit; ravel() flattens it to 1-D for the metrics
    return (model.predict(X) > 0.5).astype(int).ravel()


def train_evaluate_nn(X_train, y_train, X_test, y_test):
    """Cross-validate the network on the training data, then score a final model on the test data.

    A 5-fold shuffled KFold (random_state=42) is run over ``X_train``/``y_train``;
    a fresh model from ``build_nn_model()`` is trained for 10 epochs on each fold
    and scored on that fold's validation part. Afterwards one more fresh model is
    trained on all of ``X_train`` and scored on ``X_test``; those test metrics are
    printed.

    Parameters
    ----------
    X_train, X_test : array-like
        Feature matrices (rows are samples).
    y_train, y_test : array-like or pandas Series
        Binary labels aligned row-by-row with the matching feature matrix.

    Returns
    -------
    (fold_metrics, test_metrics)
        ``fold_metrics`` is a list with one ``(accuracy, f1, precision, recall)``
        tuple per fold; ``test_metrics`` is the same 4-tuple for the test set.
        F1, precision and recall use ``average='weighted'``.
    """
    # positional indexing below must work for both pandas objects and plain arrays
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    kfold = KFold(n_splits=5, shuffle=True, random_state=42)

    fold_metrics = []

    for train_index, val_index in kfold.split(X_train):
        X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        nn_model = build_nn_model()
        # no validation_data here: the training history is not used, and the fold is
        # scored explicitly right below, so per-epoch validation would only cost time
        nn_model.fit(X_train_fold, y_train_fold, epochs=10, batch_size=32, verbose=0)

        y_val_pred = _predict_labels(nn_model, X_val_fold)
        fold_metrics.append(_weighted_metrics(y_val_fold, y_val_pred))

    # accuracy tests
    nn_model_final = build_nn_model()
    nn_model_final.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)
    y_test_pred = _predict_labels(nn_model_final, X_test)

    test_accuracy, test_f1, test_precision, test_recall = _weighted_metrics(y_test, y_test_pred)

    print(f"\nFinal Test Set Evaluation - Accuracy: {test_accuracy}, F1 Score: {test_f1}, Precision: {test_precision}, Recall: {test_recall}")

    return fold_metrics, (test_accuracy, test_f1, test_precision, test_recall)

# running cross-validation and the final test-set evaluation
fold_metrics, test_metrics = train_evaluate_nn(X_train, y_train, X_test, y_test)

# reporting the per-fold cross-validation metrics (otherwise they are computed and never shown)
for fold_number, (fold_accuracy, fold_f1, fold_precision, fold_recall) in enumerate(fold_metrics, start=1):
    print(f"Fold {fold_number} - Accuracy: {fold_accuracy}, F1 Score: {fold_f1}, Precision: {fold_precision}, Recall: {fold_recall}")

# simulating 1 random user selection from the recipes
# seeded (same seed as the train/test split and KFold) so a fresh run reproduces the same pick
random_recipe = recipes_df.sample(1, random_state=42)
selected_recipe_id = random_recipe['recipeid'].values[0]
selected_recipe_name = random_recipe['name'].values[0]
print(f"\nUser Selected Recipe: {selected_recipe_name} (ID: {selected_recipe_id})")

# getting reviewerID: distinct reviewers who gave the selected recipe a rating of exactly 5
gave_top_rating = (recipes_df['recipeid'] == selected_recipe_id) & (recipes_df['rating'] == 5)
selected_recipe_reviewers = recipes_df[gave_top_rating]['reviewer'].unique()

# maximum of 4 reviewers limited (may be fewer, or none, if the recipe has few 5-ratings)
selected_reviewers_limited = selected_recipe_reviewers[:4]

# get top-rated recipes by a reviewer without the selected recipe
def get_top_recipes_by_reviewer(reviewer_id, selected_recipe_id, max_recipes=1, recipes=None):
    """Return a reviewer's highest-rated recipes, excluding the selected one.

    Parameters
    ----------
    reviewer_id
        Value matched against the 'reviewer' column.
    selected_recipe_id
        Recipe to leave out, matched against the 'recipeid' column.
    max_recipes : int, default 1
        Maximum number of rows to return.
    recipes : pandas.DataFrame, optional
        Table with 'reviewer', 'recipeid' and 'rating' columns. When omitted,
        the notebook-level ``recipes_df`` is used.

    Returns
    -------
    pandas.DataFrame
        Up to ``max_recipes`` rows, highest rating first, at most one row per
        recipeid. Empty if the reviewer has no other recipes.
    """
    if recipes is None:
        # backward-compatible default: the global review table
        recipes = recipes_df

    is_other_recipe_by_reviewer = (recipes['reviewer'] == reviewer_id) & (recipes['recipeid'] != selected_recipe_id)
    reviewer_recipes = recipes[is_other_recipe_by_reviewer]

    # mergesort is stable, so equally rated recipes keep their table order
    # and the pick among ties is deterministic
    ranked = reviewer_recipes.sort_values(by='rating', ascending=False, kind='mergesort')

    # a reviewer can have several rows for one recipe; keep only the best-rated row of each
    top_recipes = ranked.drop_duplicates(subset='recipeid').head(max_recipes)
    return top_recipes

# get recommendations
max_recommendations = 4

# one lookup per similar reviewer; reviewers with no other recipes contribute nothing
reviewer_picks = [
    get_top_recipes_by_reviewer(reviewer, selected_recipe_id)
    for reviewer in selected_reviewers_limited
]
reviewer_picks = [picks for picks in reviewer_picks if not picks.empty]

if reviewer_picks:
    # single concat instead of growing a frame inside the loop; different reviewers can
    # pick the same recipe, so duplicates are removed before counting
    recommended_recipes = pd.concat(reviewer_picks).drop_duplicates(subset='recipeid')
else:
    # empty slice keeps the column layout, so the column lookups below cannot raise KeyError
    recommended_recipes = recipes_df.iloc[0:0]

# getting at least 4 recommendations
n_missing = max_recommendations - recommended_recipes.shape[0]
if n_missing > 0:
    # fill up with well-rated recipes that are neither the selected one nor already recommended
    is_fill_candidate = (
        (recipes_df['rating'] >= 4)
        & (recipes_df['recipeid'] != selected_recipe_id)
        & (~recipes_df['recipeid'].isin(recommended_recipes['recipeid']))
    )
    additional_recommendations = recipes_df[is_fill_candidate].drop_duplicates(subset='recipeid').head(n_missing)
    if recommended_recipes.empty:
        recommended_recipes = additional_recommendations
    elif not additional_recommendations.empty:
        recommended_recipes = pd.concat([recommended_recipes, additional_recommendations])

# printing recommended recipes
print("\nRecommended Recipes Based on Similar Reviewers:")
print(recommended_recipes[['recipeid', 'name', 'reviewer']].head(max_recommendations))


Mounted at /content/drive
Columns in the merged dataset:
Index(['recipeid', 'name', 'author', 'totaltime', 'datepublished',
       'description', 'images', 'recipecategory', 'keywords',
       'aggregatedrating', 'reviewcount', 'calories', 'fatcontent',
       'proteincontent', 'recipeinstructions', 'reviewid', 'reviewer',
       'rating', 'bigcategory'],
      dtype='object')
Merged Dataset:
   recipeid                              name  author totaltime  \
0     79703   Easy Seven Layer Rice Casserole   89831   PT1H35M   
1    284696     Baked Corn and Rice Casserole  128473   PT1H20M   
2    105682  Sugar and Cinnamon Spiced Pecans  131500   PT1H10M   
3     26039  Papa John's Garlic Dipping Sauce   27395      PT4M   
4    448521                  Baked Seitan Log  992845   PT1H35M   

               datepublished  \
0  2003-12-29 20:00:00+00:00   
1  2008-02-07 18:31:00+00:00   
2  2004-12-08 20:00:00+00:00   
3  2002-04-22 17:06:00+00:00   
4  2011-02-09 19:39:00+00:00   

        

  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))




  _warn_prf(average, modifier, msg_start, len(result))



Final Test Set Evaluation - Accuracy: 0.9456608659228514, F1 Score: 0.919250100570627, Precision: 0.8942744733379571, Recall: 0.9456608659228514
Fold 1 - Accuracy: 0.944880765386263, F1 Score: 0.9181022062497655, Precision: 0.8927996607969302, Recall: 0.944880765386263
Fold 2 - Accuracy: 0.9452615545718501, F1 Score: 0.9186624846942533, Precision: 0.8935194065515909, Recall: 0.9452615545718501
Fold 3 - Accuracy: 0.9453541507996953, F1 Score: 0.9187987391055079, Precision: 0.8936944704342131, Recall: 0.9453541507996953
Fold 4 - Accuracy: 0.9452113480578828, F1 Score: 0.9185886082655268, Precision: 0.8934244924974, Recall: 0.9452113480578828
Fold 5 - Accuracy: 0.943497715156131, F1 Score: 0.9160679033093965, Precision: 0.8901879385048397, Recall: 0.943497715156131

Final Test Set Evaluation - Accuracy: 0.9456608659228514, F1 Score: 0.919250100570627, Precision: 0.8942744733379571, Recall: 0.9456608659228514

User Selected Recipe: Betsy's Lemon-Cranberry Bread (ID: 317246)

Recommended R

  _warn_prf(average, modifier, msg_start, len(result))
