In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the dataset
data = pd.read_csv('reciepe_reviews.csv')

# Display the first few rows of the dataset
print(data.head())

# Handle missing values (you can choose to fill or drop them)
data['text'].fillna('', inplace=True)  # Filling missing text with empty string
data.dropna(subset=['stars'], inplace=True)  # Drop rows with missing stars

# Define features and target variable
X = data.drop(columns=['stars'])  # Features
y = data['stars']  # Target variable

# Encode categorical variables and scale continuous variables
categorical_features = ['recipe_name', 'comment_id', 'user_id', 'user_name', 'text']  # Add other categorical features if necessary
numeric_features = ['recipe_number', 'user_reputation', 'created_at', 'reply_count', 'thumbs_up', 'thumbs_down', 'best_score']  # Add other numeric features

# Create a preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])

# Preprocess the features
X_processed = preprocessor.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

   Unnamed: 0  recipe_number  recipe_code         recipe_name  \
0           0              1        14299  Creamy White Chili   
1           1              1        14299  Creamy White Chili   
2           2              1        14299  Creamy White Chili   
3           3              1        14299  Creamy White Chili   
4           4              1        14299  Creamy White Chili   

                                        comment_id         user_id  \
0  sp_aUSaElGf_14299_c_2G3aneMRgRMZwXqIHmSdXSG1hEM  u_9iFLIhMa8QaG   
1  sp_aUSaElGf_14299_c_2FsPC83HtzCsQAtOxlbL6RcaPbY  u_Lu6p25tmE77j   
2  sp_aUSaElGf_14299_c_2FPrSGyTv7PQkZq37j92r9mYGkP  u_s0LwgpZ8Jsqq   
3  sp_aUSaElGf_14299_c_2DzdSIgV9qNiuBaLoZ7JQaartoC  u_fqrybAdYjgjG   
4  sp_aUSaElGf_14299_c_2DtZJuRQYeTFwXBoZRfRhBPEXjI  u_XXWKwVhKZD69   

    user_name  user_reputation  created_at  reply_count  thumbs_up  \
0     Jeri326                1  1665619889            0          0   
1     Mark467               50  1665277687      

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['text'].fillna('', inplace=True)  # Filling missing text with empty string


In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Define the ANN model
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='linear')  # Use 'softmax' for multi-class classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='mean_squared_error',  # Use 'sparse_categorical_crossentropy' for multi-class
              metrics=['mae'])  # Mean Absolute Error for regression tasks

# Display the model summary
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [4]:
# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m364/364[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 81ms/step - loss: 2.3073 - mae: 1.0835 - val_loss: 2.1383 - val_mae: 1.0234
Epoch 2/10
[1m364/364[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 78ms/step - loss: 1.0227 - mae: 0.7570 - val_loss: 2.2353 - val_mae: 1.1471
Epoch 3/10
[1m364/364[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 78ms/step - loss: 0.3220 - mae: 0.4215 - val_loss: 2.2648 - val_mae: 1.1850
Epoch 4/10
[1m364/364[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 76ms/step - loss: 0.2081 - mae: 0.3376 - val_loss: 2.1941 - val_mae: 1.1363
Epoch 5/10
[1m364/364[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 78ms/step - loss: 0.1360 - mae: 0.2656 - val_loss: 2.1508 - val_mae: 1.1019
Epoch 6/10
[1m364/364[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 80ms/step - loss: 0.0913 - mae: 0.2179 - val_loss: 2.1804 - val_mae: 1.1279
Epoch 7/10
[1m364/364[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0

In [5]:
# Evaluate the model
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f'Test Mean Absolute Error: {test_mae:.4f}')

[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 2.2344 - mae: 1.0837
Test Mean Absolute Error: 1.0590
