In [1]:
import pandas as pd
import numpy as np

from wordcloud import WordCloud
import matplotlib.pyplot as plt

from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.model_selection import train_test_split

from sklearn.metrics import explained_variance_score
from sklearn.metrics import max_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import r2_score

## Load data

In [2]:
# Read the filtered SparkRecipes table (path is relative to this notebook).
df = pd.read_csv(
    filepath_or_buffer="../../data/sparkrecipes_filtered.csv",
)

In [3]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,recipe_id,title,total_calories,url,servings
0,19,Turkey Stuffed Cabbage,155.5,https://recipes.sparkpeople.com/recipe-detail....,10.0
1,25,Easy Lemon Chicken,318.0,https://recipes.sparkpeople.com/recipe-detail....,4.0
2,29,Bavarian Beef,256.1,https://recipes.sparkpeople.com/recipe-detail....,5.0
3,37,Garlic Mashed Potatoes,167.6,https://recipes.sparkpeople.com/recipe-detail....,4.0
4,40,Wonderful Stuffed Potatoes,164.4,https://recipes.sparkpeople.com/recipe-detail....,8.0


## Create baseline model

In [4]:
class BaselineModel(BaseEstimator, RegressorMixin):
    """Baseline regressor that always predicts the mean of the training targets.

    The features are ignored entirely; only the number of rows passed to
    ``predict`` determines the length of the output.

    Attributes
    ----------
    mean_ : scalar
        Value of ``y.mean()`` computed in ``fit``. Only present after fitting.
    """

    def __init__(self):
        pass

    def fit(self, _, y):
        """Store the mean of the training targets.

        Parameters
        ----------
        _ : any
            Feature data; accepted for API compatibility and not used.
        y : object exposing ``.mean()``
            Training targets (e.g. a pandas Series or a NumPy array).

        Returns
        -------
        self : BaselineModel
            The fitted estimator, so calls can be chained.
        """
        self.mean_ = y.mean()
        return self

    def predict(self, X):
        """Return the stored mean once per row of ``X``.

        Parameters
        ----------
        X : object exposing ``.shape``
            Only ``X.shape[0]`` (the number of rows) is read.

        Returns
        -------
        numpy.ndarray
            One-dimensional array of length ``X.shape[0]`` filled with ``mean_``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        # An empty attribute list is trivially satisfied (all([]) is True), so the
        # check would never fire; name the fitted attribute explicitly instead.
        check_is_fitted(self, "mean_")

        # np.full builds the constant vector directly, without a throwaway list.
        return np.full(X.shape[0], self.mean_)

In [5]:
# Hold out 33% of the rows for evaluation; the fixed seed keeps the split reproducible.
# The target column is dropped from the feature frame so that X never contains
# the value being predicted (the row count, and therefore the split, is unchanged).
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns="total_calories"),
    df["total_calories"],
    test_size=0.33,
    random_state=42,
)

In [9]:
# Instantiate the mean-only baseline, then fit it on the training split.
model = BaselineModel()
model = model.fit(X_train, y_train)  # fit() hands back the estimator itself

In [10]:
# Predict on the held-out rows.
y_pred = model.predict(X=X_test)

In [20]:
# Regression metrics to report, keyed by the column name used in the results table.
metric_fns = {
    "r2_score": r2_score,
    "explained_variance_score": explained_variance_score,
    "max_error": max_error,
    "mean_absolute_error": mean_absolute_error,
    "mean_squared_error": mean_squared_error,
    "median_absolute_error": median_absolute_error,
}

# Each metric is evaluated on the held-out split and wrapped in a one-element
# list so the resulting frame has exactly one row.
metric_columns = {
    column: [fn(y_test, y_pred)] for column, fn in metric_fns.items()
}

df_results = pd.DataFrame({"experiment_name": "mean_baseline", **metric_columns})
df_results

Unnamed: 0,experiment_name,r2_score,explained_variance_score,max_error,mean_absolute_error,mean_squared_error,median_absolute_error
0,mean_baseline,-6e-06,3.330669e-16,302.472897,89.707991,11959.954176,80.172897


In [21]:
# Persist the single-row metrics table; the index is omitted from the file.
df_results.to_csv(
    path_or_buf="../../results/mean_baseline.csv",
    index=False,
)