# In [1]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import pickle

# Train a random-forest regressor that predicts `tip` from the other columns
# of the 'tips' dataset, print its mean absolute error on a held-out split,
# and pickle the fitted model to 'model.pkl'.

# One seed shared by the split and the model. Previously only the split was
# seeded, so the reported MAE and the saved model changed on every run.
RANDOM_STATE = 42

# Load dataset
data = sns.load_dataset('tips')

# Mapping categorical columns to integer codes
sex_mapping = {'Male': 0, 'Female': 1}
smoker_mapping = {'No': 0, 'Yes': 1}
day_mapping = {'Thur': 0, 'Fri': 1, 'Sat': 2, 'Sun': 3}
time_mapping = {'Lunch': 0, 'Dinner': 1}

# Apply each mapping to its column. Values missing from a mapping become NaN
# (pandas `Series.map` behaviour), so the mappings must cover every category.
for column, mapping in (
    ('sex', sex_mapping),
    ('smoker', smoker_mapping),
    ('day', day_mapping),
    ('time', time_mapping),
):
    data[column] = data[column].map(mapping)

# Prepare features and target: everything except `tip` is a feature.
x = data.drop(columns=['tip'])
y = data['tip']

# Train-test split (20% held out for evaluation)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

# Model training; seeded so the fitted forest is reproducible.
model = RandomForestRegressor(random_state=RANDOM_STATE)
model.fit(x_train, y_train)

# Predictions and evaluation on the held-out split
y_pred = model.predict(x_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"mean_absolute_error: {mae}")

# Save model
# NOTE: pickle files must only be loaded from trusted sources.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)


# Output (recorded from a previous run):
# mean_absolute_error: 0.752218367346939
