In [148]:
import seaborn as sns
df=sns.load_dataset('tips')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [149]:
df=sns.load_dataset('tips')

In [150]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [151]:
X,y=df.drop(columns=['tip'],axis=1),df['tip']

In [152]:
y.head()

0    1.01
1    1.66
2    3.50
3    3.31
4    3.61
Name: tip, dtype: float64

In [153]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

In [154]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

In [155]:
categorical_cols=['sex','smoker','day','time']
numerical_cols=['total_bill','size']

In [156]:
num_pipeline=Pipeline(
    steps=[
        ('impute',SimpleImputer(strategy='mean')),
        ('scaler',StandardScaler())
    ]
)

cat_pipeline=Pipeline(
    steps=[
        ('impute',SimpleImputer(strategy='most_frequent')),
        ('encoder',OneHotEncoder())
    ]
)

preprocessor=ColumnTransformer(
    [
        ('num_pipeline',num_pipeline,numerical_cols),
        ('cat_pipeline',cat_pipeline,categorical_cols)
    ]
)

In [157]:
X_train=preprocessor.fit_transform(X_train)
X_test=preprocessor.fit_transform(X_test)

In [158]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
#automate model training and evaluation
models={
    'RandomForest':RandomForestRegressor(),
    'DecitionTree':DecisionTreeRegressor(),
    'SVC':SVR(),
    'LinearRegression':LinearRegression()
    
}

def evaluate_model(X_train,X_test,y_train,y_test,models):
    report = {}
    for i in range(len(models)):
        model=list(models.values())[i]
        model.fit(X_train,y_train)
        y_pred=model.predict(X_test)
        score=r2_score(y_test,y_pred)
        report[list(models.values())[i]]=score
    return report

In [160]:
evaluate_model(X_train,X_test,y_train,y_test,models)

{RandomForestRegressor(): 0.4023686464483519,
 DecisionTreeRegressor(): 0.31085378612448933,
 SVR(): 0.31966591958903134,
 LinearRegression(): 0.444823751692101}

In [None]:
import warnings
from sklearn.exceptions import DataConversionWarning

# Filter out warnings to simplify output
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=DataConversionWarning)

# Your existing code for hyperparameter tuning
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

regressor = RandomForestRegressor()

params = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2']
}

# Perform GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(regressor, params, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Print the best parameters and the corresponding score
print("Best Parameters: ", grid_search.best_params_)
print("Best Score: ", -grid_search.best_score_)
