# Random Forest

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV

# Feature Extraction

In [None]:
# Reduce the feature columns (everything except the 'quality' target) to
# 8 principal components. The projection is learned from the training
# frame only and then applied unchanged to both frames.
pca_model = PCA(n_components=8)
pca_model.fit(train_df.drop(columns=['quality']))
trainpca, testpca = (
    pca_model.transform(frame.drop(columns=['quality']))
    for frame in (train_df, test_df)
)

# Parameters Grid

In [None]:
# Hyper-parameter search space handed to GridSearchCV:
# 3 * 3 * 3 * 3 * 2 * 2 = 324 candidate combinations.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[10, 50, 100],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=['sqrt', 'log2'],
    bootstrap=[True, False],
)

# Grid Search & Best Model

In [None]:
# Exhaustive search over `param_grid`, scored by accuracy with 2-fold
# cross-validation on the training data; n_jobs=-1 runs fits in parallel.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=2,
    n_jobs=-1,
    verbose=1,
).fit(x_train, y_train)

# Keep a handle on the winning estimator for the evaluation cells below.
best_model = grid_search.best_estimator_

print("Best parameters:", grid_search.best_params_)

Fitting 2 folds for each of 324 candidates, totalling 648 fits
Best parameters: {'bootstrap': True, 'max_depth': 50, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}

# Best Model Prediction Accuracy

In [None]:
# Evaluate the tuned estimator on the held-out test split and report its
# accuracy in a small summary dict.
clf = best_model

# NOTE(review): if `best_model` is GridSearchCV's `best_estimator_`, it has
# already been refit on the training data (default refit=True); this call
# retrains the same object in place with the same hyper-parameters.
clf.fit(x_train, y_train)
pred = clf.predict(x_test)

# accuracy_score is documented as (y_true, y_pred). The value is identical
# either way for plain accuracy, but keeping the documented order avoids
# silent mistakes if this is later swapped for an asymmetric metric.
acc = [accuracy_score(y_test, pred)]
d = {'Modelling Algo': 'Best model', 'Accuracy': acc}
print(d)

{'Modelling Algo': 'Best model', 'Accuracy': [0.6625]}

# 5-Fold Cross-Validation

In [None]:
# Shuffled 5-fold splitter; the fixed seed keeps fold membership repeatable.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# No `scoring` is passed, so the estimator's default scorer is used.
scores = cross_val_score(estimator=clf, X=x_train, y=y_train, cv=kfold)
print(f'random forest: Mean Accuracy = {scores.mean()}, Standard Deviation = {scores.std()}')

random forest: Mean Accuracy = 0.6684256886611146, Standard Deviation = 0.03205876017396923