In [None]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler,LabelEncoder

In [None]:
# Read the wine-quality CSV from the Kaggle input directory and preview it.
path = '/kaggle/input/wine-quality/winequalityN.csv'
data = pd.read_csv(path)
data.head()

In [None]:
# True if at least one cell anywhere in the frame is missing.
data.isna().to_numpy().any()

In [None]:
# Impute missing values with each column's mean. numeric_only=True restricts
# the mean to numeric columns: pandas >= 2.0 raises a TypeError when asked to
# average a non-numeric column (presumably the 'type' label column, which is
# still in the frame at this point -- confirm against the CSV).
data=data.fillna(data.mean(numeric_only=True))
# Re-check: should now report no missing values in the numeric columns.
data.isnull().values.any()

In [None]:
# Split off the target column: keep it as `labels` and remove it from the
# feature frame in place, then preview the remaining features.
labels = data['type']
del data['type']
data.head()

In [None]:
def scale_data(data):
    """Min-max scale every column of `data` into the range [0, 1].

    Returns a tuple of the scaled array and the fitted MinMaxScaler, so the
    caller can later undo the scaling with `inverse_transform`.
    """
    features = np.array(data)
    scaler = MinMaxScaler(feature_range=(0, 1))
    return scaler.fit_transform(features), scaler
def encode_data(labels):
    """Integer-encode class labels.

    Returns a tuple of the encoded array and the fitted LabelEncoder, so the
    integer codes can be mapped back with `inverse_transform`.
    """
    encoder = LabelEncoder()
    encoded = encoder.fit_transform(np.array(labels))
    return encoded, encoder

In [None]:
# Scale the feature frame; keep the fitted scaler for inverse transforms.
X, scaler = scale_data(data)

In [None]:
# Preview only the first rows of the scaled matrix; the bare expression is
# rendered as the cell output, so no print() is needed.
X[:5]

In [None]:
# Undo the scaling on a few rows to check that they map back to the
# original feature values.
scaler.inverse_transform(X[:5])

In [None]:
# Encode the class labels as integers; keep the fitted encoder for decoding.
y, le = encode_data(labels)

In [None]:
# Preview the first few encoded labels instead of printing the whole array.
y[:5]

In [None]:
# Decode a few labels to confirm the integer codes map back to class names.
le.inverse_transform(y[:5])

In [None]:
def find_acc(model,X_train,y_train,X_test,y_test):
    """Fit `model` on the training split and return its test accuracy in percent.

    The model is (re)fitted in place on `X_train`/`y_train`, then used to
    predict `X_test`. Accuracy is derived from the confusion matrix of the
    predictions against `y_test`.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    matrix = confusion_matrix(y_true=y_test, y_pred=predictions)
    # The diagonal holds the correctly classified samples; the grand total
    # is the number of test samples.
    correct, total = matrix.trace(), matrix.sum()
    return (correct / total) * 100

## Without Dimensionality Reduction

In [None]:
# Hold out 20% of the rows for testing. A fixed random_state makes the split,
# and therefore the reported accuracy, reproducible across reruns.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [None]:
# Seed the forest so its randomised training repeats across runs.
model=RandomForestClassifier(random_state=42)
# Hyper-parameter grid: max_depth in {200, 250, ..., 400} and
# n_estimators in {10, 30, ..., 110}.
# NOTE(review): depth limits this large may never be reached by the trees,
# in which case the max_depth axis only multiplies the fit count -- confirm.
params={'max_depth':list(range(200,450,50)),'n_estimators':list(range(10,120,20))}

In [None]:
# Exhaustive search over `params`; refit=True retrains the best configuration
# on the full training data once the search finishes.
grid_clf = GridSearchCV(
    estimator=model,
    param_grid=params,
    refit=True,
    verbose=3,
)

In [None]:
# Run the cross-validated grid search on the training split.
grid_clf.fit(X_train, y_train)

In [None]:
# Estimator refitted with the best hyper-parameters found by the search.
best_clf = grid_clf.best_estimator_
print(best_clf)

In [None]:
# Score the selected estimator on the held-out split. find_acc refits the
# estimator on the training data before predicting.
acc = find_acc(
    model=best_clf,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)
print(f'Accuracy: {acc}')

## With Dimensionality Reduction

In [None]:
# Keep as many principal components as needed to explain 99.99% of the
# variance. PCA is variance-based, so it is fitted on the min-max scaled
# matrix X rather than the raw frame; on unscaled data the features with the
# largest numeric ranges would dominate the components.
pca=PCA(n_components=0.9999)
X_red=pca.fit_transform(X)
print(X_red.shape)

In [None]:
# Preview the first rows of the reduced matrix instead of printing all of it.
X_red[:5]

In [None]:
# Rescale the PCA output into [0, 1] with its own scaler. Refitting the shared
# `scaler` here would overwrite its fit on the original features and break
# any later scaler.inverse_transform(X) call.
X_red,scaler_red=scale_data(X_red)
# Preview the first rows rather than printing the whole array.
X_red[:5]

In [None]:
# Hold out 20% of the reduced rows for testing. A fixed random_state makes the
# split, and therefore the reported accuracy, reproducible across reruns.
X_train,X_test,y_train,y_test=train_test_split(X_red,y,test_size=0.2,random_state=42)

In [None]:
# Fresh forest for the reduced features, seeded so its randomised training
# repeats across runs.
model=RandomForestClassifier(random_state=42)

In [None]:
# Search the hyper-parameter grid `params` again, this time for the model
# trained on the reduced features; refit=True retrains the best configuration.
grid_clf = GridSearchCV(
    estimator=model,
    param_grid=params,
    refit=True,
    verbose=3,
)

In [None]:
# Run the cross-validated grid search on the reduced training split.
grid_clf.fit(X_train, y_train)

In [None]:
# Estimator refitted with the best hyper-parameters found on the reduced data.
best_clf = grid_clf.best_estimator_
print(best_clf)

In [None]:
# Score the estimator selected by the grid search. Passing the bare `model`
# here would evaluate an untuned default forest and discard the search result.
acc=find_acc(best_clf,X_train,y_train,X_test,y_test)
print(f'Accuracy:{acc}')