In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file below the Kaggle input directory,
# then print the paths one per line.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Location of the red-wine quality CSV inside the Kaggle input directory.
wine_csv_path = "/kaggle/input/red-wine-quality-cortez-et-al-2009/winequality-red.csv"
dataset = pd.read_csv(wine_csv_path)

In [None]:
# Preview the first five rows of the loaded frame.
dataset.head(n=5)

In [None]:
# Print pandas' summary of the frame (DataFrame.info).
dataset.info()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Correlation between every pair of columns in the frame.
corrmat = dataset.corr()

# Create the figure explicitly and hand its axes to seaborn instead of relying
# on the implicit "current axes"; the heatmap is drawn on the same 10x5 figure.
_, heatmap_ax = plt.subplots(figsize=(10, 5))
sns.heatmap(corrmat, vmax=0.9, square=True, ax=heatmap_ax)

We'll use a binary target because we don't have much data:
1 - Wine is good (quality > 6)
0 - Wine is not good (quality <= 6)

In [None]:
# Binary label: 1 where the original 'quality' score is above 6, otherwise 0.
# The comparison yields booleans; casting gives the same 0/1 integer column.
dataset['Quality'] = (dataset['quality'] > 6).astype('int64')
dataset.tail()

In [None]:
from sklearn.utils import resample, shuffle

# Split the rows by the binary label so one class can be oversampled.
zero = dataset[dataset['Quality'] == 0]
ones = dataset[dataset['Quality'] == 1]

# Draw rows of the positive class with replacement until it has as many rows
# as the negative class. random_state pins the draw so that a fresh
# "Restart & Run All" reproduces the same balanced frame.
# NOTE(review): this oversampling happens before the train/test split further
# down, so copies of the same original row can land on both sides of the split
# and inflate the test score -- consider splitting first and resampling only
# the training part.
upsampled = resample(ones, replace=True, n_samples=zero.shape[0], random_state=42)

# Stack both classes and shuffle the row order (seeded for reproducibility).
dataset_new = pd.concat([zero, upsampled])
dataset_new = shuffle(dataset_new, random_state=42)

# Pass the column by keyword: newer seaborn releases treat a bare positional
# argument as `data`, not as the `x` variable to count.
sns.countplot(x='Quality', data=dataset_new)

In [None]:
# Finding outliers
from scipy import stats

# Per-column first and third quartiles of dataset_new and the range between them.
Q1, Q3 = dataset_new.quantile(0.25), dataset_new.quantile(0.75)
IQR = Q3 - Q1
print(IQR)

# A cell is flagged when it lies more than 1.5 * IQR below Q1 or above Q3 of
# its own column. The result is a boolean frame shaped like dataset_new.
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
filter_result = dataset_new.lt(lower_fence) | dataset_new.gt(upper_fence)

In [None]:
# Keep only the rows in which no column was flagged as an outlier.
# filter_result is the boolean outlier mask computed in the previous cell from
# the same Q1 / Q3 / IQR values, so it is reused instead of being rebuilt.
# .copy() makes the result an independent frame, so modifying dataset_filtered
# afterwards does not raise pandas' SettingWithCopyWarning.
dataset_filtered = dataset_new[~filter_result.any(axis=1)].copy()
dataset_filtered.shape

In [None]:
# Target vector: the binary 'Quality' label.
y = dataset_filtered['Quality']

# Feature matrix: every column except the original 'quality' score and the
# binary label derived from it. drop() returns a new frame here, so
# dataset_filtered itself is left untouched and this cell can be re-run
# without failing on an already-removed column.
X = dataset_filtered.drop(columns=['quality', 'Quality'])

In [None]:
# Report the dimensions of the feature matrix and of the target vector.
for label, part in (("X shape: ", X), ("y shape: ", y)):
    print(label, part.shape)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.33, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Report the dimensions of the training and test feature matrices.
for label, part in (("Train shape: ", X_train), ("Test shape: ", X_test)):
    print(label, part.shape)

In [None]:
#Data scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# Fit the scaler on the training split only and scale that split.
X_train_scaled = scaler.fit_transform(X_train)

# Apply the scaling learned from the training split to the test split.
# Calling fit_transform here would re-fit the scaler on the test data, so the
# two splits would be scaled with different statistics and information from
# the test set would influence preprocessing.
X_test_scaled = scaler.transform(X_test)

In [None]:
#Finding best params (and model itself) for model with grid search
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline


# Seed NumPy's global random generator before running the search.
np.random.seed(0)

# Single-step pipeline; the "classifier" step is swapped out by the grid below.
pipe = Pipeline(steps=[("classifier", RandomForestClassifier())])

# Candidate settings for a random forest ...
forest_grid = {
    "classifier": [RandomForestClassifier()],
    "classifier__n_estimators": [5, 10, 50, 100],
    "classifier__max_features": [1, 2, 3],
    "classifier__max_depth": [5, 10, 30, 50, 100],
}
# ... and for a k-nearest-neighbours classifier.
knn_grid = {
    "classifier": [KNeighborsClassifier()],
    "classifier__n_neighbors": [2, 3, 5, 7],
}
search_space = [forest_grid, knn_grid]

# Exhaustive search over both grids with 5-fold cross-validation.
gridsearch = GridSearchCV(estimator=pipe, param_grid=search_space, cv=5, verbose=0)

# Run the search on the scaled training data.
best_model = gridsearch.fit(X_train_scaled, y_train)

In [None]:
# Show the "classifier" step of the best pipeline found by the search.
best_model.best_estimator_.named_steps["classifier"]

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Hyper-parameters for the final random forest, collected in one place.
forest_params = dict(max_depth=100, max_features=1, random_state=0)

# Build the forest and train it on the scaled training split.
clf = RandomForestClassifier(**forest_params)
clf.fit(X_train_scaled, y_train)

In [None]:
from sklearn.metrics import accuracy_score

# Predict labels for the scaled test split and report the share predicted correctly.
y_forest_pred = clf.predict(X_test_scaled)
forest_test_accuracy = accuracy_score(y_test, y_forest_pred)
print("Test score: ", forest_test_accuracy)