## Wine Quality Prediction

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

### Data Pre-Processing

In [None]:
# Load the dataset from a CSV file located in the working directory.
csv_path = 'wine_quality_red.csv'
wine_df = pd.read_csv(csv_path)

In [None]:
# Preview the first rows of the frame (DataFrame.head defaults to n=5).
wine_df.head()

In [None]:
# Display the frame's dimensions as (number of rows, number of columns).
wine_df.shape

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
wine_df.info()

In [None]:
# Summary statistics as produced by DataFrame.describe with its defaults.
wine_df.describe()

In [None]:
# Count the missing values in each column.
wine_df.isnull().sum()

In [None]:
import seaborn as sns

# Count plot: how many rows fall under each distinct 'quality' value.
sns.catplot(
    data=wine_df,
    x='quality',
    kind='count',
)

In [None]:
import matplotlib.pyplot as plt

# Bar plot of 'volatile acidity' against 'quality' on a 5x5 inch figure.
plt.figure(figsize=(5, 5))
sns.barplot(
    data=wine_df,
    x='quality',
    y='volatile acidity',
)
plt.show()

In [None]:
# Bar plot of 'citric acid' against 'quality' on a 5x5 inch figure.
plt.figure(figsize=(5, 5))
sns.barplot(
    data=wine_df,
    x='quality',
    y='citric acid',
)
plt.show()

In [None]:
# Pairwise correlation matrix between all columns (DataFrame.corr uses the
# Pearson method by default). The 'quality' row/column of this matrix holds
# each column's correlation with quality.
correlation=wine_df.corr()

In [None]:
# Annotated heatmap of the correlation matrix; every cell is labelled with
# its coefficient formatted to one decimal place.
heatmap_style = dict(
    cmap='Blues',
    cbar=True,
    square=True,
    annot=True,
    fmt='.1f',
    annot_kws={'size': 8},
)
plt.figure(figsize=(10, 10))
sns.heatmap(correlation, **heatmap_style)
plt.show()

In [None]:
# Feature matrix: every column except the target. `columns=` already
# identifies the axis, so no separate `axis` argument is needed.
x = wine_df.drop(columns='quality')

# Binary label: 1 when quality is 7 or higher, otherwise 0. The comparison
# is vectorized over the whole column instead of calling a Python lambda
# once per row.
y = (wine_df['quality'] >= 7).astype(int)

In [None]:
# Dimensions of the feature matrix (rows, feature columns).
x.shape

In [None]:
# Dimensions of the label Series (one entry per row of the frame).
y.shape

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, reproducible between runs.
# NOTE(review): the split is not stratified. If the binary label turns out
# to be imbalanced, consider passing stratify=y -- confirm against the data.
split_parts = train_test_split(x, y, test_size=0.2, random_state=3)
x_train, x_test, y_train, y_test = split_parts

In [None]:
# Dimensions of the training feature matrix.
x_train.shape

In [None]:
# Dimensions of the held-out test feature matrix.
x_test.shape

### Model Training

In [None]:
# Random forest with 100 trees, each capped at depth 5; the fixed
# random_state keeps training reproducible.
model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=3)

In [None]:
# Train the classifier on the training split.
model.fit(x_train, y_train)

In [None]:
# Predict labels for the same rows the model was trained on; used below to
# measure training accuracy.
train_data_prediction=model.predict(x_train)

In [None]:
# Fraction of training rows whose predicted label matches the true label.
train_acc_score = accuracy_score(y_true=y_train, y_pred=train_data_prediction)
print(train_acc_score)

In [None]:
# Predict labels for the held-out test rows.
test_data_prediction=model.predict(x_test)

In [None]:
# Fraction of held-out test rows whose predicted label matches the true label.
test_acc_score = accuracy_score(y_true=y_test, y_pred=test_data_prediction)
print(test_acc_score)

In [None]:
import pickle

# Serialize the trained model to 'model.pkl'. The context manager ensures
# the file handle is flushed and closed even if pickling raises.
# NOTE: pickle files can execute arbitrary code when loaded; only load this
# file from a trusted location.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)