In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pickle
import requests
import json

In [2]:
# Load the red-wine quality data set.
df = pd.read_csv('winequality-red1.csv')

# Fill in missing values with the column mean (SimpleImputer's default strategy).
# fit_transform returns a new NumPy array and leaves `df` untouched, so the
# result must be wrapped back into a DataFrame and assigned -- otherwise the
# imputation is silently thrown away and NaNs survive into the model inputs.
imputer = SimpleImputer()
df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns, index=df.index)

# Classify the continuous quality score into 'bad' & 'good' categories:
# (1, 6.5] -> 'bad', (6.5, 12] -> 'good' (pd.cut intervals are right-closed by default).
bins = (1, 6.5, 12)
group_names = ['bad', 'good']
df['quality'] = pd.cut(df['quality'], bins=bins, labels=group_names)
# Category codes follow the label order, so 'bad' -> 0 and 'good' -> 1.
df['quality'] = df['quality'].cat.codes

# Features are every column except the target; the target is kept as a
# single-column DataFrame.
X = df.drop(['quality'], axis=1)
y = df[['quality']]

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible.
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.25, random_state=47)


# Random Forest Classifier

In [3]:
# Train a random forest on the training split and score it on the held-out split.
# random_state pins the forest's bootstrap/feature sampling so the reported
# accuracy is reproducible across runs (same seed as the train/test split).
clf1 = RandomForestClassifier(n_estimators=100, n_jobs=-1, max_depth=25, random_state=47)

# train_y is a single-column DataFrame; flatten it to the 1-D target array
# scikit-learn expects, which avoids a DataConversionWarning.
clf1.fit(train_X, train_y.values.ravel())

# Predict once and expose the result under both names used in this notebook.
y_pred = clf1.predict(test_X)
predicted = y_pred
print(f"Accuracy score: {accuracy_score(test_y, predicted)}")

  


Accuracy score: 0.91


In [4]:
# Show the predicted class codes for every row of the test split
# (y_pred comes from clf1.predict(test_X); 0 = 'bad', 1 = 'good').
print(y_pred)

[1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0
 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 1
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0
 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 1 0 0
 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]


In [5]:
# Peek at the first three held-out feature rows; the first one is reused
# below as a hand-typed sample for a single prediction.
print(test_X.iloc[:3])

     fix_acid  vol_acid  cit_acid  res_sug  chlorides  frsuldiox  totsuldiox  \
423      10.5      0.24      0.47      2.1      0.066        6.0        24.0   
832      10.4      0.44      0.42      1.5      0.145       34.0        48.0   
782       9.0      0.82      0.05      2.4      0.081       26.0        96.0   

     density    pH  sulphates  alcohol  
423  0.99780  3.15       0.90     11.0  
832  0.99832  3.38       0.86      9.9  
782  0.99814  3.36       0.53     10.0  


In [6]:
# Persist the trained classifier to disk. The context manager guarantees the
# file is flushed and closed before any later cell tries to read it back.
with open('modelw2.pkl', 'wb') as model_file:
    pickle.dump(clf1, model_file)

In [7]:
# Reload the pickled classifier, closing the file handle as soon as it is read.
# NOTE: pickle.load can execute arbitrary code -- only load files that were
# written by this notebook or come from a trusted source.
with open('modelw2.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Sanity check: predict a single hand-typed sample (the values of test row 423).
print(model.predict([[10.5,0.24,0.47,2.1,0.066,6.0,24.0,0.99780,3.15,0.90,11.0]]))

[1]


In [8]:
# Same single-sample prediction using the in-memory classifier, for comparison
# with the result obtained from the reloaded pickle.
print(
    clf1.predict(
        [[10.5, 0.24, 0.47, 2.1, 0.066, 6.0, 24.0, 0.99780, 3.15, 0.90, 11.0]]
    )
)

[1]
