# LOADING DATASET

In [39]:
import numpy as np
import pandas as pd

In [13]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

Saving winequality-red.csv to winequality-red (2).csv
Saving winequality-white.csv to winequality-white (1).csv
User uploaded file "winequality-red.csv" with length 100951 bytes
User uploaded file "winequality-white.csv" with length 264426 bytes


In [40]:
dfr = pd.read_csv("winequality-red.csv")
print(dfr.head(5))

   fixed acidity  volatile acidity  citric acid  ...  sulphates  alcohol  quality
0            7.4              0.70         0.00  ...       0.56      9.4        5
1            7.8              0.88         0.00  ...       0.68      9.8        5
2            7.8              0.76         0.04  ...       0.65      9.8        5
3           11.2              0.28         0.56  ...       0.58      9.8        6
4            7.4              0.70         0.00  ...       0.56      9.4        5

[5 rows x 12 columns]


In [41]:
dfw = pd.read_csv("winequality-white.csv", sep=';')
print(dfw.head(5))

   fixed acidity  volatile acidity  citric acid  ...  sulphates  alcohol  quality
0            7.0              0.27         0.36  ...       0.45      8.8        6
1            6.3              0.30         0.34  ...       0.49      9.5        6
2            8.1              0.28         0.40  ...       0.44     10.1        6
3            7.2              0.23         0.32  ...       0.40      9.9        6
4            7.2              0.23         0.32  ...       0.40      9.9        6

[5 rows x 12 columns]


In [42]:
xr = dfr.drop('quality', axis = 1)
yr = dfr['quality']

In [43]:
xw = dfw.drop('quality', axis = 1)
yw = dfw['quality']

# SPLITTING FOR RED WINE

In [44]:
from sklearn.model_selection import train_test_split
xr_train, xr_test, yr_train, yr_test = train_test_split(xr, yr, test_size = 0.4, random_state = 1)

# SPLITTING FOR WHITE WINE

In [45]:
xw_train, xw_test, yw_train, yw_test = train_test_split(xw, yw, test_size = 0.4, random_state = 1)

# IMPORTING MODELS

In [46]:
from sklearn import svm
from sklearn.linear_model import  LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import  RandomizedSearchCV

# PARAMETER TUNING FOR RED WINE

In [47]:
param_svm = {'C':[1, 10, 20], 'kernel':['rbf', 'linear']}
param_dt = {'criterion':['mse', 'mae'], 'splitter':['best', 'random']}
param_log = {'C':[1, 5, 10]}
param_lin = {'fit_intercept':['True', 'False'], 'normalize':['True', 'False']}

# USING GRID SEARCH FOR TUNING MODEL FOR RED WINE

In [48]:
mod_svm = GridSearchCV(svm.SVR(gamma='auto'), param_svm, cv=5)
mod_dt = GridSearchCV(DecisionTreeRegressor(), param_dt, cv=5)
mod_log = GridSearchCV(LogisticRegression(solver='liblinear', multi_class='auto'), param_log, cv=5)
mod_lin = GridSearchCV(LinearRegression(), param_lin, cv=5)

FITTING DATASET FOR DIFFERENT MODELS

In [49]:
mod_svm.fit(xr_train, yr_train)
mod_dt.fit(xr_train, yr_train)
mod_log.fit(xr_train, yr_train)
mod_lin.fit(xr_train, yr_train)

GridSearchCV(cv=5, error_score=nan,
             estimator=LinearRegression(copy_X=True, fit_intercept=True,
                                        n_jobs=None, normalize=False),
             iid='deprecated', n_jobs=None,
             param_grid={'fit_intercept': ['True', 'False'],
                         'normalize': ['True', 'False']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

DECISION TREE REGRESSION

In [50]:
print("decision")
print(mod_dt.best_params_)
print(mod_dt.best_score_)

decision
{'criterion': 'mae', 'splitter': 'best'}
-0.06277437030823156


LOGISTIC REGRESSION

In [51]:
print("logistic")
print(mod_log.best_params_)
print(mod_log.best_score_)

logistic
{'C': 10}
0.5860165794066317


SVM REGRESSION

In [52]:
print("svm model")
print(mod_svm.best_params_)
print(mod_svm.best_score_)

svm model
{'C': 10, 'kernel': 'linear'}
0.33541301191851536


LINEAR REGRESSION

In [53]:
print("linear")
print(mod_lin.best_params_)
print(mod_lin.best_score_)

linear
{'fit_intercept': 'True', 'normalize': 'True'}
0.3388730253978155


# PARAMETER TUNING FOR WHITE WINE

**LOGISTIC REGRESSION PARAMETERS**

In [58]:
pen = ["l1", "l2"]
c = np.random.normal(1, 0.2, 10).astype(float)
par_log = {'penalty':pen, 'C':c}
par_log

{'C': array([1.2373117 , 0.91382886, 1.18822033, 1.28614209, 1.03419871,
        0.79828815, 1.09655777, 0.98034015, 1.44518284, 1.17085278]),
 'penalty': ['l1', 'l2']}

**SVM PARAMETERS**

In [62]:
g = np.random.uniform(0.0, 0.3, 5).astype(float)
c2 = np.random.normal(1, 0.1, 5).astype(float)
par_svm = {'gamma': list(g), 'C': list(c2)}
par_svm

{'C': [1.0083045311649232,
  1.0692718856520245,
  0.9704841793287746,
  0.8871938863750076,
  1.1650002094590417],
 'gamma': [0.07850487714252688,
  0.19034626136950197,
  0.056707789478802194,
  0.002665658167744911,
  0.12542602350143983]}

# USING RANDOM SEARCH FOR TUNING MODEL FOR WHITE WINE

**LOGISTIC REGRESSION**

In [59]:
mods_log = RandomizedSearchCV(LogisticRegression(max_iter=2500), par_log, cv = 2)

In [None]:
mods_log.fit(x_train, y_train)

In [61]:
print("logistic")
print(mods_log.best_params_)
print(mods_log.best_score_)

logistic
{'penalty': 'l2', 'C': 1.4451828387800405}
0.5906976744186047


**SVM**

In [63]:
mods_svm = RandomizedSearchCV(svm.SVC(kernel='rbf'), par_svm, n_iter=20)

In [64]:
mods_svm.fit(x_train, y_train)

RandomizedSearchCV(cv=None, error_score=nan,
                   estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                                 class_weight=None, coef0=0.0,
                                 decision_function_shape='ovr', degree=3,
                                 gamma='scale', kernel='rbf', max_iter=-1,
                                 probability=False, random_state=None,
                                 shrinking=True, tol=0.001, verbose=False),
                   iid='deprecated', n_iter=20, n_jobs=None,
                   param_distributions={'C': [1.0083045311649232,
                                              1.0692718856520245,
                                              0.9704841793287746,
                                              0.8871938863750076,
                                              1.1650002094590417],
                                        'gamma': [0.07850487714252688,
                                                  0.1903462

In [65]:
print("svm model")
print(mods_svm.best_params_)
print(mods_svm.best_score_)

svm model
{'gamma': 0.19034626136950197, 'C': 0.9704841793287746}
0.5674487508007686
