In [77]:
# importing train and test data sets
import pandas as pd
import numpy as np
df_train= pd.read_csv('https://raw.githubusercontent.com/rafathrb/test/main/wineQualityRed_train.csv')
df_test= pd.read_csv('https://raw.githubusercontent.com/rafathrb/test/main/wineQualityRed_test.csv')

df_train.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,10.6,0.28,0.39,15.5,0.069,6,23,1.0026,3.12,0.66,9.2,5
1,9.4,0.3,0.56,2.8,0.08,6,17,0.9964,3.15,0.92,11.7,8
2,10.6,0.36,0.59,2.2,0.152,6,18,0.9986,3.04,1.05,9.4,5
3,10.6,0.36,0.6,2.2,0.152,7,18,0.9986,3.04,1.06,9.4,5
4,10.6,0.44,0.68,4.1,0.114,6,24,0.997,3.06,0.66,13.4,6


In [78]:
#considering quality as class, creating a new column with good=1, bad=0
df_train['type']=(df_train['quality']>=7).astype('int')
df_test['type']=(df_test['quality']>=7).astype('int')

In [79]:
#preparing features and target for regression
x_train= df_train.drop(['type','quality'],axis=1)
y_train= df_train['type']
x_test= df_test.drop(['type','quality'],axis=1)
y_test= df_test['type']

# Logistic Regression

In [80]:
# performing logistic regression and predicting quality

from sklearn.linear_model import LogisticRegression
from sklearn import metrics
logreg= LogisticRegression()
logreg.fit(x_train,y_train)
y_pred=logreg.predict(x_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [81]:
#calculating accuracy, precision, recall and f1 scores
print('Accuracy of the model is',metrics.accuracy_score(y_pred,y_test))
print('Precision of the model is',metrics.precision_score(y_pred,y_test))
print('Recall Score of the model is',metrics.recall_score(y_pred,y_test))
print('F1 Score of the model is',metrics.f1_score(y_pred,y_test))

Accuracy of the model is 0.8729166666666667
Precision of the model is 0.14814814814814814
Recall Score of the model is 0.34782608695652173
F1 Score of the model is 0.20779220779220778


In [82]:
# calculating specificity and sensitivity from confusion matrix
tn, fp, fn, tp = metrics.confusion_matrix(y_pred,y_test).ravel()
specificity = tn / (tn+fp)
sensitivity= tp/(tp+fn)
print('Specificity is',specificity)
print('Sensitivity is',sensitivity)

Specificity is 0.899343544857768
Sensitivity is 0.34782608695652173


# Linear Regression as Classifier

In [83]:
# Using Linear Regression in Binary classification

from sklearn.linear_model import LinearRegression
reg= LinearRegression()
reg.fit(x_train,y_train)
y_pred1=reg.predict(x_test)
y_pred1

array([-1.24843063e-01, -4.48198225e-02, -2.59373494e-02,  1.72222612e-01,
       -1.24843063e-01, -1.22188729e-01, -7.65587210e-02,  1.53257966e-02,
       -1.06288933e-02,  2.25154254e-01, -8.34462049e-02,  2.25154254e-01,
       -3.50580355e-02,  3.67805573e-01, -4.69769819e-02, -1.63959082e-02,
        2.00652196e-01,  8.73025879e-02, -6.26121485e-02,  8.01827615e-02,
        8.15343559e-02,  4.92553551e-03,  1.70495291e-01, -1.68025283e-04,
        3.13503378e-02,  1.05360833e-02,  7.78085701e-02,  1.70495291e-01,
       -1.20973725e-01,  1.34471292e-02, -3.05050186e-02, -1.86764212e-02,
        2.78216066e-02, -9.65364749e-03, -4.30052903e-02, -2.48274689e-02,
        7.38154071e-02,  1.60378629e-01, -1.89157418e-01,  2.47909267e-01,
        2.47909267e-01, -3.91631862e-02,  5.57432618e-02,  3.52711681e-01,
       -4.43544669e-02,  2.77036163e-01, -8.39865972e-02,  8.57137942e-02,
        2.57703358e-02,  6.54095894e-03,  7.41814258e-02,  4.71400253e-02,
        6.13138718e-02,  

**Hence Linear Regression can't be used as a classifier. By looking at the predicted values array, there are values out of range. It should be either 1 or 0. Linear regression can't be used in classification problems**


# SVM

In [84]:
# feature scaling, for a better performance of SVM

from sklearn.preprocessing import StandardScaler
scaler= StandardScaler()
scaler.fit(x_train)
x_trs=scaler.transform(x_train)
scaler.fit(x_test)
x_tes= scaler.transform(x_test)

In [85]:
# Using SVM

from sklearn import svm
sv= svm.SVC()
sv.fit(x_trs,y_train)
y_pred2= sv.predict(x_tes)

In [86]:
#calculating accuracy, precision, recall and f1 scores
print('Accuracy of the model is',metrics.accuracy_score(y_pred2,y_test))
print('Precision of the model is',metrics.precision_score(y_pred2,y_test))
print('Recall Score of the model is',metrics.recall_score(y_pred2,y_test))
print('F1 Score of the model is',metrics.f1_score(y_pred2,y_test))

Accuracy of the model is 0.88125
Precision of the model is 0.2222222222222222
Recall Score of the model is 0.4444444444444444
F1 Score of the model is 0.2962962962962963


In [87]:
# calculating specificity and sensitivity from confusion matrix
tn, fp, fn, tp = metrics.confusion_matrix(y_pred2,y_test).ravel()
specificity = tn / (tn+fp)
sensitivity= tp/(tp+fn)
print('Specificity is',specificity)
print('Sensitivity is',sensitivity)

Specificity is 0.9072847682119205
Sensitivity is 0.4444444444444444


# Naives Bayesian Model

In [88]:
# feature scaling again

from sklearn.preprocessing import StandardScaler
scaler= StandardScaler()
scaler.fit(x_train)
x_trs=scaler.transform(x_train)
scaler.fit(x_test)
x_tes= scaler.transform(x_test)

In [89]:
# Using Naive Bayes model

from sklearn.naive_bayes import GaussianNB
gas= GaussianNB()
gas.fit(x_trs,y_train)
y_pred3=gas.predict(x_tes)

In [90]:
#calculating accuracy, precision, recall and f1 scores
print('Accuracy of the model is',metrics.accuracy_score(y_pred3,y_test))
print('Precision of the model is',metrics.precision_score(y_pred3,y_test))
print('Recall Score of the model is',metrics.recall_score(y_pred3,y_test))
print('F1 Score of the model is',metrics.f1_score(y_pred3,y_test))

Accuracy of the model is 0.8354166666666667
Precision of the model is 0.5
Recall Score of the model is 0.34177215189873417
F1 Score of the model is 0.40601503759398494


In [91]:
# calculating specificity and sensitivity from confusion matrix
tn, fp, fn, tp = metrics.confusion_matrix(y_pred3,y_test).ravel()
specificity = tn / (tn+fp)
sensitivity= tp/(tp+fn)
print('Specificity is',specificity)
print('Sensitivity is',sensitivity)

Specificity is 0.9326683291770573
Sensitivity is 0.34177215189873417
