In [1]:
#importing necessary datasets
import pandas as pd
import numpy as np
df_train= pd.read_csv('https://raw.githubusercontent.com/rafathrb/test/main/wineQualityRed_train.csv')
df_test= pd.read_csv('https://raw.githubusercontent.com/rafathrb/test/main/wineQualityRed_test.csv')
df_train.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,10.6,0.28,0.39,15.5,0.069,6,23,1.0026,3.12,0.66,9.2,5
1,9.4,0.3,0.56,2.8,0.08,6,17,0.9964,3.15,0.92,11.7,8
2,10.6,0.36,0.59,2.2,0.152,6,18,0.9986,3.04,1.05,9.4,5
3,10.6,0.36,0.6,2.2,0.152,7,18,0.9986,3.04,1.06,9.4,5
4,10.6,0.44,0.68,4.1,0.114,6,24,0.997,3.06,0.66,13.4,6


In [15]:
#preparing target and feature variables
df_train['type']=(df_train['quality']>=7).astype('int')
df_test['type']=(df_test['quality']>=7).astype('int')
x_tr= df_train.drop(['quality','type'],axis=1)
y_train= df_train['type']
x_te= df_test.drop(['quality','type'],axis=1)
y_test= df_test['type']

# Feature scaling and PCA

In [16]:
from sklearn.preprocessing import StandardScaler
sca= StandardScaler()
x_train= sca.fit_transform(x_tr)
x_test= sca.fit_transform(x_te)

In [17]:
from sklearn.decomposition import PCA
pca= PCA(n_components=5)
pc_train= pca.fit_transform(x_train)
pc_test= pca.fit_transform(x_test)

# Logistic Regression

In [18]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
logreg= LogisticRegression()
logreg.fit(pc_train,y_train)
y_pred=logreg.predict(pc_test)

In [19]:
#calculating accuracy, precision, recall and f1 scores
print('Accuracy of the model is',metrics.accuracy_score(y_pred,y_test))
print('Precision of the model is',metrics.precision_score(y_pred,y_test))
print('Recall Score of the model is',metrics.recall_score(y_pred,y_test))
print('F1 Score of the model is',metrics.f1_score(y_pred,y_test))

Accuracy of the model is 0.8833333333333333
Precision of the model is 0.018518518518518517
Recall Score of the model is 0.25
F1 Score of the model is 0.034482758620689655


In [20]:
# calculating specificity and sensitivity from confusion matrix
tn, fp, fn, tp = metrics.confusion_matrix(y_pred,y_test).ravel()
specificity = tn / (tn+fp)
sensitivity= tp/(tp+fn)
print('Specificity is',specificity)
print('Sensitivity is',sensitivity)

Specificity is 0.8886554621848739
Sensitivity is 0.25


# Linear Regression as Classifier

In [21]:
# Using Linear Regression in Binary classification

from sklearn.linear_model import LinearRegression
reg= LinearRegression()
reg.fit(pc_train,y_train)
y_pred1=reg.predict(pc_test)
y_pred1

array([ 0.22877752,  0.04278823,  0.15835235,  0.08798446,  0.22877752,
        0.19819484,  0.13540751,  0.26548136,  0.28447901, -0.11360683,
        0.13309718, -0.11360683,  0.16586391,  0.36384672, -0.40230161,
       -0.4026051 , -0.13010241,  0.32628868,  0.19153403,  0.26617142,
       -0.0114241 ,  0.01325893,  0.2780436 ,  0.14827097,  0.11697066,
        0.27265554,  0.34795544,  0.2780436 ,  0.20594856,  0.31365789,
        0.06663109,  0.14272639,  0.00787564, -0.45637785,  0.17714197,
        0.19734239,  0.34649208,  0.21610097,  0.39622105, -0.02546197,
       -0.02546197,  0.10736412,  0.43069579,  0.32862708,  0.33613348,
        0.22988536, -0.03836097,  0.21927714,  0.33351204,  0.07523211,
        0.33168147,  0.30228092,  0.32566482, -0.19334263, -0.05033426,
        0.0152873 ,  0.28310504, -0.37541208,  0.11794354,  0.1892595 ,
        0.08949799, -0.0130377 ,  0.22431466,  0.17550764,  0.27202732,
        0.27202732,  0.23278744,  0.35497557,  0.02483685,  0.34

**Hence Linear Regression can't be used as a classifier. By looking at the predicted values array, there are values out of range. It should be either 1 or 0. Linear regression can't be used in classification problems**


# SVM

In [23]:
# Using SVM

from sklearn import svm
sv= svm.SVC()
sv.fit(pc_train,y_train)
y_pred2= sv.predict(pc_test)

In [24]:
#calculating accuracy, precision, recall and f1 scores
print('Accuracy of the model is',metrics.accuracy_score(y_pred2,y_test))
print('Precision of the model is',metrics.precision_score(y_pred2,y_test))
print('Recall Score of the model is',metrics.recall_score(y_pred2,y_test))
print('F1 Score of the model is',metrics.f1_score(y_pred2,y_test))

Accuracy of the model is 0.8854166666666666
Precision of the model is 0.018518518518518517
Recall Score of the model is 0.3333333333333333
F1 Score of the model is 0.03508771929824561


In [25]:
# calculating specificity and sensitivity from confusion matrix
tn, fp, fn, tp = metrics.confusion_matrix(y_pred2,y_test).ravel()
specificity = tn / (tn+fp)
sensitivity= tp/(tp+fn)
print('Specificity is',specificity)
print('Sensitivity is',sensitivity)

Specificity is 0.8888888888888888
Sensitivity is 0.3333333333333333


# Naives Bayesian 

In [26]:
# Using Naive Bayes model

from sklearn.naive_bayes import GaussianNB
gas= GaussianNB()
gas.fit(pc_train,y_train)
y_pred3=gas.predict(pc_test)

In [27]:
#calculating accuracy, precision, recall and f1 scores
print('Accuracy of the model is',metrics.accuracy_score(y_pred3,y_test))
print('Precision of the model is',metrics.precision_score(y_pred3,y_test))
print('Recall Score of the model is',metrics.recall_score(y_pred3,y_test))
print('F1 Score of the model is',metrics.f1_score(y_pred3,y_test))

Accuracy of the model is 0.8875
Precision of the model is 0.018518518518518517
Recall Score of the model is 0.5
F1 Score of the model is 0.03571428571428571


In [28]:
# calculating specificity and sensitivity from confusion matrix
tn, fp, fn, tp = metrics.confusion_matrix(y_pred3,y_test).ravel()
specificity = tn / (tn+fp)
sensitivity= tp/(tp+fn)
print('Specificity is',specificity)
print('Sensitivity is',sensitivity)

Specificity is 0.8891213389121339
Sensitivity is 0.5
