# KNN For Beer Consumption on a weekend or not classification 



## Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


##Importing and Fine tuning the dataset, dataset can be found in my github at https://github.com/joshtrivedi/Beer-Consumption/blob/main/beer_data.csv


In [2]:
dataset = pd.read_csv('beer_data.csv', decimal=',')
dataset.head()

Unnamed: 0,Data,Temperatura Media (C),Temperatura Minima (C),Temperatura Maxima (C),Precipitacao (mm),Final de Semana,Consumo de cerveja (litros)
0,2015-01-01,27.3,23.9,32.5,0.0,0.0,25.461
1,2015-01-02,27.02,24.5,33.5,0.0,0.0,28.972
2,2015-01-03,24.82,22.4,29.9,0.0,1.0,30.814
3,2015-01-04,23.98,21.5,28.6,1.2,1.0,29.799
4,2015-01-05,23.82,21.0,28.3,0.0,0.0,28.9


In [3]:
dataset.describe()

Unnamed: 0,Temperatura Media (C),Temperatura Minima (C),Temperatura Maxima (C),Precipitacao (mm),Final de Semana
count,365.0,365.0,365.0,365.0,365.0
mean,21.226356,17.46137,26.611507,5.196712,0.284932
std,3.180108,2.826185,4.317366,12.417844,0.452001
min,12.9,10.6,14.5,0.0,0.0
25%,19.02,15.3,23.8,0.0,0.0
50%,21.38,17.9,26.9,0.0,0.0
75%,23.28,19.6,29.4,3.2,1.0
max,28.86,24.5,36.5,94.8,1.0


In [4]:
dataset.shape

(941, 7)

In [5]:
#removing the NaN values from the dataframe
dataset = dataset.dropna()

In [6]:
dataset.shape

(365, 7)

In [7]:
#segregating the dataset into input features and target_variables
input_features = dataset.iloc[:,1:5]
input_features

Unnamed: 0,Temperatura Media (C),Temperatura Minima (C),Temperatura Maxima (C),Precipitacao (mm)
0,27.30,23.9,32.5,0.0
1,27.02,24.5,33.5,0.0
2,24.82,22.4,29.9,0.0
3,23.98,21.5,28.6,1.2
4,23.82,21.0,28.3,0.0
...,...,...,...,...
360,24.00,21.1,28.2,13.6
361,22.64,21.1,26.7,0.0
362,21.68,20.3,24.1,10.3
363,21.38,19.3,22.4,6.3


In [8]:
target_variable = dataset.iloc[:,-2]
target_variable

0      0.0
1      0.0
2      1.0
3      1.0
4      0.0
      ... 
360    1.0
361    0.0
362    0.0
363    0.0
364    0.0
Name: Final de Semana, Length: 365, dtype: float64

As no specific classification data columns are provided, we will take the **final-de-semana** field which is the indication of whether it is the weekend or not 

In [9]:
isWeekend = dataset.loc[target_variable==1.0]
isNotWeekend = dataset.loc[target_variable==0.0]

## Splitting into training and testing dataset using sklearn

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
input_features = np.array(input_features)
target_variable = np.array(target_variable)

In [12]:
X_train,X_test,Y_train,Y_test = train_test_split(input_features,target_variable,test_size=0.25,random_state=50)

In [13]:
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(273, 4)
(92, 4)
(273,)
(92,)


## Applying the KNN Classifier using the sklear library again

In [14]:
from sklearn.neighbors import KNeighborsClassifier

### For K = 3

In [15]:
KNN3 = KNeighborsClassifier(n_neighbors=3)

In [16]:
KNN3.fit(X_train,Y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

In [17]:
Predict3 = KNN3.predict(X_test)

In [18]:
from sklearn import metrics

In [19]:
Confusion = metrics.confusion_matrix(Y_test,Predict3)
print(Confusion)
print("Accuracy:",metrics.accuracy_score(Y_test, Predict3))
print("Precision:",metrics.precision_score(Y_test, Predict3))
print("Recall:",metrics.recall_score(Y_test, Predict3))

[[54 12]
 [24  2]]
Accuracy: 0.6086956521739131
Precision: 0.14285714285714285
Recall: 0.07692307692307693


### For K = 5

In [20]:
KNN5 = KNeighborsClassifier(n_neighbors=5)
KNN5.fit(X_train,Y_train)
Predict5 = KNN5.predict(X_test)
Confusion5 = metrics.confusion_matrix(Y_test,Predict5)
print(Confusion5)
print("Accuracy:",metrics.accuracy_score(Y_test, Predict5))
print("Precision:",metrics.precision_score(Y_test, Predict5))
print("Recall:",metrics.recall_score(Y_test, Predict5))


[[59  7]
 [24  2]]
Accuracy: 0.6630434782608695
Precision: 0.2222222222222222
Recall: 0.07692307692307693


### For K = 7

In [21]:
KNN7 = KNeighborsClassifier(n_neighbors=7)
KNN7.fit(X_train,Y_train)
Predict7 = KNN7.predict(X_test)
Confusion7 = metrics.confusion_matrix(Y_test,Predict7)
print(Confusion7)
print("Accuracy:",metrics.accuracy_score(Y_test, Predict7))
print("Precision:",metrics.precision_score(Y_test, Predict7))
print("Recall:",metrics.recall_score(Y_test, Predict7))

[[60  6]
 [25  1]]
Accuracy: 0.6630434782608695
Precision: 0.14285714285714285
Recall: 0.038461538461538464


### For K = 9

In [22]:
KNN9 = KNeighborsClassifier(n_neighbors=9)
KNN9.fit(X_train,Y_train)
Predict9 = KNN9.predict(X_test)
Confusion9 = metrics.confusion_matrix(Y_test,Predict9)
print(Confusion9)
print("Accuracy:",metrics.accuracy_score(Y_test, Predict9))
print("Precision:",metrics.precision_score(Y_test, Predict9))
print("Recall:",metrics.recall_score(Y_test, Predict9))

[[62  4]
 [25  1]]
Accuracy: 0.6847826086956522
Precision: 0.2
Recall: 0.038461538461538464


### For K = 11

In [23]:
KNN11 = KNeighborsClassifier(n_neighbors=11)
KNN11.fit(X_train,Y_train)
Predict11 = KNN11.predict(X_test)
Confusion11 = metrics.confusion_matrix(Y_test,Predict11)
print(Confusion11)
print("Accuracy:",metrics.accuracy_score(Y_test, Predict11))
print("Precision:",metrics.precision_score(Y_test, Predict11))
print("Recall:",metrics.recall_score(Y_test, Predict11))

[[65  1]
 [26  0]]
Accuracy: 0.7065217391304348
Precision: 0.0
Recall: 0.0


Precision and Recall Parameters have come down to 0 so we will keep the value of K below 11