In [65]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [66]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_wine

%matplotlib inline

In [67]:
# Load the wine dataset through scikit-learn's load_wine and show which
# keys the returned object provides.
wine=load_wine()
wine.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])

In [68]:
# Print the description text that ships with the dataset (its DESCR field).
print(wine.DESCR)

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0

In [69]:
# Wrap the raw arrays in DataFrames: one named column per feature, and a
# separate single-column frame holding the class label.
feature = pd.DataFrame(data=wine.data, columns=wine.feature_names)
target = pd.DataFrame({'Target': wine.target})

In [70]:
# Put the features and the label side by side in one frame.
df = pd.concat(objs=[feature, target], axis="columns")

In [71]:
# Pairwise Pearson correlation between every pair of columns in df
# (the features and the Target column).
correlation = df.corr(method='pearson')
correlation

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,Target
alcohol,1.0,0.094397,0.211545,-0.310235,0.270798,0.289101,0.236815,-0.155929,0.136698,0.546364,-0.071747,0.072343,0.64372,-0.328222
malic_acid,0.094397,1.0,0.164045,0.2885,-0.054575,-0.335167,-0.411007,0.292977,-0.220746,0.248985,-0.561296,-0.36871,-0.192011,0.437776
ash,0.211545,0.164045,1.0,0.443367,0.286587,0.12898,0.115077,0.18623,0.009652,0.258887,-0.074667,0.003911,0.223626,-0.049643
alcalinity_of_ash,-0.310235,0.2885,0.443367,1.0,-0.083333,-0.321113,-0.35137,0.361922,-0.197327,0.018732,-0.273955,-0.276769,-0.440597,0.517859
magnesium,0.270798,-0.054575,0.286587,-0.083333,1.0,0.214401,0.195784,-0.256294,0.236441,0.19995,0.055398,0.066004,0.393351,-0.209179
total_phenols,0.289101,-0.335167,0.12898,-0.321113,0.214401,1.0,0.864564,-0.449935,0.612413,-0.055136,0.433681,0.699949,0.498115,-0.719163
flavanoids,0.236815,-0.411007,0.115077,-0.35137,0.195784,0.864564,1.0,-0.5379,0.652692,-0.172379,0.543479,0.787194,0.494193,-0.847498
nonflavanoid_phenols,-0.155929,0.292977,0.18623,0.361922,-0.256294,-0.449935,-0.5379,1.0,-0.365845,0.139057,-0.26264,-0.50327,-0.311385,0.489109
proanthocyanins,0.136698,-0.220746,0.009652,-0.197327,0.236441,0.612413,0.652692,-0.365845,1.0,-0.02525,0.295544,0.519067,0.330417,-0.49913
color_intensity,0.546364,0.248985,0.258887,0.018732,0.19995,-0.055136,-0.172379,0.139057,-0.02525,1.0,-0.521813,-0.428815,0.3161,0.265668


In [72]:
# Rank every column by its signed correlation with the class label,
# most negative first.
target_corr = correlation["Target"]
target_corr.sort_values(ascending=True)

flavanoids                     -0.847498
od280/od315_of_diluted_wines   -0.788230
total_phenols                  -0.719163
proline                        -0.633717
hue                            -0.617369
proanthocyanins                -0.499130
alcohol                        -0.328222
magnesium                      -0.209179
ash                            -0.049643
color_intensity                 0.265668
malic_acid                      0.437776
nonflavanoid_phenols            0.489109
alcalinity_of_ash               0.517859
Target                          1.000000
Name: Target, dtype: float64

In [73]:
# Min-max scale each feature column so the distance computation is not
# dominated by the columns with the largest raw values.
# NOTE(review): the min/max are taken over all rows, i.e. before the
# train/test split performed in the next cell — confirm this is acceptable.
col_min = feature.min()
col_range = feature.max() - col_min
feature = (feature - col_min) / col_range

In [74]:
# Hold out 30% of the rows for testing.
# A fixed random_state makes the split, and every score derived from it,
# reproducible across kernel restarts. Stratifying on the label keeps the
# class proportions similar in the training and test parts.
xtrain,xtest,ytrain,ytest = train_test_split(
    feature,
    target,
    test_size=0.3,
    random_state=42,
    stratify=target["Target"],
)

In [75]:
# Feature columns, listed in the order given by sorting their correlation
# with Target (most negative first).
ranked_features = [
    'flavanoids',
    'od280/od315_of_diluted_wines',
    'total_phenols',
    'proline',
    'hue',
    'proanthocyanins',
    'alcohol',
    'magnesium',
    'ash',
    'color_intensity',
    'malic_acid',
    'nonflavanoid_phenols',
    'alcalinity_of_ash',
]
# Keep the individual f1..f13 names defined for any cell that refers to them.
(f1, f2, f3, f4, f5, f6, f7,
 f8, f9, f10, f11, f12, f13) = ranked_features

# Plain NumPy arrays for the hand-written k-NN below: feature matrix and
# label vector for the training part ...
basex = np.array(xtrain[ranked_features])
basey = np.array(ytrain["Target"])

# ... and the same for the held-out part.
testx = np.array(xtest[ranked_features])
testy = np.array(ytest["Target"])

In [76]:
# Neighbour count read by predict(): the integer part of the square root of
# the number of rows in target.
# NOTE(review): this uses the size of the full dataset, not of the training
# split that the neighbours are actually drawn from — confirm that is intended.
k=int(np.sqrt(len(target)))
k

13

In [77]:
def euclidean_distance(train_point,given_point):
    """Return the straight-line (L2) distance between two points.

    Both arguments must support element-wise subtraction with each other
    (for example NumPy arrays of the same shape). The result is the square
    root of the sum of squared coordinate differences.
    """
    offset = train_point - given_point
    squared_total = np.sum(offset ** 2)
    return np.sqrt(squared_total)

In [78]:
def calc_distance_from_all(all_points,given_point,predictions):
    """Pair every known point's distance to ``given_point`` with its label.

    ``predictions[i]`` is taken as the label of ``all_points[i]`` and is
    converted to ``int``. The returned list of ``(distance, label)`` tuples
    is ordered by ascending distance; entries with equal distance keep
    their original relative order.
    """
    labelled = [
        (euclidean_distance(point, given_point), int(predictions[idx]))
        for idx, point in enumerate(all_points)
    ]
    return sorted(labelled, key=lambda pair: pair[0])

In [79]:
def get_neighbours(distances,count):
    """Return the leading ``count`` entries of ``distances``.

    ``distances`` is expected to be ordered nearest-first already, so the
    result is the ``count`` closest entries. Fewer entries are returned
    when ``distances`` is shorter than ``count``.
    """
    nearest = distances[slice(None, count)]
    return nearest

In [80]:
def predict(all_points,given_point,predictions,n_neighbours=None):
    """Classify ``given_point`` by majority vote among its nearest known points.

    Parameters
    ----------
    all_points : sequence of points
        Known points, passed through to ``calc_distance_from_all``.
    given_point : point
        The point to classify.
    predictions : sequence
        Labels of the known points; ``predictions[i]`` belongs to
        ``all_points[i]``.
    n_neighbours : int, optional
        How many nearest neighbours take part in the vote. When omitted,
        the notebook-level ``k`` is used, so existing three-argument calls
        behave as before.

    Returns
    -------
    int
        The label with the most votes. If several labels share the top
        count, the one whose first occurrence is nearest to
        ``given_point`` wins, so the outcome never depends on set
        iteration order.

    Raises
    ------
    ValueError
        If there is no neighbour to vote (no known points, or a neighbour
        count of zero).
    """
    from collections import Counter

    if n_neighbours is None:
        n_neighbours = k

    distances=calc_distance_from_all(all_points,given_point,predictions)
    neighbours=get_neighbours(distances,n_neighbours)
    if not neighbours:
        raise ValueError("predict() needs at least one neighbour to vote")

    # The label is the last element of each (distance, label) tuple.
    labels=[row[-1] for row in neighbours]
    votes=Counter(labels)
    top_count=max(votes.values())

    # labels is ordered nearest-first, so the first label that reaches the
    # top count is the nearest one among any tied labels.
    for label in labels:
        if votes[label]==top_count:
            return label

In [81]:
def accuracy(basex,basey,testx,testy):
    """Report how often ``predict`` matches the true label on a test set.

    Each ``testx[i]`` is classified against the known points ``basex`` and
    their labels ``basey``, and the result is compared with ``testy[i]``.
    The share of matches, as a percentage of ``len(testy)``, is returned
    inside a string of the form ``'Accuracy:<percentage>'``.
    """
    hits = sum(
        1
        for idx in range(len(testx))
        if predict(basex, testx[idx], basey) == testy[idx]
    )
    percentage = hits * 100 / len(testy)
    return f'Accuracy:{percentage}'

In [82]:
# Score the hand-written k-NN on the held-out rows; the result is the
# formatted 'Accuracy:<percentage>' string returned by accuracy().
accuracy(basex,basey,testx,testy)

'Accuracy:92.5925925925926'