In [17]:
import math
import random
import csv
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_wine
from sklearn.datasets import load_iris

In [5]:
# Load the wine dataset bundle; the bare name on the last line makes the
# notebook display its contents (the 'data' and 'target' arrays, among others).
wine = load_wine()
wine

{'data': array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
         1.065e+03],
        [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
         1.050e+03],
        [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
         1.185e+03],
        ...,
        [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
         8.350e+02],
        [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
         8.400e+02],
        [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
         5.600e+02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

In [6]:
# Pull the pieces of the loaded bundle into module-level names used by the
# later cells.
data, target = wine.data, wine.target  # feature matrix and per-row class labels
feature, target_name = wine.feature_names, wine.target_names  # column / class names

In [7]:
# Column name -> column of the feature matrix, in CSV column order, followed by
# the class labels under "Target".
# NOTE(review): the name `dict` shadows the built-in type; it is kept because
# the following cells refer to this mapping by that name.
dict = {
    column: data[:, position]
    for position, column in enumerate(
        (
            "alcohol",
            "malic_acid",
            "ash",
            "alcalinity_of_ash",
            "magnesium",
            "total_phenols",
            "flavanoids",
            "nonflavanoid_phenols",
            "proanthocyanins",
            "color_intensity",
            "hue",
            "od280/od315_of_diluted_wines",
            "proline",
        )
    )
}
dict["Target"] = target

In [9]:
# Display the column-name -> values mapping for inspection.
dict

In [38]:
# Export the column mapping to 'wine_dataset.csv': one header row with the
# mapping's keys, then one row per sample.
with open('wine_dataset.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(dict.keys())
    # zip(*columns) transposes the per-column arrays into per-sample rows.
    # Taking the columns from dict.values() (same insertion order as the keys
    # written above) keeps header and rows aligned without listing every
    # column by hand.
    writer.writerows(zip(*dict.values()))

In [39]:
class KNN:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` only stores the training data; the work happens in ``predict``,
    which compares every query row against every stored training row.

    Parameters
    ----------
    k : int, default 5
        Number of nearest training samples that vote on each prediction.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """

    def __init__(self, k=5):
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k!r}")
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, data, target):
        """Store the training samples and their labels.

        Parameters
        ----------
        data : array-like of shape (n_samples, n_features)
            Numeric training features (lists, arrays and DataFrames are
            accepted; they are converted to a float array).
        target : array-like with n_samples entries
            Label of each training sample.

        Raises
        ------
        ValueError
            If ``data`` is not 2-D or its row count differs from ``target``.
        """
        # Converting to float avoids wrap-around when subtracting unsigned
        # integer features, and lets plain lists be indexed with index arrays.
        X = np.asarray(data, dtype=float)
        y = np.asarray(target)
        if X.ndim != 2:
            raise ValueError(
                f"data must be 2-D (n_samples, n_features), got shape {X.shape}"
            )
        if y.ndim == 0 or X.shape[0] != y.shape[0]:
            raise ValueError(
                f"data has {X.shape[0]} samples but target has shape {y.shape}"
            )
        self.X_train = X
        self.y_train = y

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Parameters
        ----------
        X_test : array-like of shape (n_queries, n_features)

        Returns
        -------
        numpy.ndarray
            One predicted label per query row (empty for empty input).

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``X_test`` is not 2-D or its feature count differs from the
            training data.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("fit() must be called before predict()")

        queries = np.asarray(X_test, dtype=float)
        if queries.size == 0:
            return np.array([])
        if queries.ndim != 2 or queries.shape[1] != self.X_train.shape[1]:
            raise ValueError(
                f"X_test must have shape (n_queries, {self.X_train.shape[1]}), "
                f"got {queries.shape}"
            )

        y_pred = []
        for x in queries:
            distances = self._compute_distances(x)
            # A stable sort makes the choice among equidistant neighbours
            # deterministic (earlier training rows win).
            nearest_neighbors = np.argsort(distances, kind="stable")[:self.k]
            neighbor_labels = self.y_train[nearest_neighbors]
            y_pred.append(self._majority_vote(neighbor_labels))
        return np.array(y_pred)

    def _compute_distances(self, x):
        """Return the Euclidean distance from ``x`` to every training row."""
        return np.sqrt(np.sum((self.X_train - x) ** 2, axis=1))

    def _majority_vote(self, labels):
        """Return the most frequent label; ties go to the smallest label.

        ``np.unique`` returns the labels sorted and ``np.argmax`` returns the
        first maximum, which is what produces the tie-breaking rule.
        """
        unique, counts = np.unique(labels, return_counts=True)
        return unique[np.argmax(counts)]

In [40]:
# Instantiate the classifier with its default number of neighbours (k=5).
clf = KNN()

In [41]:
# Hold out 20% of the samples for evaluation. A fixed random_state makes the
# split (and therefore the accuracy reported below) identical on every
# Restart & Run All; without it each run shuffles differently.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.2, random_state=42
)
clf.fit(X_train, y_train)

In [42]:
# Predict a label for each held-out sample.
pred = clf.predict(X_test)

In [43]:
# Side-by-side table of true vs. predicted labels; preview the first rows.
d = pd.DataFrame({"Actual": y_test, "Predicted": pred})
d.head()

Unnamed: 0,Actual,Predicted
0,1,2
1,1,1
2,0,0
3,0,0
4,2,2


In [44]:
def accuracy(x, y):
    """Return the fraction of positions at which ``x`` and ``y`` agree.

    Parameters
    ----------
    x, y : sized iterables of equal length
        E.g. true labels and predicted labels. Elements are compared pairwise
        in iteration order, so lists, arrays and pandas Series (regardless of
        their index) all work.

    Returns
    -------
    float
        Value in [0, 1]; ``0.0`` when both inputs are empty.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` have different lengths.
    """
    size = len(x)
    if size != len(y):
        raise ValueError(
            f"x and y must have the same length, got {size} and {len(y)}"
        )
    if size == 0:
        # Nothing to compare; avoid dividing by zero.
        return 0.0

    matches = sum(1 for a, b in zip(x, y) if a == b)
    return matches / size

In [45]:
# Accuracy on the held-out split, expressed as a percentage.
accuracy(y_test,pred)*100

83.33333333333334

In [22]:
# Load the CSV file written earlier in the notebook and preview its first rows.
csv_data = pd.read_csv('wine_dataset.csv')
csv_data.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,Target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0
