In [11]:
# k-nearest neighbors on the Iris Flowers Dataset
from random import seed
from random import randrange
from csv import reader
from math import sqrt

# Load a CSV file
def load_csv(filename):
	"""Read a CSV file into a list of rows.

	Args:
		filename: Path of the CSV file to read.

	Returns:
		A list with one entry per non-empty record; each entry is the list
		of string fields produced by ``csv.reader`` (nothing is converted).
	"""
	# newline='' is what the csv module asks for: it lets the reader do its
	# own line-ending handling, so a quoted field containing "\r\n" comes
	# back unchanged instead of being rewritten by universal-newline mode.
	with open(filename, 'r', newline='') as handle:
		# csv.reader yields [] for blank lines; those are dropped.
		return [row for row in reader(handle) if row]

# Convert string column to float
def str_column_to_float(dataset, column):
	"""Parse one column of every row as a float, in place.

	Each row's entry at index ``column`` is stripped of surrounding
	whitespace, converted with ``float`` and written back into the same row.
	Nothing is returned.
	"""
	for record in dataset:
		raw_text = record[column]
		record[column] = float(raw_text.strip())

# Encode the distinct values of a column as integers
def str_column_to_int(dataset, column):
	"""Replace the values in one column with integer codes, in place.

	Args:
		dataset: List of rows (each row a mutable sequence).
		column: Index of the column to encode.

	Returns:
		The dict mapping each distinct original value to the integer code
		that replaced it.
	"""
	# Codes are assigned in sorted order of the distinct values. Enumerating
	# the bare set would follow set iteration order, which for strings can
	# differ between interpreter runs (hash randomization), so the same file
	# could get a different encoding each time.
	distinct_values = sorted({row[column] for row in dataset})
	lookup = {value: code for code, value in enumerate(distinct_values)}
	for row in dataset:
		row[column] = lookup[row[column]]
	return lookup

# Find the min and max values for each column
def dataset_minmax(dataset):
	"""Collect the smallest and largest value of each column.

	The number of columns is taken from the first row. The result has one
	``[minimum, maximum]`` pair per column, in column order.
	"""
	width = len(dataset[0])
	# One list of values per column, produced lazily in column order.
	columns = ([row[idx] for row in dataset] for idx in range(width))
	return [[min(values), max(values)] for values in columns]

# Rescale dataset columns to the range 0-1
def normalize_dataset(dataset, minmax):
	"""Rescale every value to the range 0-1 using per-column bounds, in place.

	Args:
		dataset: List of rows of numbers; rows are modified in place.
		minmax: One ``[minimum, maximum]`` pair per column, e.g. the result
			of ``dataset_minmax``.

	Returns:
		None.
	"""
	for row in dataset:
		for i in range(len(row)):
			low, high = minmax[i][0], minmax[i][1]
			span = high - low
			if span == 0:
				# A column whose minimum equals its maximum has no spread;
				# dividing by the zero span would raise ZeroDivisionError,
				# so such values are mapped to 0.0 instead.
				row[i] = 0.0
			else:
				row[i] = (row[i] - low) / span

# Split a dataset into k folds
def cross_validation_split(dataset, n_folds):
	"""Randomly partition the rows of ``dataset`` into ``n_folds`` folds.

	Every fold holds ``len(dataset) // n_folds`` rows drawn without
	replacement via ``randrange``; rows left over when the division is not
	exact are not placed in any fold. ``dataset`` itself is not modified.
	"""
	remaining = list(dataset)  # shallow copy, so popping leaves the input intact
	rows_per_fold = len(dataset) // n_folds
	return [
		[remaining.pop(randrange(len(remaining))) for _ in range(rows_per_fold)]
		for _ in range(n_folds)
	]

# Calculate accuracy percentage
def accuracy_metric(actual, predicted):
	"""Return the percentage of positions where ``predicted`` equals ``actual``.

	Positions are compared pairwise over the length of ``actual``; the result
	is a float between 0.0 and 100.0.
	"""
	total = len(actual)
	hits = sum(1 for pos in range(total) if actual[pos] == predicted[pos])
	return hits / float(total) * 100.0

# Evaluate an algorithm using a cross validation split
def evaluate_algorithm(dataset, algorithm, n_folds, *args):
	"""Score ``algorithm`` with cross validation; return one accuracy per fold.

	The dataset is split with ``cross_validation_split``. Each fold in turn is
	held out as the test set while the rows of all other folds form the
	training set. ``algorithm(train_set, test_set, *args)`` is expected to
	return one prediction per test row; the predictions are compared with the
	last column of the held-out rows using ``accuracy_metric``.
	"""
	folds = cross_validation_split(dataset, n_folds)
	scores = []
	for held_out in folds:
		other_folds = list(folds)
		other_folds.remove(held_out)
		train_set = [row for fold in other_folds for row in fold]
		# The algorithm receives copies of the held-out rows with the last
		# column blanked, so it cannot read the expected outputs.
		test_set = []
		for row in held_out:
			masked = list(row)
			masked[-1] = None
			test_set.append(masked)
		predicted = algorithm(train_set, test_set, *args)
		expected = [row[-1] for row in held_out]
		scores.append(accuracy_metric(expected, predicted))
	return scores

# Calculate the Euclidean distance between two vectors
def euclidean_distance(row1, row2):
	"""Return the Euclidean distance between two rows.

	Only the first ``len(row1) - 1`` positions are compared; the final entry
	of ``row1`` is left out of the sum.
	"""
	squared_sum = 0.0
	for pos, value in enumerate(row1[:-1]):
		diff = value - row2[pos]
		squared_sum += diff ** 2
	return sqrt(squared_sum)

# Locate the most similar neighbors
def get_neighbors(train, test_row, num_neighbors):
	"""Return the training rows closest to ``test_row``.

	Args:
		train: Iterable of training rows.
		test_row: Row to find neighbors for.
		num_neighbors: Maximum number of rows to return.

	Returns:
		Up to ``num_neighbors`` rows of ``train``, nearest first according to
		``euclidean_distance(test_row, train_row)``. Rows at equal distance
		keep their order from ``train``. When ``num_neighbors`` is larger
		than the number of training rows, all of them are returned; a value
		of zero or less gives an empty list.
	"""
	# sorted() is stable, so ties stay in training order.
	ranked = sorted(train, key=lambda train_row: euclidean_distance(test_row, train_row))
	# Slicing instead of indexing 0..num_neighbors-1 avoids an IndexError
	# when fewer than num_neighbors training rows are available.
	return ranked[:max(num_neighbors, 0)]

# Make a prediction with neighbors
def predict_classification(train, test_row, num_neighbors):
	"""Predict the output value for ``test_row`` by majority vote.

	The last entry of each row returned by ``get_neighbors`` counts as one
	vote; the value that occurs most often among the votes is returned.
	"""
	votes = [neighbor[-1] for neighbor in get_neighbors(train, test_row, num_neighbors)]
	candidates = set(votes)
	return max(candidates, key=votes.count)

# kNN Algorithm
def k_nearest_neighbors(train, test, num_neighbors):
	"""Predict an output value for every row of ``test``.

	Returns a list holding the result of ``predict_classification`` for each
	test row, in the order of ``test``.
	"""
	return [predict_classification(train, row, num_neighbors) for row in test]




In [12]:
# Run kNN with cross validation on the Iris Flowers dataset
seed(1)  # fix the random fold assignment so the scores are repeatable
filename = 'iris-n.csv'
dataset = load_csv(filename)
# every column except the last is parsed as a float feature
class_column = len(dataset[0]) - 1
for i in range(class_column):
    str_column_to_float(dataset, i)
# the last column is treated as the class and encoded as integers
# NOTE(review): the output saved with this cell shows a mean accuracy below 1%,
# which is suspiciously low -- presumably the last column of iris-n.csv is not
# the class label; verify the file layout.
str_column_to_int(dataset, class_column)
# evaluation settings
n_folds = 5
num_neighbors = 5
scores = evaluate_algorithm(dataset, k_nearest_neighbors, n_folds, num_neighbors)
print('Scores: %s' % scores)
mean_accuracy = sum(scores) / float(len(scores))
print('Mean Accuracy: %.3f%%' % mean_accuracy)

Scores: [0.0, 3.3333333333333335, 0.0, 0.0, 0.0]
Mean Accuracy: 0.667%


In [17]:
import numpy as np
# Integers 0..199 as a NumPy array; the bare name displays it as the cell result.
a = np.arange(0, 200)
a

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

In [47]:
import random
# 200 random integers between 1 and 30 (both ends included)
a = [random.randint(1, 30) for _ in range(200)]
b = []  # defined but never filled in this cell
print(a)

[12, 11, 8, 1, 17, 13, 1, 4, 11, 29, 9, 26, 23, 1, 7, 15, 18, 20, 1, 10, 11, 12, 19, 24, 18, 26, 6, 1, 25, 15, 13, 23, 1, 18, 18, 7, 15, 23, 3, 1, 23, 30, 9, 13, 14, 17, 18, 28, 5, 28, 8, 29, 8, 23, 27, 26, 4, 15, 9, 10, 25, 17, 9, 4, 12, 1, 7, 30, 10, 26, 20, 16, 23, 1, 19, 19, 3, 13, 16, 25, 10, 11, 17, 9, 25, 12, 10, 2, 15, 27, 13, 24, 24, 24, 30, 15, 9, 15, 15, 21, 15, 28, 28, 11, 13, 8, 20, 30, 28, 29, 14, 30, 3, 11, 4, 25, 10, 23, 12, 24, 26, 24, 22, 28, 27, 27, 14, 30, 21, 10, 14, 15, 6, 22, 1, 21, 5, 10, 10, 28, 3, 12, 2, 23, 21, 22, 20, 21, 26, 20, 12, 4, 7, 24, 2, 22, 15, 22, 2, 4, 10, 15, 9, 18, 20, 4, 23, 14, 5, 20, 16, 3, 6, 1, 10, 23, 9, 18, 6, 15, 21, 13, 12, 1, 9, 16, 11, 14, 23, 11, 8, 7, 30, 2, 2, 6, 28, 5, 19, 18]


In [24]:
# 100 random integers between 1 and 30 (both ends included)
randomlist = [random.randint(1, 30) for _ in range(100)]
print(randomlist)

[20, 12, 26, 19, 5, 14, 7, 1, 28, 6, 12, 11, 5, 8, 17, 23, 5, 3, 8, 29, 21, 23, 9, 18, 20, 20, 6, 20, 16, 24, 4, 3, 27, 15, 21, 2, 16, 22, 15, 23, 20, 17, 8, 25, 13, 18, 11, 30, 12, 26, 5, 11, 16, 12, 4, 18, 23, 5, 23, 16, 24, 7, 19, 21, 21, 28, 27, 26, 1, 10, 27, 15, 17, 3, 22, 14, 21, 5, 5, 12, 14, 25, 26, 9, 22, 29, 22, 29, 25, 25, 8, 6, 16, 27, 6, 25, 9, 26, 27, 20]
