In [3]:
import pandas as pd
import math
import operator
import csv
import random

#On charge et divise les données

def loadDataset(filename, split, trainingSet=[], testSet=[]):
    df = pd.read_csv(filename)
    data = df.values.tolist()
    for x in range(len(data) - 1):
        for y in range(4):
            data[x][y] = float(data[x][y])
        if random.random() < split:
# on ajoute à l'ensemble d'entraînement
            trainingSet.append(data[x])
        else:
# on ajoute à l'ensemble de test
            testSet.append(data[x])

#on calcule la distance euclidienne
def euclideanDistance(instance1, instance2, length):
    distance = 0
    for x in range(length):
        distance += pow((instance1[x] - instance2[x]), 2)
    return math.sqrt(distance)

#on trouve les voisins les plus proches
def getNeighbors(trainingSet, testInstance, k):
    distances = []
    length = len(testInstance) - 1
    for x in range(len(trainingSet)):
        dist = euclideanDistance(testInstance, trainingSet[x], length)
        distances.append((trainingSet[x], dist))
    distances.sort(key=operator.itemgetter(1))
    neighbors = []
    for x in range(k):
        neighbors.append(distances[x][0])
    return (neighbors)
    print( neighbors)

#on obtient la réponse prédite
def getResponse(neighbors):
    classVotes = {}
    for x in range(len(neighbors)):
        response = neighbors[x][-1]
        if response in classVotes:
            classVotes[response] += 1
        else:
            classVotes[response] = 1
    sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True)
    return sortedVotes[0][0]

#on calcule l'exactitude des prédictions
def getAccuracy(testSet, predictions):
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

#fonction principale
def main():
    # Charger les données, les diviser en ensembles d'entraînement et de test, puis faire des prédictions
    trainingSet = []
    testSet = []
    loadDataset(r'C:\Users\hp\OneDrive\Documents\Master\M1\POO\MiniProjet\iris_custom.csv', 0.66, trainingSet, testSet)
    print('Train: ' + repr(len(trainingSet)))
    print('Test: ' + repr(len(testSet)))

    predictions = []
    k = 3
    for x in range(len(testSet)):
        neighbors = getNeighbors(trainingSet, testSet[x], k)
        print('Voisins les plus proches de', testSet[x], 'sont', neighbors)
        result = getResponse(neighbors)
        predictions.append(result)
        print('> prédit=' + repr(result) + ', réel=' + repr(testSet[x][-1]))

    accuracy = getAccuracy(testSet, predictions)
    print('Exactitude: ' + repr(accuracy) + '%')


main()




# Chargement et division de l'ensemble de données :
def load_iris_dataset(filename):
    dataset = []
    with open(filename) as f:
        reader = csv.reader(f)
        next(reader)  # Skip header
        for row in reader:
            dataset.append([float(x) for x in row[:4]] + [row[4]])
    return dataset


def split_dataset(dataset, split_ratio):
    train = []
    test = []
    for row in dataset:
        bucket = test if random.random() < split_ratio else train
        bucket.append(row)
    return train, test


# Model functions
def euclideanDistance(instance1, instance2, length):
    distance = 0
    for x in range(length):
        distance += (instance1[x] - instance2[x]) ** 2
    return (distance ** 0.5)


def manhattanDistance(instance1, instance2, length):
    distance = 0
    for x in range(length):
        distance += abs(instance1[x] - instance2[x])
    return distance

#Recherche des voisins les plus proches :
def get_neighbors(training_set, sample, k, distance_func):
    distances = []
    length = len(sample) - 1

    for x in range(len(training_set)):
        dist = distance_func(sample, training_set[x], length)
        distances.append((training_set[x], dist))

    distances.sort(key=operator.itemgetter(1))

    neighbors = [neighbor[0] for neighbor in distances[:k]]

    return neighbors

#Prédiction pour un échantillon de test :
def predict_sample(neighbors):
    class_votes = {}
    for neighbor in neighbors:
        response = neighbor[-1]
        if response in class_votes:
            class_votes[response] += 1
        else:
            class_votes[response] = 1
    sorted_votes = sorted(class_votes.items(), key=operator.itemgetter(1), reverse=True)
    return sorted_votes[0][0]

#Évaluation de l'exactitude :
def evaluate(test_set, predictions):
    correct = 0
    for i in range(len(test_set)):
        if test_set[i][-1] == predictions[i]:
            correct += 1
    return correct / float(len(test_set))


#Calcul de l'exactitude globale :
def get_accuracy(training_set, test_set, k, distance_func):
    predictions = []
    for test_instance in test_set:
        neighbors = get_neighbors(training_set, test_instance, k, distance_func)
        result = predict_sample(neighbors)
        predictions.append(result)
    accuracy = evaluate(test_set, predictions)
    return accuracy


if __name__ == "__main__":

    dataset = load_iris_dataset( r'C:\Users\hp\OneDrive\Documents\Master\M1\POO\MiniProjet\iris_custom.csv')

    train, test = split_dataset(dataset, 0.66)


    print(train)
    print(test)



    k_value = 3  # You can adjust the value of k
    accuracy_euclidean = get_accuracy(train, test, k_value, euclideanDistance)
    print(f"precision avec Euclidean Distance: {accuracy_euclidean * 100:.2f}%")


    accuracy_manhattan = get_accuracy(train, test, k_value, manhattanDistance)
    print(f"precision avec Manhattan : {accuracy_manhattan * 100:.2f}%")

Train: 106
Test: 42
Voisins les plus proches de [4.7, 3.2, 1.3, 0.2, 'Iris-setosa'] sont [[4.6, 3.2, 1.4, 0.2, 'Iris-setosa'], [4.6, 3.4, 1.4, 0.3, 'Iris-setosa'], [4.8, 3.0, 1.4, 0.3, 'Iris-setosa']]
> prédit='Iris-setosa', réel='Iris-setosa'
Voisins les plus proches de [4.6, 3.1, 1.5, 0.2, 'Iris-setosa'] sont [[4.6, 3.2, 1.4, 0.2, 'Iris-setosa'], [4.8, 3.1, 1.6, 0.2, 'Iris-setosa'], [4.8, 3.0, 1.4, 0.3, 'Iris-setosa']]
> prédit='Iris-setosa', réel='Iris-setosa'
Voisins les plus proches de [5.0, 3.6, 1.4, 0.2, 'Iris-setosa'] sont [[5.0, 3.5, 1.3, 0.3, 'Iris-setosa'], [5.0, 3.4, 1.5, 0.2, 'Iris-setosa'], [5.1, 3.4, 1.5, 0.2, 'Iris-setosa']]
> prédit='Iris-setosa', réel='Iris-setosa'
Voisins les plus proches de [5.4, 3.9, 1.7, 0.4, 'Iris-setosa'] sont [[5.7, 3.8, 1.7, 0.3, 'Iris-setosa'], [5.4, 3.7, 1.5, 0.2, 'Iris-setosa'], [5.3, 3.7, 1.5, 0.2, 'Iris-setosa']]
> prédit='Iris-setosa', réel='Iris-setosa'
Voisins les plus proches de [4.8, 3.0, 1.4, 0.1, 'Iris-setosa'] sont [[4.9, 3.0, 1.4