In [10]:
from collections import Counter
import math

def knn(data, query, k, distance_fn, choice_fn):
    """Find the ``k`` rows of ``data`` nearest to ``query`` and reduce their labels.

    Every row is read as ``features + [label]``: ``row[:-1]`` is handed to
    ``distance_fn`` together with ``query``, and ``row[-1]`` is the label.

    Args:
        data: Indexable collection of rows (each row sliceable/indexable).
        query: Value passed as the second argument to ``distance_fn``.
        k: Number of nearest entries to keep (used as a slice bound).
        distance_fn: Callable ``(row_features, query) -> distance``.
        choice_fn: Callable reducing the list of selected labels to a single
            value, e.g. a mean for regression or a mode for classification.

    Returns:
        A tuple ``(neighbours, choice)`` where ``neighbours`` is the list of
        ``(distance, index)`` pairs for the selected rows in ascending order
        and ``choice`` is ``choice_fn`` applied to their labels.
    """
    # Keep the row index next to each distance so the row can be looked up
    # again once the pairs have been reordered.
    scored = [
        (distance_fn(row[:-1], query), position)
        for position, row in enumerate(data)
    ]

    # Tuples compare by distance first; equal distances fall back to the index.
    scored.sort()
    nearest = scored[:k]

    labels = [data[position][-1] for _, position in nearest]
    return nearest, choice_fn(labels)

def mean(labels):
    """Return the arithmetic mean of ``labels`` (their sum divided by their count)."""
    total = sum(labels)
    count = len(labels)
    return total / count

def mode(labels):
    """Return the most frequent value in ``labels``."""
    tally = Counter(labels)
    # most_common(1) yields a one-element list holding a (value, count) pair.
    top_entry = tally.most_common(1)[0]
    winner, _ = top_entry
    return winner

#########################################################################################

def euclidean_distance(point1, point2):
    """Return the Euclidean distance between ``point1`` and ``point2``.

    The coordinates are paired by position over the length of ``point1``;
    any entries of ``point2`` beyond that length are not read.
    """
    squared_total = 0
    for axis, coordinate in enumerate(point1):
        delta = coordinate - point2[axis]
        squared_total += math.pow(delta, 2)
    return math.sqrt(squared_total)

def mostSimilaPlanet(movie_query, k_recommendations, dataset_path='dataset.csv'):
    """Return the dataset rows whose numeric columns are closest to a query.

    The CSV at ``dataset_path`` is read with its first line (the headings)
    discarded. For every remaining row the columns from index 5 onward are
    converted to ``float`` and handed to ``knn`` with ``euclidean_distance``.

    NOTE(review): ``knn`` treats the last element of each row as a label and
    leaves it out of the distance, so the final numeric column does not take
    part in the comparison -- confirm this is intended.

    Args:
        movie_query: Feature vector to compare against every row.
        k_recommendations: Number of nearest rows to return.
        dataset_path: Path of the CSV file to read. Defaults to
            ``'dataset.csv'``.

    Returns:
        A list of the raw CSV rows (lists of strings) for the nearest
        entries, closest first.

    Raises:
        ValueError: If a column from index 5 onward cannot be parsed as a
            float.
    """
    raw_planet_data = []
    with open(dataset_path, 'r') as md:
        # Discard the first line (headings). The default keeps an empty file
        # from raising StopIteration.
        next(md, None)

        for line in md:
            stripped = line.strip()
            # A blank line would otherwise turn into an empty feature row,
            # which compares as distance 0 to any query and has no label
            # element for knn to read.
            if not stripped:
                continue
            raw_planet_data.append(stripped.split(','))

    # Keep only the numeric columns and convert them from strings to floats.
    planet_recommendation_data = [
        [float(value) for value in row[5:]] for row in raw_planet_data
    ]

    # Only the (distance, index) pairs are needed here, so the label
    # reduction is a no-op.
    recommendation_indices, _ = knn(
        planet_recommendation_data, movie_query, k=k_recommendations,
        distance_fn=euclidean_distance, choice_fn=lambda x: None
    )

    # Map the selected indices back to the full raw rows.
    return [raw_planet_data[index] for _, index in recommendation_indices]

if __name__ == '__main__':
    # Query feature vector (23 values) handed to mostSimilaPlanet.
    # NOTE(review): the variable name looks like a leftover from a movie
    # recommendation example; the data queried here is dataset.csv.
    the_post = [8.5,132.842,0.036,0.532,2459000.5,59000,20200531.0000000,.3051012213587488,2.747268314571767,1.90907339639573,18.3690218339117,358.2052821795358,47.81236788118992,26.32019653177608,3.585463232747804,.2164474421721038,2458878.899133444838,20200130.3991334,1663.221317781863,4.55365179406396,.959687,373.48138979,.43355] # query feature vector
    # Ask for the single nearest row.
    recommended_planet = mostSimilaPlanet(movie_query=the_post, k_recommendations=1)

    # Print the full matching rows, then column 2 of each row
    # (presumably the object's designation -- verify against the CSV header).
    print(recommended_planet)
    for recommendation in recommended_planet:
        print(recommendation[2])




[['a0000036', '2000036', '    36 Atalante', '36', 'Atalante', '8.5', '132.842', '0.036', '0.532', '2459000.5', '59000', '20200531.0000000', '.3051012213587488', '2.747268314571767', '1.90907339639573', '18.3690218339117', '358.2052821795358', '47.81236788118992', '26.32019653177608', '3.585463232747804', '.2164474421721038', '2458878.899133444838', '20200130.3991334', '1663.221317781863', '4.55365179406396', '.959687', '373.48138979', '.43355']]
    36 Atalante


In [27]:
# Put a '0' into every empty field of dataset.csv (i.e. wherever there is
# nothing between two commas).
import csv
# The csv module expects files given to reader/writer to be opened with
# newline=''. Without it, the writer's '\r\n' row terminator is translated
# again on platforms whose line separator is '\r\n', which leaves an extra
# blank line after every row.
with open('dataset.csv', 'r', newline='') as f:
    your_list = [
        ['0' if field == '' else field for field in row]
        for row in csv.reader(f)
    ]
with open('dataset.csv', 'w', newline='') as f:
    csv.writer(f).writerows(your_list)

# Remove the blank (whitespace-only) lines from dataset.csv.
with open('dataset.csv', 'r') as f:
    lines = f.readlines()
with open('dataset.csv', 'w') as f:
    f.writelines(line for line in lines if line.strip())






    