In [67]:
import pandas as pd
import numpy as np
from sklearn.metrics import ndcg_score
from sklearn.model_selection import KFold
from sklearn.metrics.pairwise import cosine_similarity

# Učitavanje podataka iz CSV fajlova
destinations_df = pd.read_csv('Expanded_Destinations.csv')
user_history_df = pd.read_csv('Final_Updated_Expanded_UserHistory.csv')
users_df = pd.read_csv('Final_Updated_Expanded_Users.csv')

# Uklanjanje duplikata u users_df
users_df.drop_duplicates(subset='UserID', inplace=True)

# Kreiranje matrice korisnik-destinacija sa ocenama za sve destinacije
unique_destinations = destinations_df['DestinationID'].unique()
user_destination_matrix = user_history_df.pivot(index='UserID', columns='DestinationID', values='ExperienceRating').fillna(0)
user_destination_matrix = user_destination_matrix.reindex(columns=unique_destinations, fill_value=0)

# Prikaz matrice korisnik-destinacija za proveru
print("\nUser-Destination Matrix:")
print(user_destination_matrix.head())

# Izračunavanje kosinusne sličnosti između korisnika
user_similarity = cosine_similarity(user_destination_matrix)

# Kreiranje DataFrame-a sa sličnostima korisnika
user_similarity_df = pd.DataFrame(user_similarity, index=user_destination_matrix.index, columns=user_destination_matrix.index)

# Mapiranje korisničkih ID-ova na imena korisnika
user_id_to_name = users_df.set_index('UserID')['Name'].to_dict()

def collaborative_filtering_recommendations(train_user_history_df, user_id, top_n=5, num_similar_users=3, min_history=1, min_similar_users=2):
    if user_id not in user_similarity_df.index:
        print(f"Korisnik sa ID-em {user_id} nije pronađen u matrici sličnosti korisnika.")
        return []
    
    similar_users = user_similarity_df[user_id].sort_values(ascending=False).index[1:num_similar_users+1]
    similar_users_ratings = user_destination_matrix.loc[similar_users]
    
    # Provera da li postoji dovoljno istorije poseta i sličnih korisnika
    user_history = train_user_history_df[train_user_history_df['UserID'] == user_id]
    if len(user_history) < min_history or len(similar_users) < min_similar_users:
        print(f"Korisnik {user_id_to_name.get(user_id, 'Nepoznat')} nema dovoljno istorije poseta ili sličnih korisnika za generisanje preporuka.")
        return []
    
    mean_ratings = similar_users_ratings.mean(axis=0)
    recommendations = mean_ratings.sort_values(ascending=False).index[:top_n]
    
    recommended_destinations = destinations_df[destinations_df['DestinationID'].isin(recommendations)]['Name'].values
    
    if len(recommended_destinations) == 0:
        print(f"Za korisnika {user_id_to_name.get(user_id, 'Nepoznat')} nisu generisane preporuke.")
    else:
        print(f"Za korisnika {user_id_to_name.get(user_id, 'Nepoznat')} dobijene preporuke: {recommended_destinations}")
    
    return recommended_destinations

# K-struka unakrsna validacija sa NDCG metrikom
def k_fold_cross_validation_ndcg(user_ids, k=5, top_n=5, num_similar_users=3, min_history=1, min_similar_users=2):
    kf = KFold(n_splits=k)
    ndcg_scores = []

    for train_index, test_index in kf.split(user_ids):
        train_users, test_users = user_ids[train_index], user_ids[test_index]
        train_user_history_df = user_history_df[user_history_df['UserID'].isin(train_users)]
        test_user_history_df = user_history_df[user_history_df['UserID'].isin(test_users)]

        for user_id in test_users:
            true_destinations = test_user_history_df[test_user_history_df['UserID'] == user_id]['DestinationID'].values
            recommended_destinations = collaborative_filtering_recommendations(train_user_history_df, user_id, top_n, num_similar_users, min_history, min_similar_users)
            if len(true_destinations) == 0 or len(recommended_destinations) == 0:
                print(f"Korisnik {user_id_to_name.get(user_id, 'Nepoznat')} nema istoriju poseta ili nema preporuka.")
                continue
            ndcg = evaluate_recommendations_ndcg(true_destinations, recommended_destinations, top_n)
            if ndcg == 0:
                print(f"Za korisnika {user_id_to_name.get(user_id, 'Nepoznat')} nismo dobili relevantne preporuke.")
            else:
                print(f"Za korisnika {user_id_to_name.get(user_id, 'Nepoznat')} dobijene preporuke: {recommended_destinations}")
                print(f"Tačne destinacije: {true_destinations}")
            ndcg_scores.append(ndcg)
    
    return np.mean(ndcg_scores)

# Evaluacija performansi modela sa NDCG metrikom
def evaluate_recommendations_ndcg(true_destinations, recommended_destinations, top_n=5):
    true_set = set(true_destinations)
    recommended_set = set(recommended_destinations)
    
    # Prilagodba y_true
    y_true = [1 if dest in true_set else 0 for dest in recommended_destinations]
    
    # Prilagodba y_score
    y_score = [1 if dest in true_set else 0 for dest in recommended_destinations]
    
    ndcg = ndcg_score([y_true], [y_score], k=top_n)
     
    return ndcg

# Dobijanje svih korisničkih identifikatora
user_ids = users_df['UserID'].values

# Izvršavanje K-struke unakrsne validacije sa NDCG metrikom i prikaz rezultata
ndcg_mean = k_fold_cross_validation_ndcg(user_ids, k=5, top_n=5, num_similar_users=3, min_history=1, min_similar_users=2)
print(f"Mean NDCG: {ndcg_mean}")



User-Destination Matrix:
DestinationID  1     2     3     4     5     6     7     8     9     10    \
UserID                                                                      
1                 0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   
2                 0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   
3                 0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   
5                 0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   
7                 0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   

DestinationID  ...  991   992   993   994   995   996   997   998   999   1000  
UserID         ...                                                              
1              ...   0.0     0   0.0   0.0     0   0.0   0.0   0.0     0   0.0  
2              ...   0.0     0   0.0   0.0     0   0.0   0.0   0.0     0   0.0  
3              ...   0.0     0   0.0   0.0     0   0.0   0.0   0.0     0   0.0  
5              ...   0.0     

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [61]:
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import KFold
from sklearn.metrics import ndcg_score

#Učitavanje podataka
destinations_df = pd.read_csv('Expanded_Destinations.csv')
user_history_df = pd.read_csv('Final_Updated_Expanded_UserHistory.csv')

# Surprise Dataset
reader = Reader(rating_scale=(1, 5))  # Assuming ratings are on a scale from 1 to 5
data = Dataset.load_from_df(user_history_df[['UserID', 'DestinationID', 'ExperienceRating']], reader)

# K-fold unakrsna validacija
def k_fold_cross_validation_ndcg(data, svd, k=5, top_n=5):
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    ndcg_scores = []

    for trainset, testset in kf.split(data):
        svd.fit(trainset)

        predictions = svd.test(testset)
        
        true_destinations = []
        recommended_destinations = []

        for pred in predictions:
            true_destinations.append(pred.iid)
            recommended_destinations.append(pred.est)

        # Računanje NDCG score-a
        ndcg = ndcg_score([true_destinations], [recommended_destinations], k=top_n)
        ndcg_scores.append(ndcg)

    return np.mean(ndcg_scores)

# Inicijalizacija SVD klase
svd = SVD()

# 5 preporuka, ispisivanje metrike
top_n = 5
ndcg_mean = k_fold_cross_validation_ndcg(data, svd, k=5, top_n=top_n)
print(f"Mean NDCG score: {ndcg_mean:.4f}")

# Funkcija za generisanje preporuka, kolaborativno filtriranje
def get_svd_recommendations(user_id, top_n=5):
    trainset = data.build_full_trainset()  
    
    svd.fit(trainset)
    
    #Generisanje predviđanja za sve destinacije koje korisnik nije ocenio
    destinations_to_predict = destinations_df[~destinations_df['DestinationID'].isin(user_history_df[user_history_df['UserID'] == user_id]['DestinationID'])]['DestinationID']
    predictions = [svd.predict(user_id, destination_id) for destination_id in destinations_to_predict]
    
    #Sortiramo predviđanja po procenjenoj oceni u opadajućem redosledu
    predictions.sort(key=lambda x: x.est, reverse=True)
    
    # Top N rekomendencija
    top_n_recommendations = predictions[:top_n]

    recommended_destinations = [destinations_df.loc[destinations_df['DestinationID'] == pred.iid, 'Name'].values[0] for pred in top_n_recommendations]
    
    return recommended_destinations

#Preporuke za korisnika 1
user_id = 1 
top_n = 5 
recommendations = get_svd_recommendations(user_id, top_n=top_n)
print(f"Top {top_n} recommendations for user {user_id}:")
for i, recommendation in enumerate(recommendations, 1):
    print(f"{i}. {recommendation}")


Mean NDCG score: 0.4905
Top 5 recommendations for user 1:
1. Kerala Backwaters
2. Leh Ladakh
3. Goa Beaches
4. Taj Mahal
5. Goa Beaches
