In [1]:
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from haversine import haversine, Unit
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import OneHotEncoder

In [3]:
# Read the attractions table from the CSV next to this notebook
csv_path = "Attraction.csv"
data = pd.read_csv(csv_path)
# Preview five randomly chosen rows
data.sample(n=5)

Unnamed: 0,Destination,Location,latitude,longitude,tag,Unnamed: 5
42,Harati Devi Temple (Swayambhu area),"Swayambhu, Kathmandu",27.715671,85.291538,Temple,
35,Taleju Bhawani Temple (Bhaktapur),Bhaktapur Durbar Square,27.672689,85.428944,Temple,
26,Guhyeshwari Temple,"Gaushala, Kathmandu",27.711275,85.353448,Temple,
8,Boudhanath Stupa,"Boudha, Kathmandu",27.721493,85.359441,stupa,
4,The Taragoan Museum,"Boudha, Kathmandu",27.720193,85.353614,Museum,


In [5]:
# Remove the stray "Unnamed: 5" column (blank in the preview rows).
# Reassigning instead of using inplace=True, together with errors="ignore",
# keeps this cell idempotent: running it a second time no longer raises
# KeyError because the column has already been removed.
data = data.drop(columns=["Unnamed: 5"], errors="ignore")
# Preview five randomly chosen rows of the cleaned frame
data.sample(5)

Unnamed: 0,Destination,Location,latitude,longitude,tag
56,Gahana Pokhari,"Handigaun, Kathmandu",27.716819,85.332979,Pond
82,Sundarijal to Chisapani Hike,"Sundarijal, Kathmandu",27.789705,85.426028,Hiking route
57,Ikha Pokhari,"Wotu Tole, Patan",27.710243,85.30877,Pond
42,Harati Devi Temple (Swayambhu area),"Swayambhu, Kathmandu",27.715671,85.291538,Temple
67,KTM City Gaming Zone,"Gongabu, Kathmandu",27.71022,85.329057,Funland


In [6]:
# One-hot encode the `tag` column: one dense 0/1 column per distinct tag value
tag_column = data[['tag']]
encoder = OneHotEncoder(sparse_output=False)
tag_matrix = encoder.fit_transform(tag_column)

# Wrap the encoded array in a DataFrame labelled with the encoder's feature names
tag_feature_names = encoder.get_feature_names_out(['tag'])
tag_df = pd.DataFrame(data=tag_matrix, columns=tag_feature_names)

In [8]:
# Compute cosine similarity
# Each row of tag_df is a one-hot tag vector, so a pair of attractions scores
# 1.0 when they carry the same tag and 0.0 otherwise.
cosine_sim_matrix = cosine_similarity(tag_df)
# Bare last expression so the notebook renders the matrix as the cell output.
cosine_sim_matrix

array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 1., 1.],
       [0., 0., 0., ..., 1., 1., 1.],
       [0., 0., 0., ..., 1., 1., 1.]], shape=(86, 86))

In [9]:
# Compute proximity matrix
def calculate_proximity_matrix(data):
    """Return the pairwise great-circle distance matrix between all rows of ``data``.

    Distances are computed with the haversine formula, vectorised with numpy
    so no Python-level double loop is needed.

    Rows are addressed by *position*, not by index label, so the result is
    correct for any DataFrame index (e.g. after filtering or shuffling) and
    not only for a default 0..n-1 RangeIndex.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain numeric ``latitude`` and ``longitude`` columns expressed
        in decimal degrees.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(data), len(data))`` where entry ``[i, j]`` is
        the distance in kilometres between the i-th and j-th rows. The matrix
        is symmetric with a zero diagonal.
    """
    # Mean Earth radius in km; chosen to match the constant used by the
    # `haversine` package so results stay consistent with that library.
    earth_radius_km = 6371.0088

    lat = np.radians(data['latitude'].to_numpy(dtype=float))
    lon = np.radians(data['longitude'].to_numpy(dtype=float))

    # Broadcasting a column vector against a row vector yields all n x n pairs.
    dlat = lat[:, None] - lat[None, :]
    dlon = lon[:, None] - lon[None, :]

    a = (
        np.sin(dlat / 2.0) ** 2
        + np.cos(lat)[:, None] * np.cos(lat)[None, :] * np.sin(dlon / 2.0) ** 2
    )
    # Clamp to [0, 1] so rounding error can never feed arcsin/sqrt an
    # out-of-domain value and produce NaN.
    a = np.clip(a, 0.0, 1.0)

    return 2.0 * earth_radius_km * np.arcsin(np.sqrt(a))

# Pairwise distance (km) between every pair of attractions in `data`.
proximity_matrix = calculate_proximity_matrix(data)

In [12]:
# Blend tag similarity with geographic closeness
def combine_similarity_and_proximity(cosine_sim, proximity_matrix, alpha=0.7):
    """Return a weighted mix of tag similarity and distance-based closeness.

    Distances are first mapped to a closeness score ``1 / (1 + distance)``,
    which equals 1 at zero distance and decays towards 0 as distance grows.
    The result is ``alpha * cosine_sim + (1 - alpha) * closeness``.

    Parameters
    ----------
    cosine_sim : array-like or scalar
        Similarity scores.
    proximity_matrix : array-like or scalar
        Distances, broadcast-compatible with ``cosine_sim``.
    alpha : float, default 0.7
        Weight given to ``cosine_sim``; the remaining ``1 - alpha`` goes to
        the closeness term.
    """
    closeness = 1 / (1 + proximity_matrix)
    similarity_weight = alpha
    closeness_weight = 1 - alpha
    return similarity_weight * cosine_sim + closeness_weight * closeness

# Blend the two signals; alpha=0.7 puts 70% of the weight on tag similarity
# and the remaining 30% on geographic closeness.
hybrid_matrix = combine_similarity_and_proximity(cosine_sim_matrix, proximity_matrix, alpha=0.7)
# Bare last expression so the notebook renders the matrix as the cell output.
hybrid_matrix

array([[1.        , 0.75328316, 0.86499042, ..., 0.01974832, 0.04514845,
        0.02098884],
       [0.75328316, 1.        , 0.75115645, ..., 0.02607736, 0.03175594,
        0.02633205],
       [0.86499042, 0.75115645, 1.        , ..., 0.01903439, 0.0513993 ,
        0.02004067],
       ...,
       [0.01974832, 0.02607736, 0.01903439, ..., 1.        , 0.71505271,
        0.77572382],
       [0.04514845, 0.03175594, 0.0513993 , ..., 0.71505271, 1.        ,
        0.71538047],
       [0.02098884, 0.02633205, 0.02004067, ..., 0.77572382, 0.71538047,
        1.        ]], shape=(86, 86))

In [13]:
# Train k-NN on hybrid matrix
def build_knn_model(similarity_matrix, n_neighbors=4):
    """Fit a nearest-neighbour index on a precomputed similarity matrix.

    The similarity matrix is converted to a distance matrix as
    ``1 - similarity`` and passed to scikit-learn's ``NearestNeighbors`` with
    ``metric='precomputed'``.

    Parameters
    ----------
    similarity_matrix : array-like of shape (n_samples, n_samples)
        Pairwise similarities, expected to lie in [0, 1] with 1 meaning
        "identical".
    n_neighbors : int, default 4
        Default number of neighbours returned by ``kneighbors`` queries.

    Returns
    -------
    sklearn.neighbors.NearestNeighbors
        The fitted model.
    """
    distance_matrix = 1 - np.asarray(similarity_matrix, dtype=float)
    # Floating-point rounding can leave a similarity marginally above 1, which
    # would give a tiny negative distance; scikit-learn validates precomputed
    # distances as non-negative, so clamp at zero before fitting.
    distance_matrix = np.clip(distance_matrix, 0.0, None)

    knn = NearestNeighbors(n_neighbors=n_neighbors, metric='precomputed')
    knn.fit(distance_matrix)
    return knn

# Fit the neighbour index on the hybrid matrix with the default n_neighbors=4.
knn_model_hybrid = build_knn_model(hybrid_matrix)

In [14]:
# Save everything
# Create the output folder first so this cell also works on a fresh checkout
# where "models/" does not exist yet (np.save / open would otherwise raise
# FileNotFoundError).
model_dir = Path("models")
model_dir.mkdir(parents=True, exist_ok=True)

# Similarity / distance matrices are stored as .npy arrays.
matrices_to_save = {
    "hybrid_matrix.npy": hybrid_matrix,
    "proximity_matrix.npy": proximity_matrix,
    "cosine_sim_matrix.npy": cosine_sim_matrix,
}
for file_name, matrix in matrices_to_save.items():
    np.save(model_dir / file_name, matrix)

# Fitted scikit-learn objects are pickled. Only unpickle these files from a
# trusted location: pickle.load can execute arbitrary code.
objects_to_pickle = {
    "knn_model_hybrid.pkl": knn_model_hybrid,
    "encoder.pkl": encoder,
}
for file_name, fitted_object in objects_to_pickle.items():
    with open(model_dir / file_name, "wb") as f:
        pickle.dump(fitted_object, f)

print("✅ Training complete and models saved.")

✅ Training complete and models saved.
