Import libraries

In [None]:
import os
import random
from datetime import datetime
from math import radians, sin, cos, sqrt, atan2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import skfuzzy as fuzz
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from surprise import Dataset, Reader, KNNBasic, accuracy
from surprise.model_selection import train_test_split

Create a simple synthetic dataset of user reviews

In [None]:
# Fixed seed so that "Restart & Run All" regenerates the same users, cities,
# ratings and day offsets. The absolute dates still depend on the moment the
# cell is run, because they are anchored to pd.Timestamp.now().
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

users = [f"user_{i}" for i in range(1, 31)]
cities = [
    "Tehran", "Mashhad", "Isfahan", "Shiraz", "Tabriz", "Kerman", "Kish", "Qeshm", "Rasht",
    "Ahvaz", "Yazd", "Urmia", "Kermanshah", "Sari", "Gorgan"
]

data = []
for user in users:
    # Every user reviews three distinct cities.
    for city in random.sample(cities, 3):
        # Rating drawn from N(4, 0.5), clamped to [1, 5], then rounded to an integer.
        rating = round(min(max(random.normalvariate(4, 0.5), 1), 5))

        # Trip started 1-365 days ago and lasted 2-7 days.
        start_date = pd.Timestamp.now() - pd.Timedelta(days=random.randint(1, 365))
        end_date = start_date + pd.Timedelta(days=random.randint(2, 7))

        data.append({
            "user_id": user,
            "destination": city,
            "rating": rating,
            "start_date": start_date,
            "end_date": end_date
        })

# One row per (user, destination) review.
df_reviews = pd.DataFrame(data)


In [None]:
# Show the generated reviews table.
print(df_reviews)

In [None]:
# Make sure both date columns are real datetimes before using the .dt accessor.
df_reviews['start_date'] = pd.to_datetime(df_reviews['start_date'])
df_reviews['end_date'] = pd.to_datetime(df_reviews['end_date'])

df_reviews['start_month'] = df_reviews['start_date'].dt.month
df_reviews['end_month'] = df_reviews['end_date'].dt.month

# 'month' is the midpoint of the start and end month (equal to the start month
# when the trip stays inside one month). A trip that crosses New Year has
# end_month < start_month; count January as month 13 there so that Dec -> Jan
# averages to 12.5 instead of 6.5 (mid-year).
end_month_unwrapped = df_reviews['end_month'].where(
    df_reviews['end_month'] >= df_reviews['start_month'],
    df_reviews['end_month'] + 12,
)
df_reviews['month'] = (df_reviews['start_month'] + end_month_unwrapped) / 2

print(df_reviews[['user_id', 'destination', 'start_date', 'end_date', 'month']])


In [None]:
# Placeholder path: replace it with a real destination (e.g. an .xlsx file) before running.
# The value used to be commented out, which left a bare `file_path =` (SyntaxError).
file_path = r"User Reviews Dataset Path"
df_reviews.to_excel(file_path, index=False)

In [None]:
# Alias, not a copy: usersdata and df_reviews refer to the same DataFrame object.
usersdata = df_reviews

City weather information dataset

In [None]:
# Read the key from the environment so it is never stored in the notebook.
# Set OPENWEATHER_API_KEY before starting the kernel; an empty key makes the API
# calls below fail with an HTTP error status instead of breaking the cell.
OPENWEATHER_API_KEY = os.environ.get("OPENWEATHER_API_KEY", "")

cities = [
    "Tehran", "Mashhad", "Isfahan", "Shiraz", "Tabriz", "Kerman", "Kish", "Qeshm", "Rasht",
    "Ahvaz", "Yazd", "Urmia", "Kermanshah", "Sari", "Gorgan"
]

def get_weather_data(city):
    """Fetch the current weather for ``city`` from the OpenWeatherMap API.

    Parameters
    ----------
    city : str
        City name sent as the ``q`` query parameter.

    Returns
    -------
    dict
        Keys ``city``, ``temperature``, ``feels_like``, ``cloudiness``,
        ``humidity``, ``rain``, ``snow`` and ``timestamp``. ``rain`` and ``snow``
        default to 0 when the response has no ``1h`` value. If the request
        fails or returns a non-200 status, every measurement is ``None``; the
        error is printed and never raised.
    """
    def _empty_record():
        # Same shape as a successful record so the resulting DataFrame keeps its columns.
        return {"city": city, "temperature": None, "feels_like": None, "cloudiness": None,
                "humidity": None, "rain": None, "snow": None, "timestamp": datetime.now()}

    try:
        # `params` lets requests URL-encode the city name; the timeout keeps one
        # unresponsive call from blocking the whole notebook.
        response = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={"q": city, "appid": OPENWEATHER_API_KEY, "units": "metric"},
            timeout=10,
        )
        if response.status_code != 200:
            print(f"Weather API Error for {city}: {response.status_code}")
            return _empty_record()

        data = response.json()
        return {
            "city": city,
            "temperature": data["main"]["temp"],
            "feels_like": data["main"]["feels_like"],
            "cloudiness": data["clouds"]["all"],
            "humidity": data["main"]["humidity"],
            "rain": data.get("rain", {}).get("1h", 0),
            "snow": data.get("snow", {}).get("1h", 0),
            "timestamp": datetime.now()
        }
    except Exception as e:
        # Deliberate best effort: a single failing city must not abort the collection loop.
        print(f"Error fetching weather data for {city}: {e}")
        return _empty_record()

# Query the API once per city, keeping the records in the order of `cities`.
real_time_weather_data = []
for city in cities:
    print(f"Fetching weather data for {city}...")
    real_time_weather_data.append(get_weather_data(city))

# One row per city.
df_weather = pd.DataFrame(real_time_weather_data)

In [None]:
# Show the collected weather records.
print(df_weather)

In [None]:
# Normalise the fetch time to a datetime column, then keep its calendar month.
timestamps = pd.to_datetime(df_weather['timestamp'])
df_weather['timestamp'] = timestamps
df_weather['month'] = timestamps.dt.month

print(df_weather)

In [None]:
# Placeholder path: replace it with a real destination (e.g. an .xlsx file) before running.
# The value used to be commented out, which left a bare `file_path2 =` (SyntaxError).
file_path2 = r"Cities weather dataset Path"
df_weather.to_excel(file_path2, index=False)

In [None]:
# Alias, not a copy: dynamicdata and df_weather refer to the same DataFrame object.
dynamicdata = df_weather

City static information

In [None]:
# Read the key from the environment so it is never stored in the notebook.
# Set OPENWEATHER_API_KEY before starting the kernel.
OPENWEATHER_API_KEY = os.environ.get("OPENWEATHER_API_KEY", "")

cities = [
    "Tehran", "Mashhad", "Isfahan", "Shiraz", "Tabriz", "Kerman", "Kish", "Qeshm", "Rasht",
    "Ahvaz", "Yazd", "Urmia", "Kermanshah", "Sari", "Gorgan"
]

def get_city_coordinates(city_name, api_key):
    """Look up a city's latitude and longitude via the OpenWeatherMap geocoding API.

    Parameters
    ----------
    city_name : str
        Name sent as the ``q`` query parameter; only the first match is requested.
    api_key : str
        OpenWeatherMap API key.

    Returns
    -------
    tuple
        ``(lat, lon)`` taken from the first match, or ``(None, None)`` when the
        request fails, the status is not 200, or no match is returned.
    """
    base_url = "https://api.openweathermap.org/geo/1.0/direct"
    params = {"q": city_name, "limit": 1, "appid": api_key}
    try:
        # The timeout keeps one unresponsive call from hanging the notebook.
        response = requests.get(base_url, params=params, timeout=10)
    except requests.RequestException as exc:
        # Report and fall through to the "not found" result, like the other fetch helper.
        print(f"Geocoding request failed for {city_name}: {exc}")
        return None, None

    if response.status_code != 200:
        return None, None

    matches = response.json()
    if not matches:
        return None, None
    return matches[0]["lat"], matches[0]["lon"]

def get_elevation(lat, lon, api_key):
    """Request the elevation at a coordinate pair.

    Parameters
    ----------
    lat, lon : float
        Coordinates sent as the ``lat`` / ``lon`` query parameters.
    api_key : str
        OpenWeatherMap API key.

    Returns
    -------
    The value stored under ``"elevation"`` in the JSON response, or ``None``
    when the request fails, the status is not 200, or the key is absent.
    """
    # NOTE(review): confirm this endpoint exists for your API plan; if it never
    # returns status 200, every city ends up with Elevation = None.
    base_url = "https://api.openweathermap.org/data/2.5/elevation"
    params = {"lat": lat, "lon": lon, "appid": api_key}
    try:
        # The timeout keeps one unresponsive call from hanging the notebook.
        response = requests.get(base_url, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Elevation request failed for ({lat}, {lon}): {exc}")
        return None

    if response.status_code != 200:
        return None
    return response.json().get("elevation")

# Collect coordinates and elevation for every city; cities that cannot be
# geocoded are reported and left out of the table.
results = []
for city in cities:
    print(f"Processing city: {city}...")
    lat, lon = get_city_coordinates(city, OPENWEATHER_API_KEY)
    # Compare against None explicitly: 0.0 is a valid latitude/longitude and
    # must not be mistaken for a failed lookup.
    if lat is not None and lon is not None:
        elevation = get_elevation(lat, lon, OPENWEATHER_API_KEY)
        results.append({
            "City": city,
            "Latitude": lat,
            "Longitude": lon,
            "Elevation": elevation
        })
    else:
        print(f"Failed to fetch data for {city}.")

df = pd.DataFrame(results)

output_path = "cities_weather_data.xlsx"
df.to_excel(output_path, index=False)

print(f"Data saved to {output_path}")


In [None]:
# Alias, not a copy: staticdata and df refer to the same DataFrame object.
staticdata = df

In [None]:
# Placeholder path: replace it with a real destination (e.g. an .xlsx file) before running.
# The value used to be commented out, which left a bare `file_path2 =` (SyntaxError).
file_path2 = r"Staticdata dataset Path"
staticdata.to_excel(file_path2, index=False)

In [None]:
# Reload the three tables from disk. The paths are placeholders: replace them
# with the files written by the save cells. They are plain raw strings now; the
# previous triple-quoted form made the `r"` prefix and quotes part of the filename.
usersdata = pd.read_excel(r"User Reviews Dataset Path")
print(usersdata)
dynamicdata = pd.read_excel(r"Cities weather dataset Path")
print(dynamicdata)
staticdata = pd.read_excel(r"Staticdata dataset Path")
print(staticdata)

In [None]:
# Work on a copy so the 'cluster' column added below does not leak into
# `dynamicdata`, which later cells copy for the fuzzy clustering.
dynamicdata_hierachyc = dynamicdata.copy()

features = ["temperature", "feels_like", "cloudiness", "humidity", "rain", "snow"]
X = dynamicdata_hierachyc[features]

# Ward linkage on the raw (unscaled) weather features.
linkage_matrix = linkage(X, method="ward")

plt.figure(figsize=(10, 6))
dendrogram(linkage_matrix, labels=dynamicdata_hierachyc["city"].values, leaf_rotation=90, leaf_font_size=10)
plt.title("Hierarchical Clustering Dendrogram")
plt.xlabel("Cities")
plt.ylabel("Distance")
plt.show()

# Cut the tree into at most n_clusters flat clusters.
n_clusters = 4
clusters = fcluster(linkage_matrix, n_clusters, criterion="maxclust")
dynamicdata_hierachyc["cluster"] = clusters

print(dynamicdata_hierachyc)

In [None]:
features = ["temperature", "feels_like", "cloudiness", "humidity", "rain", "snow"]
X = dynamicdata_hierachyc[features]
scaler = StandardScaler()
dynamicdata_scaled = scaler.fit_transform(X)

# Grid searched: number of clusters x fuzzifier m.
cluster_range = range(2, 10)
m_values = [1.5, 2, 2.5, 3]
FCM_SEED = 42  # fixes the random initial partition so the search is repeatable

results = []

for n_clusters in cluster_range:
    for m in m_values:
        # cmeans expects data as (features, samples), hence the transpose.
        cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(
            dynamicdata_scaled.T,
            n_clusters,
            m,
            error=0.005,
            maxiter=1000,
            init=None,
            seed=FCM_SEED
        )

        # Xie-Beni index = compactness / (N * separation)
        #   compactness: sum over clusters and samples of u^m * squared distance
        #   separation : smallest squared distance between two cluster centres
        # The previous version summed each sample's unsquared distance to its
        # nearest centre, ignoring the memberships.
        min_sq_separation = min(
            np.sum((cntr[i] - cntr[j]) ** 2)
            for i in range(n_clusters) for j in range(i + 1, n_clusters)
        )
        compactness = np.sum((u ** m) * d ** 2)
        xb = compactness / (len(X) * min_sq_separation)

        results.append({
            'n_clusters': n_clusters,
            'm': m,
            'Xie-Beni': xb
        })

results_df = pd.DataFrame(results)

# Lower Xie-Beni is better: compact clusters that are far apart.
best_xb = results_df.loc[results_df['Xie-Beni'].idxmin()]

print("Best setting by Xie-Beni:")
print(best_xb)

plt.figure(figsize=(8, 6))

# One curve per fuzzifier value.
for m in m_values:
    runs_for_m = results_df[results_df['m'] == m]
    plt.plot(runs_for_m['n_clusters'], runs_for_m['Xie-Beni'], label=f'm={m}')
plt.title("Xie-Beni vs Number of Clusters")
plt.xlabel("Number of Clusters")
plt.ylabel("Xie-Beni Index")
plt.legend()
plt.grid()

plt.tight_layout()
plt.show()


In [None]:
features = ["temperature", "feels_like", "cloudiness", "humidity", "rain", "snow"]
X = dynamicdata_hierachyc[features]

scaler = StandardScaler()
# Independent copy that receives the membership and cluster columns below.
dynamicdata2 = dynamicdata.copy()

X_scaled = scaler.fit_transform(X)

n_clusters = 3

# cmeans expects data as (features, samples), hence the transpose. The seed
# fixes the random initial partition so memberships (and everything derived
# from them downstream) are the same on every run.
cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(
    X_scaled.T,
    n_clusters,
    1.5,
    error=0.005,
    maxiter=1000,
    init=None,
    seed=42
)

# Hard label = cluster with the highest membership for each city.
cluster_membership = np.argmax(u, axis=0)

# u has one row per cluster and one column per city.
for i in range(n_clusters):
    dynamicdata2[f'cluster_{i}_membership'] = u[i, :]

dynamicdata2['cluster'] = cluster_membership

print("Fuzzy scores and Clusters")
print(dynamicdata2.head())

# Scatter of the first two scaled features, coloured by hard cluster label.
plt.figure(figsize=(10, 6))
for i in range(n_clusters):
    plt.scatter(X_scaled[cluster_membership == i, 0],
                X_scaled[cluster_membership == i, 1],
                label=f'Cluster {i}')
plt.scatter(cntr[:, 0], cntr[:, 1], s=300, c='red', marker='X', label='Centers')
plt.title('Fuzzy C-Means Clustering')
plt.xlabel('Feature 1 (e.g., temperature)')
plt.ylabel('Feature 2 (e.g., feels_like)')
plt.legend()
plt.grid()
plt.show()

In [None]:
# Show the weather table with the fuzzy membership and cluster columns.
print(dynamicdata2)

In [None]:
# Start from the reviews table. The previous version read `usersdata2` before it
# was ever assigned, which raises NameError on a fresh kernel.
# errors='ignore': the reviews table built in this notebook has no 'timestamp'
# column, and a strict drop would raise KeyError for it.
usersdata2 = usersdata.drop(
    columns=['start_date', 'end_date', 'timestamp', 'start_month', 'end_month', 'month'],
    errors='ignore',
)

# Order rows by user, then alphabetically by destination within each user.
users_grouped = usersdata2.sort_values(['user_id', 'destination']).reset_index(drop=True)

# Spread each user's (up to) three reviews into destination_1..3 / rating_1..3.
# transform() broadcasts the picked value to every row of that user; the next
# cell keeps a single row per user.
for field in ('destination', 'rating'):
    for position in range(3):
        users_grouped[f'{field}_{position + 1}'] = (
            users_grouped.groupby('user_id')[field]
            .transform(lambda values, position=position:
                       values.iloc[position] if len(values) > position else None)
        )

users_grouped = users_grouped.drop(columns=['destination', 'rating'])

print(users_grouped)

In [None]:
# Keep a single row per user and fix the column order.
final_columns = ['user_id', 'destination_1', 'rating_1', 'destination_2', 'rating_2', 'destination_3', 'rating_3']
users_final = users_grouped.drop_duplicates(subset='user_id')[final_columns]
print(users_final)

In [None]:
# Sort by user id and renumber the rows 0..n-1.
users_final = users_final.sort_values(by='user_id', ignore_index=True)
print(users_final)

In [None]:
# Recover the number of fuzzy clusters from the cluster_<k>_membership columns.
membership_columns = [
    name for name in dynamicdata2.columns
    if name.startswith('cluster_') and name.endswith('_membership')
]
n_clusters = len(membership_columns)
print(n_clusters)

In [None]:
# Copy so that adding the rate_cluster_* columns does not modify users_final.
users_final2 = users_final.copy()

for cluster_id in range(n_clusters):
    users_final2[f'rate_cluster_{cluster_id}'] = 0.0

# Membership vector of every city, looked up once instead of filtering
# dynamicdata2 for each (user, destination) pair. drop_duplicates keeps the
# first row per city, matching the previous `.iloc[0]` lookup.
membership_columns = [f'cluster_{cluster_id}_membership' for cluster_id in range(n_clusters)]
city_memberships = (
    dynamicdata2.drop_duplicates(subset='city').set_index('city')[membership_columns]
)

trip_columns = [(f'destination_{k}', f'rating_{k}') for k in range(1, 4)]

for index, user_row in users_final.iterrows():
    weighted_sum = np.zeros(n_clusters)  # per cluster: sum of membership * rating
    rated_cities = 0                     # destinations that contributed

    for dest_col, rate_col in trip_columns:
        city = user_row[dest_col]
        rating = user_row[rate_col]

        # Skip destinations without weather data or without a rating.
        if city in city_memberships.index and pd.notna(rating):
            weighted_sum += city_memberships.loc[city].to_numpy(dtype=float) * rating
            rated_cities += 1

    # rate_cluster_k = mean over the user's rated cities of membership_k * rating.
    if rated_cities > 0:
        for cluster_id in range(n_clusters):
            users_final2.at[index, f'rate_cluster_{cluster_id}'] = (
                weighted_sum[cluster_id] / rated_cities
            )

print(users_final2)



In [None]:
# No cell in this notebook creates 'assigned_cluster'; errors='ignore' removes
# it when present and avoids a KeyError when it is not.
users_final2 = users_final2.drop(columns=['assigned_cluster'], errors='ignore')

In [None]:
cities = [
    "Tehran", "Mashhad", "Isfahan", "Shiraz", "Tabriz", "Kerman", "Kish", "Qeshm", "Rasht",
    "Ahvaz", "Yazd", "Urmia", "Kermanshah", "Sari", "Gorgan"
]
# One score column per city. Initialise with 0.0 (float) because a later cell
# stores fractional scores here; writing floats into integer columns is
# deprecated in recent pandas versions.
for city in cities:
    users_final2[city] = 0.0

In [None]:
# Copy so that users_final2 keeps its state from the previous cells.
users_final3 = users_final2.copy()

# First row per city, indexed by name, so each lookup below happens once per
# city rather than once per (user, city) pair.
city_info = dynamicdata2.drop_duplicates(subset='city').set_index('city')

for city in cities:
    # Hard cluster of the city and the city's membership degree in that cluster.
    city_cluster = city_info.at[city, 'cluster']
    city_membership = city_info.at[city, f'cluster_{city_cluster}_membership']

    # Score of every user for this city = city's membership * user's rate for
    # that cluster. Assigning the whole column keeps it float.
    users_final3[city] = city_membership * users_final3[f'rate_cluster_{city_cluster}']

print(users_final3)

In [None]:
# Alias, not a copy: user_dynamic_rates and users_final3 are the same DataFrame object.
user_dynamic_rates = users_final3
print(user_dynamic_rates)

In [None]:
# Placeholder path: replace it with a real destination (e.g. an .xlsx file).
# A plain raw string now; the previous triple-quoted form made the `r"` prefix
# and the quotes part of the filename.
user_dynamic_rates.to_excel(r"user dynamic rates Path", index=False)

In [None]:
# Alias, not a copy: staticdata2 and staticdata are the same DataFrame object.
staticdata2 =  staticdata

def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance between two points given in decimal degrees.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in degrees.

    Returns
    -------
    float
        Distance along a sphere of radius 6371.0, i.e. in kilometres.
    """
    R = 6371.0
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Rounding can push `a` marginally above 1 for (near-)antipodal points,
    # which would make sqrt(1 - a) raise. A plain comparison is used instead of
    # min()/max() so that NaN inputs still propagate as NaN.
    if a > 1.0:
        a = 1.0
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c

def compute_distance_matrix(staticdata2):
    """Pairwise haversine distances between all cities in ``staticdata2``.

    Parameters
    ----------
    staticdata2 : pandas.DataFrame
        Must contain ``Latitude`` and ``Longitude`` columns (degrees), one row per city.

    Returns
    -------
    numpy.ndarray
        Symmetric ``(n, n)`` matrix in row order, with zeros on the diagonal.
    """
    # Read coordinates positionally so the result does not depend on the frame's
    # index labels (the previous `.loc[i, ...]` required a 0..n-1 index).
    latitudes = staticdata2['Latitude'].to_numpy(dtype=float)
    longitudes = staticdata2['Longitude'].to_numpy(dtype=float)
    n_cities = len(latitudes)

    distances = np.zeros((n_cities, n_cities))
    for i in range(n_cities):
        # The distance is symmetric: compute the upper triangle and mirror it.
        for j in range(i + 1, n_cities):
            distance = haversine(latitudes[i], longitudes[i], latitudes[j], longitudes[j])
            distances[i, j] = distance
            distances[j, i] = distance
    return distances

def compute_feature_similarity(staticdata2):
    """Cosine similarity between cities based on their standardised static features.

    Reads the ``Elevation``, ``Area`` and ``Population`` columns of
    ``staticdata2``, standardises them with ``StandardScaler`` and returns the
    city-by-city cosine similarity matrix in row order.

    NOTE(review): the frame assembled earlier in this notebook only has City,
    Latitude, Longitude and Elevation; confirm that the frame passed in really
    carries Area and Population.
    """
    standardized = StandardScaler().fit_transform(
        staticdata2[['Elevation', 'Area', 'Population']]
    )
    return cosine_similarity(standardized)

def combine_similarities(distance_matrix, feature_similarity_matrix, alpha=0.5):
    """Blend feature similarity with a distance-based proximity score.

    Distances are divided by the largest distance and mapped to
    ``1 / (1 + scaled_distance)``, so a distance of 0 gives 1 and the largest
    distance gives 0.5. The result is
    ``alpha * feature_similarity_matrix + (1 - alpha) * proximity``.
    """
    scaled_distance = distance_matrix / np.max(distance_matrix)
    proximity = 1 / (1 + scaled_distance)
    return alpha * feature_similarity_matrix + (1 - alpha) * proximity

# Build both city-to-city matrices from the static table and blend them.
distances = compute_distance_matrix(staticdata)
feature_similarity = compute_feature_similarity(staticdata)
combined_similarity = combine_similarities(distances, feature_similarity)

# Label rows and columns with the city names so scores can be looked up by name.
city_labels = staticdata['City']
combined_similarity_df = pd.DataFrame(
    combined_similarity,
    index=city_labels,
    columns=city_labels,
)

print(combined_similarity_df)

In [None]:
# user_id followed by destination_k / rating_k pairs for k = 1..3.
summary_columns = ['user_id'] + [
    name for k in range(1, 4) for name in (f'destination_{k}', f'rating_{k}')
]

# One row per user, sorted by user id, rows renumbered from 0.
users_final4 = (
    users_grouped
    .drop_duplicates(subset='user_id')[summary_columns]
    .sort_values(by='user_id')
    .reset_index(drop=True)
)

cities = [
    "Tehran", "Mashhad", "Isfahan", "Shiraz", "Tabriz", "Kerman", "Kish", "Qeshm", "Rasht",
    "Ahvaz", "Yazd", "Urmia", "Kermanshah", "Sari", "Gorgan"
]
# One rating column per city; 0 means "not rated".
for city in cities:
    users_final4[city] = 0
print(users_final4)

In [None]:
# Copy each user's three (destination, rating) pairs into the matching city column.
trip_columns = [(f'destination_{k}', f'rating_{k}') for k in range(1, 4)]

for idx, row in users_final4.iterrows():
    for city_column, rating_column in trip_columns:
        city = row[city_column]
        rating = row[rating_column]

        # Destinations that have no column of their own are skipped.
        if city not in users_final4.columns:
            continue
        users_final4.loc[idx, city] = rating

print(users_final4)


In [None]:
# Drop the per-trip columns; the per-city rating columns now carry the same information.
trip_columns = [
    f'{prefix}_{k}' for prefix in ('destination', 'rating') for k in range(1, 4)
]
users_final5 = users_final4.drop(columns=trip_columns)
print(users_final5)

In [None]:
# Placeholder paths: replace them with real destinations (e.g. .xlsx files).
# They are plain raw strings now; the previous triple-quoted forms made the
# `r"` prefix and/or the quotes part of the filename.
users_final5.to_excel(r"users final5 Path", index=False)
# index=False drops the city row labels; the column headers still list the
# cities in the same order.
combined_similarity_df.to_excel(r"combined similarity df Path", index=False)
# NOTE(review): filled_scores is first assigned in a later cell of this notebook,
# so on a fresh kernel this line raises NameError unless that cell has run.
filled_scores.to_excel(r"filled scores Path", index=False)

In [None]:
# user_city_scores is an alias of users_final5 (same object, not a copy).
user_city_scores = users_final5
# Ratings only: drop the id column so every remaining column is a city.
user_scores = user_city_scores.drop('user_id', axis=1)

def fill_missing_scores(user_scores, similarity_df):
    """Estimate unrated items from a user's rated items, weighted by item similarity.

    A score of 0 means "not rated". For every such cell the estimate is the
    similarity-weighted mean of the user's non-zero scores:
    ``sum(sim(item, rated) * score(rated)) / sum(sim(item, rated))``.
    The cell stays 0 when the similarities do not sum to a positive value
    (including the case of a user with no ratings at all).

    Parameters
    ----------
    user_scores : pandas.DataFrame
        One row per user, one numeric column per item.
    similarity_df : pandas.DataFrame
        Item-by-item similarities, indexed and labelled by the same item names.

    Returns
    -------
    pandas.DataFrame
        A float copy of ``user_scores`` with the estimates filled in. The input
        frame is left untouched.
    """
    # Float copy: estimates are fractional, and writing them into integer
    # columns is deprecated in recent pandas versions.
    filled = user_scores.astype(float)

    for user_idx, user_row in user_scores.iterrows():
        # Always read from the original row so estimates never feed other estimates.
        rated = user_row[user_row != 0]
        if rated.empty:
            continue
        rated_values = rated.to_numpy(dtype=float)

        for item in user_row.index[user_row == 0]:
            weights = similarity_df.loc[item, rated.index].to_numpy(dtype=float)
            weight_total = weights.sum()
            if weight_total > 0:
                filled.at[user_idx, item] = float(weights @ rated_values) / weight_total

    return filled

# Pass a copy so user_scores itself keeps its zeros for unrated cities.
filled_scores = fill_missing_scores(user_scores.copy(), combined_similarity_df)
print(filled_scores)


In [None]:
# Placeholder path: point it at the file written from users_final5. It now uses
# the same placeholder name as the cell that saves that frame, and is a plain
# raw string; the previous triple-quoted form made the `r"` prefix and the
# quotes part of the filename.
users_final6 = pd.read_excel(r"users final5 Path")
print(users_final6)

In [None]:
# Long format (user, city, rating); zeros mean "not rated" and are dropped.
df = users_final6.melt(id_vars='user_id', var_name='city', value_name='rating')
df = df[df['rating'] > 0]
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['user_id', 'city', 'rating']], reader)

# Fixed random_state so the hold-out split (and the reported RMSE) is repeatable.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# User-based k-NN with cosine similarity between users.
sim_options = {'name': 'cosine', 'user_based': True}
algo = KNNBasic(sim_options=sim_options)

algo.fit(trainset)

predictions = algo.test(testset)
rmse = accuracy.rmse(predictions)
print(f"RMSE on test set: {rmse}")

# Refit on every known rating before predicting the unrated cells.
full_trainset = data.build_full_trainset()
algo.fit(full_trainset)

users = users_final6['user_id']
cities = users_final6.columns[1:]

final_predictions = users_final6.copy()
# Predictions are fractional; make the city columns float before filling them,
# since writing floats into integer columns is deprecated in recent pandas versions.
final_predictions[cities] = final_predictions[cities].astype(float)

# Replace every 0 ("not rated") with the model's estimate for that user and city.
for row_label, user in users.items():
    for city in cities:
        if final_predictions.at[row_label, city] == 0:
            final_predictions.at[row_label, city] = algo.predict(user, city).est

print("Final predictions:")
print(final_predictions)

In [None]:
# Placeholder paths: point them at the three score tables to blend. They are
# plain raw strings now; the previous triple-/quadruple-quoted forms made stray
# quote characters and the `r` prefix part of the filenames.
# NOTE(review): no earlier cell writes files under the "users_static rates" or
# "user based" names; confirm which saved tables these are meant to be.
itembased_dynamic = pd.read_excel(r"user dynamic rates Path")
itembased_static = pd.read_excel(r"users_static rates Path")
userbased = pd.read_excel(r"user based Path")
print(itembased_dynamic)
print(itembased_static)
print(userbased)

In [None]:
# Blend only the score columns that all three tables share, matched by column
# name. The previous positional `iloc[:, 1:]` slice only works when the three
# files have exactly the same layout; any extra text column or a missing id
# column breaks or misaligns the arithmetic.
score_frames = (itembased_dynamic, itembased_static, userbased)
id_column = itembased_dynamic.columns[0]
score_columns = [
    column for column in itembased_dynamic.columns[1:]
    if all(column in frame.columns for frame in score_frames)
]

final_scores = itembased_dynamic[[id_column] + score_columns].copy()
# Weights: 50% dynamic item-based, 25% static item-based, 25% user-based.
# Rows are matched by position, so the three tables must list users in the same order.
final_scores[score_columns] = (
    0.50 * itembased_dynamic[score_columns] +
    0.25 * itembased_static[score_columns] +
    0.25 * userbased[score_columns]
)

final_scores

In [None]:
recommended_cities = final_scores[['user_id']].copy()

# Rank each user's cities once and take the five best. nlargest returns distinct
# labels in descending score order, so tied scores can no longer produce the
# same city twice (the previous `nlargest(i).idxmin()` returned the first of
# the tied cities for several ranks).
TOP_N = 5
ranked_cities = [
    scores.nlargest(TOP_N).index.tolist()
    for _, scores in final_scores.iloc[:, 1:].iterrows()
]

for i in range(1, TOP_N + 1):
    recommended_cities[f'Recommended City {i}'] = [
        names[i - 1] if len(names) >= i else None
        for names in ranked_cities
    ]

print(recommended_cities)

In [None]:
# Placeholder paths: replace them with real destinations (e.g. .xlsx files).
# They are plain raw strings now; the previous literals embedded the `r` prefix
# and mismatched quotes in the filenames.
final_scores.to_excel(r"final_scores Path", index=False)
recommended_cities.to_excel(r"recommended cities Path", index=False)