In [1]:
import pandas as pd
import networkx as nx
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import numpy as np

# Загрузка данных
users = pd.read_csv('data/users.csv')
items = pd.read_csv('data/items.csv')
reviews = pd.read_csv('data/reviews.csv')


In [2]:
G = nx.Graph()

In [3]:
for _, row in items.iterrows():
    G.add_node(row['detail_id'], type='place', name=row['name'], rating=row['rating'])

for _, row in reviews.iterrows():
    G.add_node(row['profile_url'], type='user')
    G.add_edge(row['profile_url'], row['detail_id'], weight=row['mark'])

# Добавление рёбер между местами на основе схожести
# Формируем матрицу признаков для мест
place_features = items[['latitude', 'longitude', 'rating'] +
                       [col for col in items.columns if col.startswith('tags_') or col.startswith('cuisines_')]]
place_ids = items['detail_id'].values
scaler = StandardScaler()
place_features_scaled = scaler.fit_transform(place_features.fillna(0))

similarity_matrix = cosine_similarity(place_features_scaled)
threshold = 0.5

for i, place_id1 in enumerate(place_ids):
    for j, place_id2 in enumerate(place_ids):
        if i != j and similarity_matrix[i, j] > threshold:
            G.add_edge(place_id1, place_id2, weight=similarity_matrix[i, j])


In [8]:
def recommend_route(user_id, graph, top_n=5):
    if user_id not in graph:
        return "Пользователь не найден в графе."

    user_places = [neighbor for neighbor in graph.neighbors(user_id) if graph.nodes[neighbor]['type'] == 'place']

    similar_places = []
    for place_id in user_places:
        for neighbor in graph.neighbors(place_id):
            if graph.nodes[neighbor]['type'] == 'place' and neighbor not in user_places:
                similar_places.append((neighbor, graph[place_id][neighbor]['weight']))

    similar_places = sorted(similar_places, key=lambda x: -x[1])[:top_n]
    recommended_places = [place for place, _ in similar_places]

    return recommended_places

In [6]:
user_id = '/Profile/yalmaree'
recommended_places = recommend_route(user_id, G)
print("Рекомендуемые места для пользователя:", recommended_places)

Рекомендуемые места для пользователя: [12245093, 6977474, 6493701, 27484126, 8477924]


In [9]:
import pandas as pd
import json
import numpy as np

test_reviews = pd.read_csv('data/reviews_test.csv')
for _, row in test_reviews.iterrows():
    user_url = row['profile_url']
    place_id = row['detail_id']
    mark = row['mark']

    if place_id not in G:
        G.add_node(place_id, type='place', name=str(place_id), rating=0)

    if user_url not in G:
        G.add_node(user_url, type='user')

    # Теперь добавляем ребро user->place с весом = mark
    G.add_edge(user_url, place_id, weight=mark)

recommendations_dict = {}
with open('data/test_profile_urls.txt', 'r', encoding='utf-8') as f:
    for line in f:
        user_id = line.strip()
        top5_places = recommend_route(user_id, G, top_n=5)

        if isinstance(top5_places, str):
            recommendations_dict[user_id] = []
        else:
            top5_converted = []
            for place in top5_places:
                if isinstance(place, (np.integer, int)):
                    top5_converted.append(int(place))
                else:
                    top5_converted.append(place)
            recommendations_dict[user_id] = top5_converted

with open('recommendations_rec_route.json', 'w', encoding='utf-8') as outfile:
    json.dump(recommendations_dict, outfile, ensure_ascii=False, indent=2)

print("Результат сохранён в 'recommendations_rec_route.json'.")


KeyError: 'type'

In [10]:
for node in G.nodes:
    if 'type' not in G.nodes[node]:
        print("Узел без type:", node)

Узел без type: 17419620
Узел без type: 2559451
Узел без type: 10677264
Узел без type: 568288
Узел без type: 11747885
Узел без type: 10044521
Узел без type: 15057537
Узел без type: 3469339


In [11]:
nodes_without_type = [n for n in G.nodes if 'type' not in G.nodes[n]]
G.remove_nodes_from(nodes_without_type)

In [12]:
for node in G.nodes:
    if 'type' not in G.nodes[node]:
        print("Узел без type:", node)

In [13]:
import pandas as pd
import json
import numpy as np

test_reviews = pd.read_csv('data/reviews_test.csv')
for _, row in test_reviews.iterrows():
    user_url = row['profile_url']
    place_id = row['detail_id']
    mark = row['mark']

    if place_id not in G:
        G.add_node(place_id, type='place', name=str(place_id), rating=0)

    if user_url not in G:
        G.add_node(user_url, type='user')

    # Теперь добавляем ребро user->place с весом = mark
    G.add_edge(user_url, place_id, weight=mark)

recommendations_dict = {}
with open('data/test_profile_urls.txt', 'r', encoding='utf-8') as f:
    for line in f:
        user_id = line.strip()
        top5_places = recommend_route(user_id, G, top_n=5)

        if isinstance(top5_places, str):
            recommendations_dict[user_id] = []
        else:
            top5_converted = []
            for place in top5_places:
                if isinstance(place, (np.integer, int)):
                    top5_converted.append(int(place))
                else:
                    top5_converted.append(place)
            recommendations_dict[user_id] = top5_converted

with open('recommendations_rec_route.json', 'w', encoding='utf-8') as outfile:
    json.dump(recommendations_dict, outfile, ensure_ascii=False, indent=2)

print("Результат сохранён в 'recommendations_rec_route.json'.")

Результат сохранён в 'recommendations_rec_route.json'.
