In [3]:
import pandas as pd
import networkx as nx

# Load the three input tables (users, items/places, reviews) from the data/ directory.
users, items, reviews = (
    pd.read_csv(csv_path)
    for csv_path in ('data/users.csv', 'data/items.csv', 'data/reviews.csv')
)

In [4]:
# Directed graph shared by the following cells: nodes are users and places,
# and each review is stored as a user -> place edge weighted by its mark.
G = nx.DiGraph()

In [5]:
# Build the user -> place review graph in G and rank places by PageRank.
#
# Columns are iterated directly (Series.tolist()) instead of through
# DataFrame.iterrows(), which constructs a new Series for every row and is
# much slower on large tables. The values stored in the graph are the same.

# Place nodes. If the items table has no 'rating' column, every place gets 0.
if 'rating' in items.columns:
    place_ratings = items['rating'].tolist()
else:
    place_ratings = [0] * len(items)

for place_id, place_name, place_rating in zip(
    items['detail_id'].tolist(), items['name'].tolist(), place_ratings
):
    G.add_node(place_id, type='place', name=place_name, rating=place_rating)

# User nodes, keyed by profile URL.
for profile_url in users['profile_url'].tolist():
    G.add_node(profile_url, type='user')

# Review edges: user -> place, weighted by the review mark.
for user, place, mark in zip(
    reviews['profile_url'].tolist(),
    reviews['detail_id'].tolist(),
    reviews['mark'].tolist(),
):
    # Only connect reviews whose user and place both already exist in the graph.
    if G.has_node(user) and G.has_node(place):
        G.add_edge(user, place, weight=mark)

# nx.pagerank reads the 'weight' edge attribute by default, so marks act as edge weights.
pagerank_values = nx.pagerank(G, alpha=0.85)

# Keep scores for place nodes only; user nodes are dropped from the ranking.
pagerank_places = {
    node: rank
    for node, rank in pagerank_values.items()
    if G.nodes[node].get('type') == 'place'
}

# Five highest-scoring places as (place_id, score) pairs, best first.
pagerank_sorted = sorted(pagerank_places.items(), key=lambda x: x[1], reverse=True)[:5]

In [6]:
# Print the ranked places from pagerank_sorted, resolving each place id to its
# display name through the node attributes stored in G.
print("Топ-5 мест по PageRank:")
for place_id, score in pagerank_sorted:
    print(f"Место: {G.nodes[place_id]['name']}, PageRank: {score:.4f}")

Топ-5 мест по PageRank:
Место: Ботанический Сад  Аптекарский Огород, PageRank: 0.0015
Место: Выставка достижений народного хозяйства (ВДНХ), PageRank: 0.0011
Место: Центральный парк культуры и отдыха им. Горького, PageRank: 0.0011
Место: Московский Метрополитен, PageRank: 0.0011
Место: Музей космонавтики, PageRank: 0.0011


In [7]:
import pandas as pd
import networkx as nx
import json

# Extend the graph with the test reviews, recompute PageRank, and write the
# top-N places for every test user to a JSON file.

test_reviews = pd.read_csv('data/reviews_test.csv')

# NOTE(review): this mutates the shared graph G in place, so re-running the
# earlier PageRank cells after this one gives different results.
# Columns are iterated directly instead of through DataFrame.iterrows(),
# which builds a Series per row and is much slower.
for user, place, mark in zip(
    test_reviews['profile_url'].tolist(),
    test_reviews['detail_id'].tolist(),
    test_reviews['mark'].tolist(),
):
    if not G.has_node(user):
        G.add_node(user, type='user')

    if not G.has_node(place):
        # Minimal attributes so that code reading a place's name/rating does not fail.
        G.add_node(place, type='place', name=str(place), rating=0)

    # Add the user -> place edge, weighted by the review mark.
    G.add_edge(user, place, weight=mark)

pagerank_values = nx.pagerank(G, alpha=0.85)

# Keep scores for place nodes only.
pagerank_places = {
    node: rank
    for node, rank in pagerank_values.items()
    if G.nodes[node].get('type') == 'place'
}

# All places as (place_id, score) pairs, best first.
pagerank_sorted = sorted(
    pagerank_places.items(),
    key=lambda x: x[1],
    reverse=True
)

with open('data/test_profile_urls.txt', 'r', encoding='utf-8') as f:
    # Skip blank lines so an empty string never becomes a user key in the output.
    test_users = [url for url in (line.strip() for line in f) if url]

top_n = 5

# The ranking does not depend on the user, so the top-N place ids are
# computed once instead of being rebuilt inside the per-user loop.
# NOTE(review): recommendations are therefore identical for all users and are
# not filtered by what a user has already reviewed - confirm this is intended.
recommended_place_ids = [place_id for place_id, _ in pagerank_sorted[:top_n]]

# Each user gets an independent copy of the list.
recommendations_dict = {
    user_id: list(recommended_place_ids) for user_id in test_users
}

with open('recommendations_pagerank.json', 'w', encoding='utf-8') as outfile:
    json.dump(recommendations_dict, outfile, ensure_ascii=False, indent=2)

print("Готово! Результат сохранён в 'recommendations_pagerank.json'.")


Готово! Результат сохранён в 'recommendations_pagerank.json'.
