In [1]:
import networkx as nx
import igraph as ig
import pickle
import pandas as pd

In [2]:
def load_part(name):
    """Return the object unpickled from the file at path ``name``.

    The file is opened in binary mode and closed again before returning.
    ``pickle.load`` can execute arbitrary code embedded in the file, so this
    must only be pointed at trusted, locally produced files.
    """
    with open(name, mode='rb') as stream:
        payload = pickle.load(stream)
    return payload

In [3]:
# Question-level table; `compute` reads its "Id" and "is_complex" columns.
questions = pd.read_csv(
    "../data/cache/prepared_questions_metrics.csv",
)
# Answer-level table; `compute` reads its "OwnerUserId" and "ParentId" columns.
# low_memory=False turns off pandas' chunked parsing, which avoids
# mixed-dtype column inference on large files.
answers = pd.read_csv(
    "../data/cache/prepared_answers_metrics.csv",
    low_memory=False,
)

In [4]:
# Read both cached edge lists with networkx and convert each to an igraph
# graph. The conversion stores the original networkx node label in the
# "_nx_name" vertex attribute, which `compute` reads to recover the user id.
tags_graph, answers_graph = (
    ig.Graph.from_networkx(nx.read_edgelist(edgelist_path))
    for edgelist_path in (
        "../data/cache/tagsG.edgelist",
        "../data/cache/answersG.edgelist",
    )
)

In [5]:
# Load the cached community partitions (unpickled via `load_part`).
# NOTE(review): the file suffixes look crossed relative to the variable
# names -- `tags_part` reads "..._a.pickle" while `answers_part` reads
# "..._t.pickle". If "a"/"t" stand for answers/tags, the two are swapped and
# every partition would be applied to the wrong graph's vertex indices.
# TODO confirm which pickle belongs to which graph.
tags_part = load_part("../data/cache/user_communities_a.pickle")
answers_part = load_part("../data/cache/user_communities_t.pickle")

In [6]:
def compute(G, partition, questions, answers):
    """Summarise answering activity for every community of a partition.

    Parameters
    ----------
    G : graph
        Object whose ``G.vs[community]`` yields vertices. Each vertex must
        expose a string ``"_nx_name"`` attribute consisting of one prefix
        character followed by a numeric user id.
    partition : iterable
        Communities, each one a collection of vertex indices accepted by
        ``G.vs[...]``.
    questions : pandas.DataFrame
        Must contain an ``"Id"`` column and a boolean ``"is_complex"`` column.
    answers : pandas.DataFrame
        Must contain ``"OwnerUserId"`` and ``"ParentId"`` columns. Rows with a
        missing ``"OwnerUserId"`` are never attributed to any user.

    Returns
    -------
    dict
        Maps the community's position in ``partition`` to a dict with keys
        ``"community"``, ``"total_answers"``, ``"total_complex_answers"``,
        ``"unique_answerers"``, ``"unique_complex_answerers"``,
        ``"complex_to_total_ratio"`` and
        ``"complex_to_total_answerers_ratio"``. Both ratios are ``0`` when
        their denominator is zero.

    Raises
    ------
    ValueError
        If a vertex name, minus its first character, cannot be parsed as a
        number.

    Notes
    -----
    ``"unique_answerers"`` counts every distinct member of the community,
    including members that have no rows in ``answers``.
    """
    complex_ids = questions.loc[questions["is_complex"], "Id"].to_list()

    # Count answers per owner ONCE instead of re-scanning the whole `answers`
    # frame for every vertex. `value_counts` drops missing owners, matching
    # the fact that an equality test against NaN never succeeds.
    owners = answers["OwnerUserId"]
    answers_per_user = owners.value_counts().to_dict()
    answers_complex_question = answers["ParentId"].isin(complex_ids)
    complex_per_user = owners[answers_complex_question].value_counts().to_dict()

    community_stats = {}

    for i, community in enumerate(partition):
        total_answers = 0
        total_complex_answers = 0

        unique_answerers = set()
        unique_complex_answerers = set()

        for user in G.vs[community]:
            # Strip the one-character prefix to recover the user id.
            user_id = user["_nx_name"][1:]
            # Owners are matched numerically; a float key also hits integer
            # dict keys because equal numbers hash identically.
            owner_key = float(user_id)

            total_answers += int(answers_per_user.get(owner_key, 0))
            unique_answerers.add(user_id)

            complex_count = int(complex_per_user.get(owner_key, 0))
            total_complex_answers += complex_count
            if complex_count > 0:
                unique_complex_answerers.add(user_id)

        community_stats[i] = {
            "community": community,
            "total_answers": total_answers,
            "total_complex_answers": total_complex_answers,
            "unique_answerers": len(unique_answerers),
            "unique_complex_answerers": len(unique_complex_answerers),
            "complex_to_total_ratio": total_complex_answers / total_answers if total_answers > 0 else 0,
            "complex_to_total_answerers_ratio": len(unique_complex_answerers) / len(unique_answerers) if len(unique_answerers) > 0 else 0
        }

    return community_stats

In [7]:
# Per-community answer statistics for the tags graph and its partition.
result = compute(
    G=tags_graph,
    partition=tags_part,
    questions=questions,
    answers=answers,
)

In [8]:
# Minimum number of answers a community must have to be kept.
barrier = 10
# Despite the name, this dict is only filtered here, not ordered.
sorted_result = {
    community_id: stats
    for community_id, stats in result.items()
    if stats["total_answers"] >= barrier
}

In [9]:
# Rank the filtered communities by the share of their answers that were given
# to complex questions, highest share first.
sorted_data = sorted(
    sorted_result.items(),
    key=lambda item: item[1]["complex_to_total_ratio"],
    reverse=True,
)

# Dicts preserve insertion order, so this keeps the ranking.
sorted_dict = dict(sorted_data)

# Print the ten top-ranked communities (fewer if fewer passed the filter).
for key, value in sorted_data[:10]:
    print(f"{key}: {value}")

1562: {'community': [3952, 3953, 3954], 'total_answers': 15, 'total_complex_answers': 7, 'unique_answerers': 3, 'unique_complex_answerers': 1, 'complex_to_total_ratio': 0.4666666666666667, 'complex_to_total_answerers_ratio': 0.3333333333333333}
599: {'community': [1653, 1654], 'total_answers': 14, 'total_complex_answers': 3, 'unique_answerers': 2, 'unique_complex_answerers': 1, 'complex_to_total_ratio': 0.21428571428571427, 'complex_to_total_answerers_ratio': 0.5}
1071: {'community': [2806, 2807, 2808], 'total_answers': 19, 'total_complex_answers': 4, 'unique_answerers': 3, 'unique_complex_answerers': 2, 'complex_to_total_ratio': 0.21052631578947367, 'complex_to_total_answerers_ratio': 0.6666666666666666}
1347: {'community': [3449, 3450], 'total_answers': 24, 'total_complex_answers': 5, 'unique_answerers': 2, 'unique_complex_answerers': 1, 'complex_to_total_ratio': 0.20833333333333334, 'complex_to_total_answerers_ratio': 0.5}
362: {'community': [1081, 1082, 1083, 1084], 'total_answers'

In [10]:
# Rank the filtered communities by the share of their members who answered at
# least one complex question, highest share first.
sorted_data = sorted(
    sorted_result.items(),
    key=lambda item: item[1]["complex_to_total_answerers_ratio"],
    reverse=True,
)

# Dicts preserve insertion order, so this keeps the ranking.
sorted_dict = dict(sorted_data)

# Print the ten top-ranked communities (fewer if fewer passed the filter).
for key, value in sorted_data[:10]:
    print(f"{key}: {value}")

307: {'community': [937, 938, 939], 'total_answers': 28, 'total_complex_answers': 4, 'unique_answerers': 3, 'unique_complex_answerers': 2, 'complex_to_total_ratio': 0.14285714285714285, 'complex_to_total_answerers_ratio': 0.6666666666666666}
1071: {'community': [2806, 2807, 2808], 'total_answers': 19, 'total_complex_answers': 4, 'unique_answerers': 3, 'unique_complex_answerers': 2, 'complex_to_total_ratio': 0.21052631578947367, 'complex_to_total_answerers_ratio': 0.6666666666666666}
48: {'community': [210, 211], 'total_answers': 352, 'total_complex_answers': 7, 'unique_answerers': 2, 'unique_complex_answerers': 1, 'complex_to_total_ratio': 0.019886363636363636, 'complex_to_total_answerers_ratio': 0.5}
131: {'community': [459, 460], 'total_answers': 25, 'total_complex_answers': 1, 'unique_answerers': 2, 'unique_complex_answerers': 1, 'complex_to_total_ratio': 0.04, 'complex_to_total_answerers_ratio': 0.5}
134: {'community': [468, 469], 'total_answers': 22, 'total_complex_answers': 2, 'u

Есть пользователи, которые отвечают очень много, но при этом у них лишь несколько ответов на сложные вопросы (например, сообщество 48: 352 ответа, из них только 7 — на сложные вопросы).