In [22]:
import pandas as pd
from igraph import Graph

# Pkt 3, 4
## Wczytanie danych

In [23]:
# Load the e-mail edge list as a two-column frame of node IDs.
# - The first two lines of the file are skipped (presumably a metadata
#   header — confirm against the data file).
# - Only the first two whitespace-separated columns are read and they are
#   named directly, so no intermediate slice of a wider frame is created.
# - A `\s+` separator is handled by pandas' default C parser, so the slower
#   Python engine does not need to be forced.
df = pd.read_csv(
    "input/out.radoslaw_email_email",
    sep=r"\s+",
    header=None,
    skiprows=2,
    usecols=[0, 1],
    names=["source", "target"],
)

print(df)


       source  target
0           1       2
1           1       3
2           1       4
3           1       5
4           1       6
...       ...     ...
82922       3      39
82923       3      39
82924      19      18
82925      85       3
82926      85      94

[82927 rows x 2 columns]


In [24]:
# Every node ID that occurs as either endpoint of an edge, computed once.
endpoint_ids = pd.unique(df[["source", "target"]].values.ravel())

print("Min:", endpoint_ids.min())
print("Max:", endpoint_ids.max())
print("Unikalne ID:", len(endpoint_ids))

# IDs in 1..167 that never appear in the edge list (empty set = no gaps).
expected_ids = set(range(1, 168))
missing = expected_ids - set(endpoint_ids)
print("Brakujące numery:", missing)


Min: 1
Max: 167
Unikalne ID: 167
Brakujące numery: set()


## Stworzenie grafu skierowanego

In [25]:
# Store the node IDs as strings; they become the vertex 'name' attribute below.
for endpoint in ("source", "target"):
    df[endpoint] = df[endpoint].astype(str)

# Build the directed graph, one edge per row of df, keeping the original
# IDs in the 'name' vertex attribute.
edges = list(zip(df["source"], df["target"]))
g = Graph.TupleList(edges, vertex_name_attr="name", directed=True)

print(g.summary())

IGRAPH DN-- 167 82927 -- 
+ attr: name (v)


# 5. Usunięcie pętli i wielokrotnych krawędzi

In [26]:
# Remove self-loops and merge parallel edges; the graph is modified in place.
# No rule for combining edge attributes is supplied (combine_edges=None).
g.simplify(loops=True, multiple=True, combine_edges=None)

print("Po simplify():", g.summary(), sep="\n")


Po simplify():
IGRAPH DN-- 167 5783 -- 
+ attr: name (v)


# 6. Liczenie wag zgodnie z formułą

In [27]:
# Edge weight w_ij = cnt_ij / cnt_i, where cnt_ij is the number of rows of
# `df` going from i to j and cnt_i is the number of rows whose source is i.
# NOTE(review): the counts use every row of `df`, including any rows with
# source == target, while self-loops were removed from `g` by simplify().
# If such rows exist, that node's outgoing weights sum to less than 1 —
# confirm this matches the intended formula.
cnt_ij = df.groupby(["source", "target"]).size().reset_index(name="cnt_ij")
cnt_i = df.groupby("source").size().reset_index(name="cnt_i")
cnt_ij = cnt_ij.merge(cnt_i, on="source")

cnt_ij["weight"] = cnt_ij["cnt_ij"] / cnt_ij["cnt_i"]

# (source name, target name) -> weight. Keys are cast to str so they match
# the string vertex names stored in g.vs["name"].
weight_dict = dict(
    zip(
        zip(cnt_ij["source"].astype(str), cnt_ij["target"].astype(str)),
        cnt_ij["weight"].tolist(),
    )
)

# Fetch all vertex names once instead of two attribute lookups per edge.
vertex_names = g.vs["name"]
edge_keys = [(vertex_names[s], vertex_names[t]) for s, t in g.get_edgelist()]

# Every edge of g is expected to come from a row of df. A missing key means
# the graph and the frame are out of sync (e.g. cells run out of order), so
# fail loudly instead of silently assigning a weight of 0.
unmatched = [key for key in edge_keys if key not in weight_dict]
if unmatched:
    raise ValueError(
        f"{len(unmatched)} graph edge(s) have no matching row in df, "
        f"e.g. {unmatched[:5]}"
    )

# Weights are listed in edge-ID order, as required for assignment to g.es.
weights = [weight_dict[key] for key in edge_keys]
g.es["weight"] = weights



## Sprawdzenie wag

In [28]:
# For every vertex, pair its name with the total weight of the edges that
# start at it (a vertex with no outgoing edges gets a total of 0).
check_sums = [
    (vertex["name"], sum(g.es.select(_source=vertex.index)["weight"]))
    for vertex in g.vs
]

# Only the first ten vertices are shown.
print("Powinno wyjść 1 dla każdego węzła:")
print(check_sums[:10])

Powinno wyjść 1 dla każdego węzła:
[('1', 1.0), ('2', 1.0), ('3', 1.0), ('4', 1.0), ('5', 1.0), ('6', 1.0), ('7', 1.0), ('8', 1.0), ('9', 1.0), ('10', 1.0)]
