In [5]:
import warnings
from itertools import combinations
from pathlib import Path

import networkx as nx
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [ ]:
# Input files are read from BASE; generated results are written to OUT.
BASE, OUT = Path("input"), Path("output")
# Create the results folder (and any missing parents); no error if it already exists.
OUT.mkdir(exist_ok=True, parents=True)


In [6]:
# Main outbreak table; every later cell derives its data from this frame.
clean = pd.read_csv(BASE / "outbreaks_clean.csv")
# One row per distinct (iso3, country) combination.
nodes_df = clean.loc[:, ["iso3", "country"]].drop_duplicates()
# ISO3 code -> country name lookup (a later row wins if a code repeats).
iso_to_country = dict(zip(nodes_df["iso3"], nodes_df["country"]))

def _load_region_map(path: Path, sep: str = ";") -> dict:
    """Read an ISO3 -> region lookup from a delimited text file.

    Loading is best-effort because the file is optional: a missing file
    yields an empty mapping silently, while an unreadable file or a file
    without ``iso3``/``region`` columns yields an empty mapping *and* a
    warning, so the fallback is visible instead of being swallowed.

    Parameters
    ----------
    path : Path
        Location of the lookup file.
    sep : str, default ";"
        Column separator passed to ``pandas.read_csv``.

    Returns
    -------
    dict
        Maps stripped ISO3 codes to stripped region names. Rows with a
        missing/blank code are skipped; a missing/blank region is stored
        as ``"Other"``.
    """
    if not path.exists():
        return {}
    try:
        iso = pd.read_csv(path, sep=sep)
        # Match column names case-insensitively, ignoring surrounding whitespace.
        col = {str(c).strip().lower(): c for c in iso.columns}
        if "iso3" not in col or "region" not in col:
            warnings.warn(
                f"{path} has no 'iso3'/'region' columns; regions will not be assigned.",
                stacklevel=2,
            )
            return {}
        mapping = {}
        for code, region in zip(iso[col["iso3"]], iso[col["region"]]):
            # str(NaN) is the truthy string "nan", so missing cells must be
            # detected before converting to text.
            if pd.isna(code):
                continue
            code = str(code).strip()
            if not code:
                continue
            region = "" if pd.isna(region) else str(region).strip()
            mapping[code] = region or "Other"
        return mapping
    except Exception as exc:  # keep the notebook running, but do not hide the failure
        warnings.warn(f"Could not load region map from {path}: {exc}", stacklevel=2)
        return {}


iso_path = BASE / "isocodes.csv"  # optional file with the region of each country
region_map = _load_region_map(iso_path)  # ISO3 code -> region name

def node_region(iso3: str) -> str:
    """Return the region recorded for an ISO3 code, or "Other" when the code is unknown."""
    if iso3 in region_map:
        return region_map[iso3]
    return "Other"

# Marker colour per region; the "Other" entry is also used as the fallback colour.
region_palette = dict(
    Africa="#1f77b4",
    Americas="#ff7f0e",
    Asia="#2ca02c",
    Europe="#d62728",
    Oceania="#9467bd",
    Antarctica="#8c564b",
    Other="#7f7f7f",
)

def edges_with_years_for_disease(disease: str, data: "pd.DataFrame | None" = None) -> pd.DataFrame:
    """Build country-pair edges for one disease, weighted by shared outbreak years.

    Two countries are linked when both have a record of ``disease`` in the
    same year; the edge weight is the number of such years.

    Parameters
    ----------
    disease : str
        Value of the ``disease`` column to filter on.
    data : pd.DataFrame, optional
        Frame with ``disease``, ``year`` and ``iso3`` columns. Defaults to the
        notebook-level ``clean`` frame.

    Returns
    -------
    pd.DataFrame
        Columns ``iso3_a``, ``iso3_b`` (with ``iso3_a < iso3_b``), ``weight``
        and ``years`` (sorted list of ints), ordered by descending weight.
        An empty frame with these columns is returned when no pair exists.
    """
    frame = clean if data is None else data
    sub = (
        frame.loc[frame["disease"] == disease, ["year", "iso3"]]
        # A missing iso3 is a float NaN and would make sorted() fail when
        # compared with a string code, so incomplete rows are dropped first.
        .dropna(subset=["year", "iso3"])
        .drop_duplicates()
    )
    pairs = {}
    for y, g in sub.groupby("year"):
        # Every unordered pair of countries reporting in this year.
        for first, second in combinations(g["iso3"].tolist(), 2):
            a, b = sorted((first, second))
            pairs.setdefault((a, b), set()).add(int(y))
    rows = [
        {"iso3_a": a, "iso3_b": b, "weight": len(years), "years": sorted(years)}
        for (a, b), years in pairs.items()
    ]
    if not rows:
        return pd.DataFrame(columns=["iso3_a", "iso3_b", "weight", "years"])
    return pd.DataFrame(rows).sort_values("weight", ascending=False)

def edges_with_diseases_for_year(year: int, data: "pd.DataFrame | None" = None) -> pd.DataFrame:
    """Build country-pair edges for one year, weighted by shared diseases.

    Two countries are linked when both have a record of the same disease in
    ``year``; the edge weight is the number of such diseases.

    Parameters
    ----------
    year : int
        Value of the ``year`` column to filter on.
    data : pd.DataFrame, optional
        Frame with ``disease``, ``year`` and ``iso3`` columns. Defaults to the
        notebook-level ``clean`` frame.

    Returns
    -------
    pd.DataFrame
        Columns ``iso3_a``, ``iso3_b`` (with ``iso3_a < iso3_b``), ``weight``
        and ``diseases`` (sorted list), ordered by descending weight.
        An empty frame with these columns is returned when no pair exists.
    """
    frame = clean if data is None else data
    sub = (
        frame.loc[frame["year"] == year, ["disease", "iso3"]]
        # A missing iso3 is a float NaN and would make sorted() fail when
        # compared with a string code, so incomplete rows are dropped first.
        .dropna(subset=["disease", "iso3"])
        .drop_duplicates()
    )
    pairs = {}
    for dis, g in sub.groupby("disease"):
        # Every unordered pair of countries reporting this disease.
        for first, second in combinations(g["iso3"].tolist(), 2):
            a, b = sorted((first, second))
            pairs.setdefault((a, b), set()).add(dis)
    rows = [
        {"iso3_a": a, "iso3_b": b, "weight": len(diseases), "diseases": sorted(diseases)}
        for (a, b), diseases in pairs.items()
    ]
    if not rows:
        return pd.DataFrame(columns=["iso3_a", "iso3_b", "weight", "diseases"])
    return pd.DataFrame(rows).sort_values("weight", ascending=False)

def figure_from_edges(edges: pd.DataFrame, title: str, detail_col: str, topN: int = 200) -> go.Figure:
    """Draw a country co-occurrence network from an edge list.

    Parameters
    ----------
    edges : pd.DataFrame
        Edge list with columns ``iso3_a``, ``iso3_b``, ``weight`` and either
        ``years`` or ``diseases``. Only the first ``topN`` rows are drawn, in
        the order given (the edge builders in this notebook return frames
        sorted by descending weight).
    title : str
        Figure title; a suffix is appended when there is nothing to draw.
    detail_col : str
        ``"years"`` shows the ``years`` column in edge hovers; any other
        value shows the ``diseases`` column.
    topN : int, default 200
        Maximum number of edges to draw.

    Returns
    -------
    go.Figure
        One line trace per edge followed by one marker trace per region.
        A figure without traces is returned when there is nothing to draw.
    """
    if edges.empty:
        fig = go.Figure()
        fig.update_layout(title=title + " — no edges", margin=dict(l=0, r=0, t=40, b=0))
        return fig

    edges_top = edges.head(topN).copy()  # keep only the first topN edges

    G = nx.Graph()
    for _, r in edges_top.iterrows():
        G.add_edge(r["iso3_a"], r["iso3_b"], weight=float(r["weight"]))

    G.remove_nodes_from(list(nx.isolates(G)))  # drop nodes without any edge

    if G.number_of_nodes() == 0:
        fig = go.Figure()
        fig.update_layout(title=title + " — graph empty after filtering", margin=dict(l=0, r=0, t=40, b=0))
        return fig

    pos = nx.spring_layout(G, seed=42)  # fixed seed keeps the layout reproducible
    # Node strength = sum of the weights of its incident edges.
    strength = {n: float(sum(d.get("weight", 1.0) for _, _, d in G.edges(n, data=True))) for n in G.nodes()}

    # --- Edges ---
    wvals = edges_top["weight"].values
    wmin, wmax = wvals.min(), wvals.max()

    def width_for(w):
        """Scale a weight linearly to a line width in [1, 5]; 2 when all weights are equal."""
        if wmax == wmin:
            return 2.0
        return 1.0 + 4.0 * (w - wmin) / (wmax - wmin)

    # Which column feeds the hover text, and the label shown next to it.
    if detail_col == "years":
        detail_key, detail_label = "years", "Years"
    else:
        detail_key, detail_label = "diseases", "Diseases"

    traces = []
    for _, r in edges_top.iterrows():
        u, v, w = r["iso3_a"], r["iso3_b"], int(r["weight"])
        if (u not in G) or (v not in G):
            continue
        x0, y0 = pos[u]
        x1, y1 = pos[v]
        raw = r[detail_key]
        # str() each item so non-string entries (e.g. integer years) join cleanly.
        details = ", ".join(str(x) for x in raw) if isinstance(raw, (list, tuple)) else str(raw)
        hover = (
            f"{iso_to_country.get(u,u)} — {iso_to_country.get(v,v)}"
            f"<br><b>Weight:</b> {w}<br><b>{detail_label}:</b> {details}"
        )
        # Hover labels attach to data points, and the two endpoints sit under
        # the node markers. A midpoint gives the edge its own hover target
        # without changing the drawn line.
        xm, ym = (x0 + x1) / 2, (y0 + y1) / 2
        traces.append(go.Scatter(
            x=[x0, xm, x1], y=[y0, ym, y1],
            mode='lines',
            line=dict(width=width_for(w), color="#666"),
            hoverinfo='text',
            hovertext=hover,
            showlegend=False
        ))

    # --- Nodes, one trace per region so the legend lists regions ---
    regions_present = {}
    for n in G.nodes():
        regions_present.setdefault(node_region(n), []).append(n)

    node_traces = []
    for rname, nodes in regions_present.items():
        xs = [pos[n][0] for n in nodes]
        ys = [pos[n][1] for n in nodes]
        # Marker size grows with the square root of the node strength.
        sizes = [10 + 4*np.sqrt(strength.get(n, 0)) for n in nodes]
        texts = list(nodes)
        hovers = [f"{iso_to_country.get(n,n)} (ISO3: {n})<br>Strength: {strength.get(n,0):.1f}<br>Degree: {G.degree(n)}<br>Region: {rname}" for n in nodes]
        color = region_palette.get(rname, region_palette["Other"])
        node_traces.append(go.Scatter(
            x=xs, y=ys, mode='markers+text',
            text=texts, textposition="top center",
            marker=dict(size=sizes, color=color),
            hovertext=hovers, hoverinfo='text',
            name=rname, showlegend=True
        ))

    # --- Final figure: edges first so the nodes are drawn on top ---
    fig = go.Figure()
    for tr in traces:
        fig.add_trace(tr)
    for tr in node_traces:
        fig.add_trace(tr)

    fig.update_layout(
        title=title,
        legend=dict(title="Region"),
        margin=dict(l=0, r=0, t=60, b=0),
        annotations=[dict(
            x=0.5, y=1.08, xref='paper', yref='paper', showarrow=False,
            text="Edge width ∝ weight; hover edges for details", font=dict(size=12)
        )]
    )
    return fig

# Networks per disease: one sub-network for each of the 15 most frequent
# diseases, combined into a single figure with a dropdown selector.
diseases_sorted = clean["disease"].value_counts().head(15).index.tolist()
fig_disease = go.Figure()
traces_per_disease = []  # number of traces contributed by each disease
titles_per_disease = []  # title of each sub-figure (includes any "no edges" suffix)

for i, dis in enumerate(diseases_sorted):
    ed = edges_with_years_for_disease(dis)
    fig = figure_from_edges(ed, f"Co-occurrence network — Disease: {dis}", detail_col="years")
    traces_per_disease.append(len(fig.data))
    titles_per_disease.append(fig.layout.title.text)
    for tr in fig.data:
        tr.visible = (i == 0)  # only the first disease is shown initially
        fig_disease.add_trace(tr)

# Dropdown switching between diseases: each button shows only its own slice of traces.
buttons = []
total_traces = len(fig_disease.data)
start = 0  # running offset of the current disease's first trace
for dis, count, sub_title in zip(diseases_sorted, traces_per_disease, titles_per_disease):
    vis = [start <= k < start + count for k in range(total_traces)]
    buttons.append(dict(
        label=dis,
        method="update",
        # Use the sub-figure's own title so an empty network is explained
        # instead of showing a blank plot.
        args=[{"visible": vis}, {"title": sub_title}]
    ))
    start += count

fig_disease.update_layout(
    updatemenus=[dict(type="dropdown", x=0.01, y=1.05, buttons=buttons, showactive=True)],
    # Start with the title of the initially visible network rather than a blank one.
    title=titles_per_disease[0] if titles_per_disease else "",
    # Only traces are copied from the sub-figures, so the legend title is set here.
    legend=dict(title="Region"),
    margin=dict(l=0, r=0, t=60, b=0)
)
by_disease_path = OUT / "outbreaks_network_by_disease_regions_weighted.html"
fig_disease.write_html(str(by_disease_path), include_plotlyjs="cdn")

# Networks per year: one sub-network for every year present in the data,
# combined into a single figure with a dropdown selector.
years_sorted = sorted(clean["year"].dropna().unique().tolist())
fig_year = go.Figure()
traces_per_year = []  # number of traces contributed by each year
titles_per_year = []  # title of each sub-figure (includes any "no edges" suffix)

for i, yr in enumerate(years_sorted):
    ed = edges_with_diseases_for_year(int(yr))
    fig = figure_from_edges(ed, f"Co-occurrence network — Year: {int(yr)}", detail_col="diseases")
    traces_per_year.append(len(fig.data))
    titles_per_year.append(fig.layout.title.text)
    for tr in fig.data:
        tr.visible = (i == 0)  # only the first year is shown initially
        fig_year.add_trace(tr)

# Dropdown switching between years: each button shows only its own slice of traces.
buttons_y = []
total_traces = len(fig_year.data)
start = 0  # running offset of the current year's first trace
for yr, count, sub_title in zip(years_sorted, traces_per_year, titles_per_year):
    vis = [start <= k < start + count for k in range(total_traces)]
    buttons_y.append(dict(
        label=str(int(yr)),
        method="update",
        # Use the sub-figure's own title so an empty network is explained
        # instead of showing a blank plot.
        args=[{"visible": vis}, {"title": sub_title}]
    ))
    start += count

fig_year.update_layout(
    updatemenus=[dict(type="dropdown", x=0.01, y=1.05, buttons=buttons_y, showactive=True)],
    # Start with the title of the initially visible network rather than a blank one.
    title=titles_per_year[0] if titles_per_year else "",
    # Only traces are copied from the sub-figures, so the legend title is set here.
    legend=dict(title="Region"),
    margin=dict(l=0, r=0, t=60, b=0)
)

by_year_path = OUT / "outbreaks_network_by_year_regions_weighted.html"
fig_year.write_html(str(by_year_path), include_plotlyjs="cdn")
