# Final Visualizations

In [1]:
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
import netin
import experiments
import os

In [2]:
# Share of the minority group, presumably the 0.3 level the parity plot
# compares against. NOTE(review): no cell visible here reads this name;
# find_decile uses a literal 0.3 instead - confirm the two are meant to match.
MINORITY_FRACTION = 0.3

In [None]:
# Experiment identifier. Cells below read data/{EXPERIMENT_NAME}/results.csv
# and write figures under data/{EXPERIMENT_NAME}/plot.
EXPERIMENT_NAME = "Exp12H2000NodesPageRankMediumHomophily1Rewiring100Iterations"

### Plotting helpers

In [8]:
def plot_agg_results(df, col="0.1", by=10):
    """Plot one column of ``df`` averaged over windows of ``by`` rows.

    Rows are grouped by ``df.index // by`` and averaged; the mean of ``col`` is
    drawn against the mean of ``Iteration``. The figure is displayed and then
    saved to ``data/{EXPERIMENT_NAME}/plot/agg_by_{by}.svg``.

    Args:
        df: Results frame containing an ``Iteration`` column and ``col``.
            The grouping uses ``df.index // by``, so a numeric index is
            required (presumably the default RangeIndex).
        col: Name of the column to plot. When the exact name is absent, the
            two-decimal spelling is tried (``"0.1"`` -> ``"0.10"``), because
            the results columns are written with two decimals.
        by: Number of consecutive rows averaged into one plotted point.

    Raises:
        KeyError: If neither ``col`` nor its two-decimal spelling is a column.
    """
    df_agg = df.groupby(df.index // by).mean().reset_index()

    # Without this fallback the default col="0.1" raises KeyError on a frame
    # whose rank columns are named "0.01", "0.02", ..., "0.10", ...
    if col not in df_agg.columns:
        try:
            two_decimal = f"{float(col):.2f}"
        except (TypeError, ValueError):
            two_decimal = col
        if two_decimal in df_agg.columns:
            col = two_decimal

    # Label the trace after the column actually plotted instead of always
    # claiming "Top 10%"; for col="0.10" the text is unchanged.
    try:
        trace_name = f"Fraction of Minorities in Top {float(col) * 100:g}%"
    except (TypeError, ValueError):
        trace_name = str(col)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=df_agg["Iteration"],
            y=df_agg[col],
            mode="lines",
            line=dict(color="black", width=4),
            name=trace_name,
        )
    )

    fig.update_layout(
        height=600,
        width=800,
        title={
            "y": 0.9,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
        },
        xaxis_title="Feedback Loop",
        yaxis_title="Minority Fraction",
        font=dict(
            family="Arial, sans-serif",
            size=28,
            color="black",
        ),
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        legend=dict(yanchor="top", y=0.99, xanchor="right", x=0.99),
    )

    fig.update_xaxes(showline=True, linewidth=1, linecolor="black", mirror=False)
    fig.update_yaxes(showline=True, linewidth=1, linecolor="black", mirror=False)

    fig.show()

    # exist_ok avoids the separate existence check (and its race).
    plot_dir = f"data/{EXPERIMENT_NAME}/plot"
    os.makedirs(plot_dir, exist_ok=True)

    # NOTE(review): the file name depends only on `by`, so plotting a different
    # `col` with the same `by` overwrites the previous image.
    fig.write_image(f"{plot_dir}/agg_by_{by}.svg")

In [19]:
# Load the per-iteration results of the selected experiment.
# NOTE(review): the "Unnamed: 0" column in the preview suggests the CSV was
# saved together with its index - presumably to_csv without index=False; verify
# against the code that writes results.csv.
df = pd.read_csv(f"data/{EXPERIMENT_NAME}/results.csv")
df.head()

Unnamed: 0,Iteration,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,...,0.93,0.94,0.95,0.96,0.97,0.98,0.99,1.00,homophily_major,homophily_minor
0,0,0.210526,0.230769,0.169492,0.177215,0.181818,0.184874,0.223022,0.213836,0.22905,...,0.300971,0.298292,0.298153,0.299426,0.300465,0.300817,0.299646,0.3,0.754779,0.669976
1,1,0.210526,0.230769,0.169492,0.177215,0.181818,0.184874,0.223022,0.220126,0.22905,...,0.300971,0.298667,0.298153,0.298905,0.300465,0.300817,0.300152,0.3,0.769433,0.651115
2,2,0.210526,0.230769,0.169492,0.164557,0.181818,0.193277,0.230216,0.226415,0.22905,...,0.300971,0.298667,0.297996,0.299426,0.300981,0.300153,0.3,0.3,0.750295,0.673837
3,3,0.210526,0.230769,0.169492,0.164557,0.181818,0.193277,0.230216,0.226415,0.22905,...,0.300971,0.2992,0.299051,0.299948,0.300981,0.300664,0.3,0.3,0.749654,0.67387
4,4,0.210526,0.230769,0.169492,0.164557,0.181818,0.193277,0.223022,0.220126,0.22905,...,0.300971,0.299733,0.29942,0.299948,0.301497,0.300664,0.3,0.3,0.749641,0.673885


In [20]:
# Preview the last rows of the results frame.
df.tail()

Unnamed: 0,Iteration,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,...,0.93,0.94,0.95,0.96,0.97,0.98,0.99,1.00,homophily_major,homophily_minor
56,56,0.210526,0.205128,0.169492,0.189873,0.191919,0.193277,0.194245,0.202532,0.207865,...,0.299081,0.29802,0.300159,0.301047,0.301036,0.302409,0.302078,0.3,0.74906,0.655312
57,57,0.210526,0.205128,0.169492,0.189873,0.191919,0.193277,0.194245,0.202532,0.207865,...,0.29854,0.298555,0.300159,0.300524,0.301036,0.301896,0.302078,0.3,0.748963,0.655429
58,58,0.210526,0.205128,0.169492,0.189873,0.191919,0.193277,0.188406,0.208861,0.207865,...,0.29816,0.29834,0.299417,0.29979,0.300311,0.301693,0.301217,0.3,0.748319,0.656237
59,59,0.210526,0.205128,0.169492,0.189873,0.191919,0.194915,0.188406,0.208861,0.207865,...,0.298103,0.298123,0.299363,0.299738,0.300623,0.301489,0.301724,0.3,0.748945,0.65621
60,60,0.210526,0.205128,0.169492,0.189873,0.191919,0.194915,0.195652,0.202532,0.207865,...,0.297019,0.297051,0.299363,0.299213,0.300104,0.300821,0.301724,0.3,0.749452,0.654806


### Plots

In [21]:
# Un-aggregated view: the "0.10" column plotted against every iteration.
fig = go.Figure(
    data=[
        go.Scatter(
            x=df["Iteration"],
            y=df["0.10"],
            mode="lines",
        )
    ]
)
fig.update_layout(
    xaxis_title="Iteration",
    yaxis_title="Minority fraction",
    title="Minority fraction over iterations",
)
fig.show()

### Aggregated results

In [22]:
# Window-averaged copies of the results: rows are bucketed by index // window
# and each bucket is reduced to its column-wise mean.
df_agg_10, df_agg_15, df_agg_20, df_agg_25 = (
    df.groupby(df.index // window).mean().reset_index()
    for window in (10, 15, 20, 25)
)

In [23]:
# Column "0.10" averaged over windows of 5 rows; saved as agg_by_5.svg.
plot_agg_results(df, col="0.10", by=5)

In [24]:
# Column "0.10" averaged over windows of 10 rows; saved as agg_by_10.svg.
plot_agg_results(df, col="0.10", by=10)

In [25]:
# Column "0.10" averaged over windows of 20 rows; saved as agg_by_20.svg.
plot_agg_results(df, col="0.10", by=20)

In [26]:
# Inspect the 10-row averages. When the row count is not a multiple of 10 the
# last group averages fewer rows (in the saved output it holds only
# iteration 60), so its values are not smoothed like the others.
df_agg_10

Unnamed: 0,index,Iteration,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,...,0.93,0.94,0.95,0.96,0.97,0.98,0.99,1.00,homophily_major,homophily_minor
0,0,4.5,0.210526,0.230769,0.169492,0.167089,0.181818,0.189076,0.223741,0.223899,...,0.300512,0.298731,0.298987,0.299791,0.300681,0.300643,0.299879,0.3,0.751961,0.671205
1,1,14.5,0.210526,0.2,0.169492,0.164557,0.181818,0.191597,0.215827,0.226415,...,0.298884,0.299152,0.300042,0.299536,0.29984,0.299765,0.300394,0.3,0.745895,0.67125
2,2,24.5,0.210526,0.2,0.169492,0.175949,0.174747,0.201681,0.215827,0.218868,...,0.299272,0.29992,0.299741,0.299447,0.299417,0.300751,0.30086,0.3,0.739263,0.675185
3,3,34.5,0.210526,0.205128,0.169492,0.177215,0.171717,0.201681,0.207914,0.208176,...,0.298446,0.299408,0.299061,0.298758,0.300186,0.30135,0.301285,0.3,0.72749,0.686811
4,4,44.5,0.210526,0.205128,0.169492,0.187342,0.178788,0.196639,0.2,0.210063,...,0.298191,0.298868,0.298415,0.30046,0.301799,0.302041,0.302065,0.3,0.729301,0.685468
5,5,54.5,0.210526,0.205128,0.169492,0.189873,0.190909,0.193441,0.198113,0.206938,...,0.299075,0.298647,0.29972,0.301334,0.301366,0.302125,0.301819,0.3,0.738775,0.668775
6,6,60.0,0.210526,0.205128,0.169492,0.189873,0.191919,0.194915,0.195652,0.202532,...,0.297019,0.297051,0.299363,0.299213,0.300104,0.300821,0.301724,0.3,0.749452,0.654806


In [27]:
import netin.utils.constants
import math
import numpy as np

# show all rows and columns
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# Overrides netin's module-level RANK_RANGE for this kernel session; find_decile
# formats these values with two decimals to look up the result columns.
# NOTE(review): np.arange with a float step can yield an unexpected number of
# elements (NumPy recommends np.linspace for non-integer steps) - confirm this
# produces exactly the values 0.01 .. 1.00.
netin.utils.constants.RANK_RANGE = np.arange(0.01, 1 + 0.01, 0.01)


def find_decile(row, target=0.3, abs_tol=0.01, rank_range=None):
    """Return the first top-k percentage whose value in ``row`` is near ``target``.

    The rank values are scanned in order; each is formatted with two decimals
    to obtain a column name (``0.07`` -> ``"0.07"``) and the first column whose
    value lies within ``abs_tol`` of ``target`` wins.

    Despite the name, the result is a percentage (the column name times 100,
    e.g. ``"0.07"`` -> about ``7.0``), not a decile.

    Args:
        row: Mapping from column name to value, e.g. one DataFrame row as
            passed by ``DataFrame.apply(..., axis=1)``.
        target: Value to search for. Defaults to 0.3.
        abs_tol: Absolute tolerance used for the comparison. Defaults to 0.01.
        rank_range: Iterable of rank fractions to scan. Defaults to
            ``netin.utils.constants.RANK_RANGE``, read at call time.

    Returns:
        ``float(column_name) * 100`` for the first matching column, or ``None``
        when no column is within tolerance.

    Raises:
        KeyError: If a formatted rank value is not a key of ``row``.
    """
    if rank_range is None:
        rank_range = netin.utils.constants.RANK_RANGE

    for p in rank_range:
        col = f"{p:.2f}"
        if math.isclose(row[col], target, abs_tol=abs_tol):
            return float(col) * 100

    # Explicit: no scanned column is close enough to the target.
    return None

In [28]:
# Plot the 'parityK' column: the top-k percentage picked by find_decile


def plot_parity(df, by=10):
    """Plot, per window of ``by`` rows, the top-k percentage chosen by ``find_decile``.

    Rows are grouped by ``df.index // by`` and averaged; ``find_decile`` is then
    applied to every averaged row and stored as ``parityK``, which is drawn
    against the mean of ``Iteration``. The figure is saved to
    ``data/{EXPERIMENT_NAME}/plot/parity_agg_by_{by}.svg`` and then displayed.

    Args:
        df: Results frame containing an ``Iteration`` column plus the columns
            ``find_decile`` looks up. The grouping uses ``df.index // by``, so
            a numeric index is required.
        by: Number of consecutive rows averaged into one plotted point.
    """
    df_agg = df.groupby(df.index // by).mean().reset_index()
    df_agg["parityK"] = df_agg.apply(find_decile, axis=1)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=df_agg["Iteration"],
            y=df_agg["parityK"],
            mode="lines",
            line=dict(color="black", width=5),
            # Previously reused the label of the top-10% plot, which does not
            # describe this series; now matches the y-axis title.
            name="Top-K at which minorities are 30%",
        )
    )

    fig.update_layout(
        height=600,
        width=800,
        title={
            "y": 0.9,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
        },
        xaxis_title="Feedback Loop",
        yaxis_title="Top-K at which minorities are 30%",
        font=dict(
            family="Arial, sans-serif",
            size=32,
            color="black",
        ),
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        legend=dict(yanchor="top", y=0.99, xanchor="right", x=0.99),
    )

    fig.update_xaxes(showline=True, linewidth=1, linecolor="black", mirror=False)
    fig.update_yaxes(showline=True, linewidth=1, linecolor="black", mirror=False)

    # exist_ok avoids the separate existence check (and its race).
    plot_dir = f"data/{EXPERIMENT_NAME}/plot"
    os.makedirs(plot_dir, exist_ok=True)

    fig.write_image(f"{plot_dir}/parity_agg_by_{by}.svg")

    fig.show()

In [29]:
# Parity top-k per window of 10 rows; saved as parity_agg_by_10.svg.
plot_parity(df, by=10)