### Import

In [4]:
import networkx as nx
import pandas as pd
import os
import glob
import matplotlib
import scipy

### Hard coded username dictionary

In [5]:
username_dict = {
    "samuel": "swang330",
    "alex": "alex.kalis",
    "alexandra": "alexandrapurdy_",
    "kabir": "kabir_aho",
    "kirin": "kirindanek",
    "liam": "liam_hochman",
    "noah": "noahpurow",
    "seoeun": "seoeunki.m",
    "will": "will.deley",
    "zach": "zach.annuik",
    "yota": "yota.katsikouli",
    "dis_abroad": "dis.copenhagen",
    "ely": "elybrayboy"
}

### Build network

In [18]:

current_dir = os.path.dirname(os.getcwd())
csv_directory = os.path.join(current_dir, "final_project", "csv_files") # may need to edit for your setup

output_graph_file = "final_project/instagram_network.graphml"
# ---

def build_network_from_csvs(csv_dir):
    G = nx.DiGraph()
    csv_files = glob.glob(os.path.join(csv_dir, "*_data.csv"))

    if not csv_files:
        print(f"error: no '*_data.csv' files found in directory: {csv_dir}")
        return None

    print(f"found {len(csv_files)} CSV files to process.")

    for csv_path in csv_files:
        filename = os.path.basename(csv_path)
        print(f"processing: {filename}...")

        parts = filename.replace("_data.csv", "").split('_')
        if len(parts) < 2:
            print(f"warning: skipping file with unexpected name format: {filename}")
            continue

        relationship_type = parts[-1]
        central_node = username_dict["_".join(parts[:-1])]
        print(central_node)

        if relationship_type not in ["followers", "following"]:
            print(f"warning: Skipping file - cannot determine relationship type (expected 'followers' or 'following'): {filename}")
            continue

        if not central_node:
             print(f"warning: Skipping file - could not determine central node: {filename}")
             continue
             
        G.add_node(central_node)

        try:
            df = pd.read_csv(csv_path)
            if "username" not in df.columns:
                print(f"warning: Skipping file - 'username' column not found in {filename}")
                continue

            other_users = df["username"].dropna().unique()

            for user in other_users:
                G.add_node(user) # Add other user node if it doesn't exist

                if relationship_type == "followers":
                    # Edge from follower (user) to central_node
                    G.add_edge(user, central_node)
                elif relationship_type == "following":
                    # Edge from central_node to the person they follow (user)
                    G.add_edge(central_node, user)

            print(f"-> added {len(other_users)} edges for {central_node} ({relationship_type}).")

        except pd.errors.EmptyDataError:
             print(f"warning: Skipping empty file: {filename}")
        except Exception as e:
            print(f"error processing file {filename}: {e}")

    return G

network_graph = build_network_from_csvs(csv_directory)


print("\n--- Graph Construction Complete ---")
print(f"Number of nodes (users): {network_graph.number_of_nodes()}")
print(f"Number of edges (relationships): {network_graph.number_of_edges()}")


found 24 CSV files to process.
processing: dis_abroad_followers_data.csv...
dis.copenhagen
-> added 4856 edges for dis.copenhagen (followers).
processing: samuel_following_data.csv...
swang330
-> added 1104 edges for swang330 (following).
processing: alex_following_data.csv...
alex.kalis
-> added 1675 edges for alex.kalis (following).
processing: zach_followers_data.csv...
zach.annuik
-> added 603 edges for zach.annuik (followers).
processing: liam_followers_data.csv...
liam_hochman
-> added 817 edges for liam_hochman (followers).
processing: dis_abroad_following_data.csv...
dis.copenhagen
-> added 240 edges for dis.copenhagen (following).
processing: samuel_followers_data.csv...
swang330
-> added 683 edges for swang330 (followers).
processing: liam_following_data.csv...
liam_hochman
-> added 1678 edges for liam_hochman (following).
processing: alex_followers_data.csv...
alex.kalis
-> added 1235 edges for alex.kalis (followers).
processing: zach_following_data.csv...
zach.annuik
-> add