## Imports & Config

In [1]:
import pandas as pd
import networkx as nx

# Relative path to the input CSV that the notebook loads with pd.read_csv.
# Change it here if the data file lives somewhere else.
DATA_PATH = "data/soc-sign-bitcoinotc.csv"


## Load & Inspect the Dataset

In [2]:
# `pd` and DATA_PATH come from the "Imports & Config" cell at the top of the
# notebook. They are intentionally not re-imported / redefined here so that the
# dependencies and the input path have a single source of truth.

# Names assigned to the CSV columns, in file order (the file has no header row).
col_names = ["source", "target", "rating", "time"]

df = pd.read_csv(
    DATA_PATH,
    header=None,      # no header in file
    names=col_names,  # assign our own names
    sep=","           # IMPORTANT: comma-separated
)

# Quick look at what was loaded: first rows, table size, and value ranges.
display(df.head(10))
print("\nShape:", df.shape)
print("Min/Max rating:", df["rating"].min(), df["rating"].max())
print("Time range (epoch):", df["time"].min(), "→", df["time"].max())


Unnamed: 0,source,target,rating,time
0,6,2,4,1289242000.0
1,6,5,2,1289242000.0
2,1,15,1,1289243000.0
3,4,3,7,1289245000.0
4,13,16,8,1289254000.0
5,13,10,8,1289254000.0
6,7,5,1,1289363000.0
7,2,21,5,1289371000.0
8,2,20,5,1289371000.0
9,21,2,5,1289381000.0



Shape: (35592, 4)
Min/Max rating: -10 10
Time range (epoch): 1289241911.72836 → 1453684323.75728


## Build the Directed Graph G

In [3]:
# Directed graph built from `df`: one edge per row, pointing from the node in
# the `source` column to the node in the `target` column.
G = nx.DiGraph()

# itertuples() replaces iterrows(): iterrows() materialises a Series per row,
# which upcasts the integer columns to float next to the fractional `time`
# column and is considerably slower. name=None yields plain tuples in the
# column order selected below.
edge_rows = df[["source", "target", "rating", "time"]].itertuples(index=False, name=None)

# The explicit int()/float() casts keep node ids and edge attributes as
# built-in Python numbers (use int(ts) instead if you want to drop decimals).
# Note: a DiGraph keeps at most one edge per (source, target) pair, so a
# repeated pair would overwrite the attributes of the earlier edge.
G.add_edges_from(
    (int(src), int(dst), {"rating": int(rating), "time": float(ts)})
    for src, dst, rating, ts in edge_rows
)

print("Number of nodes:", G.number_of_nodes())
print("Number of edges:", G.number_of_edges())


Number of nodes: 5881
Number of edges: 35592


## Create and Save Node Table

In [4]:
# 1) Node table: one record per node of G with its in- and out-degree.
nodes = [
    {
        "id": node_id,
        "in_degree": G.in_degree(node_id),
        "out_degree": G.out_degree(node_id),
    }
    for node_id in G.nodes()
]

nodes_df = pd.DataFrame(nodes)
display(nodes_df.head())

# Persist without the positional index so the CSV holds only the three columns.
nodes_df.to_csv("nodes.csv", index=False)
print("Saved nodes.csv")


Unnamed: 0,id,in_degree,out_degree
0,6,44,40
1,2,41,45
2,5,3,3
3,1,226,215
4,15,13,15


Saved nodes.csv


## Create and Save Edge Table

In [5]:
# 2) Edge table: one record per edge of G, with derived magnitude and sign
#    columns and the timestamp truncated to whole seconds.
edges = []
for src, dst, attrs in G.edges(data=True):
    # A missing attribute falls back to 0, exactly like a neutral rating.
    rating = int(attrs.get("rating", 0))

    if rating > 0:
        sign_label = "positive"
    elif rating < 0:
        sign_label = "negative"
    else:
        sign_label = "zero"

    record = {
        "source": src,
        "target": dst,
        "rating": rating,
        "abs_rating": abs(rating),
        "sign": sign_label,
        "time": int(attrs.get("time", 0)),  # int() drops the fractional part
    }
    edges.append(record)

edges_df = pd.DataFrame(edges)
display(edges_df.head())

edges_df.to_csv("edges.csv", index=False)
print("Saved edges.csv")


Unnamed: 0,source,target,rating,abs_rating,sign,time
0,6,2,4,4,positive,1289241911
1,6,5,2,2,positive,1289241941
2,6,4,2,2,positive,1289770700
3,6,7,5,5,positive,1290826367
4,6,114,2,2,positive,1296291457


Saved edges.csv


## Export Graph to GraphML

In [None]:
# Output file name for the GraphML export; as a relative path it is resolved
# against the notebook's current working directory.
OUTPUT_GRAPHML = "bitcoin_otc_signed.graphml"

# Write the graph G to OUTPUT_GRAPHML in GraphML format.
# NOTE(review): the per-edge "rating"/"time" attributes set when G was built
# are presumably carried into the file — confirm by reloading it.
nx.write_graphml(G, OUTPUT_GRAPHML)
print(f"Saved GraphML to {OUTPUT_GRAPHML}")


Saved GraphML to bitcoin_otc_signed.graphml
