In [63]:
import pandas as pd
from hashlib import md5

In [64]:
# Build the edge table: drop fully duplicated rows, count the non-null
# ``issue_no`` values for every (src_repo, dest_repo) pair, and list the
# most heavily referenced pairs first.
df = (
    pd.read_csv('xrefs.csv')
    .drop_duplicates()
    .groupby(["src_repo", "dest_repo"])
    .agg(refs=pd.NamedAgg(column="issue_no", aggfunc="count"))
    .sort_values("refs", ascending=False)
)

def to_gexf(df, file_name):
    """Write a cross-reference table to *file_name* as a GEXF 1.2 graph.

    Args:
        df: DataFrame whose index entries are ``(source, target)`` pairs and
            which has a ``refs`` column holding the weight of each pair.
        file_name: Path of the GEXF file to create (overwritten if present).

    Every distinct name found in the index is emitted as exactly one
    ``<node>`` (in first-seen order); every row becomes one directed
    ``<edge>`` whose ``weight`` attribute is the row's ``refs`` value.
    """
    # Imported locally so the function does not depend on a file-level import.
    from xml.sax.saxutils import quoteattr

    header = """<?xml version="1.0" encoding="UTF-8"?>
<gexf xmlns="http://gexf.net/1.2" version="1.2">
  <meta lastmodifieddate="2009-03-20">
    <creator>Gexf.net</creator>
    <description>xref data</description>
  </meta>
  <graph mode="static" defaultedgetype="directed">
"""

    footer = "        \n  </graph>\n</gexf>\n"

    # A name can occur in many pairs; GEXF node ids must be unique, so
    # collapse them while keeping first-seen order (dicts preserve order).
    node_names = dict.fromkeys(name for pair in df.index for name in pair)

    # The XML declaration promises UTF-8, so write UTF-8 regardless of the
    # platform's default encoding.
    with open(file_name, "w", encoding="utf-8") as f:
        f.write(header)
        f.write("    <nodes>\n")
        for name in node_names:
            # quoteattr escapes &, <, > and quotes and adds the surrounding
            # quote characters itself.
            attr = quoteattr(str(name))
            f.write(f'      <node id={attr} label={attr} />\n')
        f.write("    </nodes>\n")
        f.write("    <edges>\n")
        for (src, dest), weight in df["refs"].items():
            # Hex digest is safe to embed in an attribute without escaping.
            edge_id = md5(f"{src}-{dest}".encode("utf-8")).hexdigest()
            source_attr = quoteattr(str(src))
            target_attr = quoteattr(str(dest))
            f.write(
                f'      <edge id="{edge_id}" source={source_attr} '
                f'target={target_attr} weight="{weight}" />\n'
            )
        f.write("    </edges>\n")
        f.write(footer)
    
    
def to_gml(df, file_name):
    """Write a cross-reference table to *file_name* as a directed GML graph.

    Args:
        df: DataFrame whose index entries are ``(source, target)`` pairs and
            which has a ``refs`` column holding the weight of each pair.
        file_name: Path of the GML file to create (overwritten if present).

    Every distinct name found in the index becomes one ``node`` with a
    sequential integer ``id`` (first-seen order) and the name as ``label``.
    Every row becomes one ``edge`` referring to those integer ids, with the
    row's ``refs`` value stored under the ``value`` key.
    """
    # GML ids must be integers, so map each distinct name to a running
    # number; setdefault leaves already-numbered names untouched.
    node_ids = {}
    for pair in df.index:
        for name in pair:
            node_ids.setdefault(name, len(node_ids))

    def gml_string(value):
        # GML strings are ASCII with HTML-style entities: escape the
        # ampersand and the quote, then turn any non-ASCII character into a
        # numeric character reference.
        text = str(value).replace("&", "&amp;").replace('"', "&quot;")
        return text.encode("ascii", "xmlcharrefreplace").decode("ascii")

    chunks = ["graph\n[\n", "  directed 1\n", "  weighted 1\n"]
    for name, node_id in node_ids.items():
        chunks.append(
            f'  node\n  [\n    id {node_id}\n    label "{gml_string(name)}"\n  ]\n'
        )
    for (src, dest), weight in df["refs"].items():
        # Key and value are separated by whitespace in GML, not by "=".
        chunks.append(
            f"  edge\n  [\n    source {node_ids[src]}\n"
            f"    target {node_ids[dest]}\n    value {weight}\n  ]\n"
        )
    chunks.append("]")

    # Everything was reduced to ASCII above, so the encoding is explicit.
    with open(file_name, "w", encoding="ascii") as f:
        f.writelines(chunks)

    

In [65]:
# Export the aggregated table ``df`` to the file "xrefs.gexf" via to_gexf.
to_gexf(df, "xrefs.gexf")