In [1]:
import plotly.express as px
import plotly.offline as py
import pandas as pd

In [13]:
# Load the network table from data/coi_network.csv, using the file's first
# column as the row index, and display it.
df = pd.read_csv(
    "data/coi_network.csv",
    index_col=[0],
)
df

Unnamed: 0,source,target,source_group,target_group,weight,source_image,target_image
0,GS00S04838|00001197902C1045929E3101CGS00S04838,9122532,contract,vendor,0.111427,/company_contract.png,/company_vendor.png
1,GS00S86307|00001197908C2526929E3101GS00S86307,9122532,contract,vendor,0.114374,/company_contract.png,/company_vendor.png
2,GS00S86603|00001198011CDTFA0180F87043GS00S86603,9122532,contract,vendor,0.236682,/company_contract.png,/company_vendor.png
3,GS00S86953|00001198012CDTFA0181F87074GS00S86953,9122532,contract,vendor,0.143846,/company_contract.png,/company_vendor.png
4,GS00S86307|00001197909CW17982181GS00S86307,9122532,contract,vendor,0.448878,/company_contract.png,/company_vendor.png
...,...,...,...,...,...,...,...
299,9122532,77817617,vendor,secondary_vendor,0.815802,/company_vendor.png,/company_secondary.png
300,9122532,49705585,vendor,secondary_vendor,0.112900,/company_vendor.png,/company_secondary.png
301,9122532,46667523,vendor,secondary_vendor,0.000000,/company_vendor.png,/company_secondary.png
302,company,9122532,entity,vendor,44.093095,/company_entity.png,/company_vendor.png


In [15]:
# Keep only the edge endpoints and their group labels, write that trimmed
# table to data/hook_network.csv, then display it.
edge_columns = ["source", "target", "source_group", "target_group"]
df = df.loc[:, edge_columns]
df.to_csv("data/hook_network.csv")
df

Unnamed: 0,source,target,source_group,target_group
0,GS00S04838|00001197902C1045929E3101CGS00S04838,9122532,contract,vendor
1,GS00S86307|00001197908C2526929E3101GS00S86307,9122532,contract,vendor
2,GS00S86603|00001198011CDTFA0180F87043GS00S86603,9122532,contract,vendor
3,GS00S86953|00001198012CDTFA0181F87074GS00S86953,9122532,contract,vendor
4,GS00S86307|00001197909CW17982181GS00S86307,9122532,contract,vendor
...,...,...,...,...
299,9122532,77817617,vendor,secondary_vendor
300,9122532,49705585,vendor,secondary_vendor
301,9122532,46667523,vendor,secondary_vendor
302,company,9122532,entity,vendor


## Bar chart

In [12]:
# Link counts per company, two rows each, labelled "old "/"new" in `version`.
link_counts = pd.DataFrame({"entity":["Company #1", "Company #1",
                                      "Company #2", "Company #2",
                                      "Company #3", "Company #3",
                                      "Company #4", "Company #4"],
                            "links":[381, 574, 536, 737, 487, 2027, 2194, 2691],
                            "version":["old ", "new", "old ", "new", "old ", "new", "old ", "new"]})


def build_animation_frames(counts, timesteps=10):
    """Expand per-entity link counts into evenly spaced animation frames.

    For every entity in ``counts`` the smallest and largest ``links`` values
    are taken as the start and end of the animation, and ``timesteps`` rows
    are produced that step linearly (in whole numbers) from one to the other.

    Parameters
    ----------
    counts : pandas.DataFrame
        Must contain an ``entity`` column and a numeric ``links`` column.
    timesteps : int, default 10
        Number of frames generated per entity (at least 2).

    Returns
    -------
    pandas.DataFrame
        Columns ``entity``, ``links`` and ``version``; ``version`` is the
        0-based frame number. Every entity has exactly ``timesteps`` rows,
        the first equal to its minimum and the last equal to its maximum.

    Raises
    ------
    ValueError
        If ``timesteps`` is smaller than 2.
    """
    if timesteps < 2:
        raise ValueError("timesteps must be at least 2")

    last_step = timesteps - 1
    records = []
    for entity, group in counts.groupby("entity"):
        low, high = int(group["links"].min()), int(group["links"].max())
        span = high - low
        for step in range(timesteps):
            # Integer interpolation: step 0 gives `low`, the last step gives
            # exactly `high`, and a zero span simply repeats the same value
            # instead of producing a zero step size.
            records.append({"entity": entity,
                            "links": low + span * step // last_step,
                            "version": step})

    # Build the frame once from plain records rather than concatenating many
    # single-row DataFrames.
    return pd.DataFrame(records, columns=["entity", "links", "version"])


timesteps = 10
# Every entity gets the same number of frames by construction, so no trimming
# is needed to line the bars up on the final frame.
df = build_animation_frames(link_counts, timesteps)

In [14]:
# Title text (with an italic sub-line) shown above the animated bar chart.
chart_title = """Increasing Potential COIs Identified: # Contracts Linked to Companies (167% Increase)
                           <br><i style='font-size:12;'>         *companies have been anonymized</i>"""

# Fix the y-axis range so it stays constant across frames, with some headroom.
y_ceiling = max(df["links"]) + 100

fig = px.bar(
    df,
    x="entity",
    y="links",
    color="entity",
    animation_frame="version",
    range_y=[0, y_ceiling],
)

fig.update_layout(title=chart_title, showlegend=False)

# Remove the animation controls from the layout.
for control in ("updatemenus", "sliders"):
    fig.layout.pop(control)

fig.layout.xaxis.title = "Company Name"
fig.layout.yaxis.title = "Identified Links"

# Transition timing, bar ordering and transparent backgrounds.
fig.update_layout(
    transition=dict(duration=1),
    xaxis=dict(categoryorder="total descending"),
    plot_bgcolor="rgba(0, 0, 0, 0)",
    paper_bgcolor="rgba(0, 0, 0, 0)",
)

py.iplot(fig)

In [15]:
# Save the figure as an HTML file one level above the working directory.
html_path = "../old_vs_new_process.html"
fig.write_html(html_path)

In [6]:
import pandas as pd
# Re-read the network CSV; index_col=False tells pandas not to use the first
# column as the index, so it stays in the frame as an ordinary column.
df = pd.read_csv(
    "./data/coi_network.csv",
    index_col=False,
)

In [11]:
# Substitute every occurrence of the pattern "hp" with "company" across all
# columns in one pass (regex matching, so substrings inside longer values are
# rewritten too).
df = df.replace("hp", "company", regex=True)

In [13]:
# Write the frame (after the "hp" -> "company" substitution) to coi_network.csv
# in the current working directory.
# NOTE(review): the input was read from ./data/ but this writes next to the
# notebook, and the default to_csv also emits the row index as an extra leading
# column - confirm both are intended.
output_csv = "coi_network.csv"
df.to_csv(output_csv)