In [None]:
import numpy as np
import math
import statsmodels.api as sm
import imblearn
from tqdm.notebook import tqdm
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from networkx.algorithms import bipartite, community 
from numpy import nansum
from numpy import nanmean
from numpy.random import choice
import itertools
from networkx.algorithms import community
import community as community_louvain
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import networkx as nx

In [None]:
# cb1 (and presumably cb2/cb3) come from cleaning the data retrieved from CB Insights.
# The data cannot be made publicly available, so the CSV files are not distributed with the notebook.
cb1, cb2, cb3 = (pd.read_csv(csv_name) for csv_name in ('cb1.csv', 'cb2.csv', 'cb3.csv'))


In [None]:
# Stack the three extracts into one deal table with a fresh 0..n-1 index.
# pd.concat is used instead of DataFrame.append, which was deprecated in pandas 1.4
# and removed in pandas 2.0.
cb = pd.concat([cb1, cb2, cb3], ignore_index=True)

# Parse the deal date once; values that do not match the format become NaT (errors='coerce').
deal_date = pd.to_datetime(cb['date'], format='%m/%d/%Y', errors='coerce')
cb['date_my'] = deal_date.dt.to_period('M')  # month of the deal
cb['date_y'] = deal_date.dt.to_period('Y')   # year of the deal

# Node table for the target firms (bipartite == 1).
nodes_org = (
    cb[['target_name', 'target_ID', 'target_country', 'target_continent',
        'industry', 'target_founded', 'target_status']]
    .drop_duplicates()
    .rename(columns={"target_name": "node", "target_country": "country",
                     "target_continent": "continent", "target_status": "status",
                     "target_ID": "index", "target_founded": "foundation"})
    .assign(bipartite=1)
)

# Node table for the investors (bipartite == 0); investor_type is stored under 'industry'.
nodes_inv = (
    cb[['investors_name', 'investor_id', 'investor_country', 'investor_continent',
        'investor_type', 'investor_founded']]
    .drop_duplicates()
    .rename(columns={"investors_name": "node", "investor_country": "country",
                     "investor_continent": "continent", "investor_type": "industry",
                     "investor_id": 'index', "investor_founded": "foundation"})
    .assign(bipartite=0)
)

# Single node table; 'id' is the row position and is used as the graph node id downstream.
nodes = pd.concat([nodes_org, nodes_inv], ignore_index=True)
nodes['id'] = nodes.index
nodes_org = nodes[nodes['bipartite'] == 1]
nodes_inv = nodes[nodes['bipartite'] == 0]

# Number of (non-null) investor rows per (target, stage).
numbers = (
    cb.groupby(['target_name', 'stage'], as_index=False)
    .count()[['target_name', 'stage', 'investors_name']]
    .rename(columns={"investors_name": "investor_numbers"})
)

# Attach the investor count and then the node attributes of both endpoints.
# The two merges with the node table share column names, so pandas suffixes them:
# *_x columns describe the target firm, *_y columns describe the investor.
cb_new = pd.merge(cb, numbers, how='left', on=['target_name', 'stage'])
cb_new = pd.merge(cb_new, nodes_org, how='left',
                  left_on=['target_name', 'target_ID'], right_on=['node', 'index'])
cb_new = pd.merge(cb_new, nodes_inv, how='left',
                  left_on=['investors_name', 'investor_id'], right_on=['node', 'index'])

# Replace the yearly Period with the plain calendar year.
cb_new['date_y'] = cb_new['date_y'].dt.year


In [None]:
#bipartite graph creation
B = nx.MultiGraph(name='org-bid')
# Investors (bipartite == 0) are inserted first, then target firms (bipartite == 1).
B.add_nodes_from(nodes[nodes.bipartite==0].id)
B.add_nodes_from(nodes[nodes.bipartite==1].id)

# Every attribute dictionary must be keyed by the node id. The continent attribute was
# previously keyed by the continent value itself, so it matched no node and was never set.
node_table = nodes.set_index('id')
for column, attribute in (('node', 'name'),
                          ('country', 'country'),
                          ('continent', 'continent'),
                          ('industry', 'industry'),
                          ('foundation', 'foundation'),
                          ('bipartite', 'bipartite'),
                          ('status', 'target_status')):
    nx.set_node_attributes(B, node_table[column].to_dict(), attribute)

In [None]:
# Split the node ids of B into the two sides of the bipartite graph:
# bid_nodes are the nodes flagged bipartite == 0, org_nodes are all remaining nodes.
bid_nodes = set()
for node_id, attrs in B.nodes(data=True):
    if attrs["bipartite"] == 0:
        bid_nodes.add(node_id)
org_nodes = {node_id for node_id in B if node_id not in bid_nodes}

In [None]:
# Only equity transactions are considered: drop the deals whose round is a form of debt.
debt_rounds = ['Line of Credit', 'Loan', 'Debt']
cb_new_wo = cb_new[~cb_new["round_simp"].isin(debt_rounds)]
# Keep only transactions with a known year and a known amount of money invested.
cb_new_wo = cb_new_wo.dropna(subset=['date_y', 'size_real'])
cb_new_wo = cb_new_wo.reset_index(drop=True)

# One multigraph edge per remaining deal row, between the node ids id_y and id_x
# (the suffixed 'id' columns produced by the two merges with the node table).
for row in cb_new_wo.index:
    B.add_edge(
        cb_new_wo.at[row, 'id_y'],
        cb_new_wo.at[row, 'id_x'],
        date=cb_new_wo.at[row, 'date_y'],
        m_raised=cb_new_wo.at[row, 'size_real'],
        investors=cb_new_wo.at[row, 'investor_numbers'],
        round_simp=cb_new_wo.at[row, 'round_simp'],
    )

In [None]:
# Names that do not identify a precise investor; nodes carrying one of them are removed from B.
unidentified_names = {
    'Undisclosed Investors',
    'ciao',
    'Dead',
    'Public',
    'Undisclosed Angel Investors',
    'Undisclosed Venture Investors',
}
bad_nodes = [node_id for node_id, attrs in B.nodes(data=True)
             if attrs['name'] in unidentified_names]
B.remove_nodes_from(bad_nodes)


In [None]:
# Give every distinct (target firm, round) pair a unique integer id, so that links among
# the investors of the same round can be created later on.
id_round = (
    cb_new[['target_ID', 'round_simp']]
    .drop_duplicates()
    .reset_index(drop=True)
)
id_round['id_round'] = id_round.index

# Same idea for every distinct (target firm, year) pair.
id_year = (
    cb_new[['target_ID', 'date_y']]
    .drop_duplicates()
    .reset_index(drop=True)
)
id_year['id_year'] = id_year.index

# Bring both ids back onto the deal table.
cb_new = cb_new.merge(id_round, how='left', on=['target_ID', 'round_simp'])
cb_new = cb_new.merge(id_year, how='left', on=['target_ID', 'date_y'])

In [None]:
#links between investors in the same round
links = []
subcb = cb_new[['round_simp','date_y','id_y','size_real','id_round','investor_numbers','target_name','id_x','target_country']].drop_duplicates()
# Same filters as for the bipartite edges: known year, known amount, equity rounds only.
subcb = subcb[pd.notna(subcb['date_y'])]
subcb = subcb[pd.notna(subcb['size_real'])]
subcb = subcb[(subcb["round_simp"] != 'Line of Credit') & (subcb["round_simp"]!= 'Loan') & (subcb["round_simp"]!= 'Debt')]

# A single groupby pass replaces re-filtering the whole frame once per round (quadratic in
# the number of rounds). sort=False keeps the rounds in order of first appearance, i.e. the
# order previously obtained from subcb['id_round'].unique().
for _, subcb2 in tqdm(subcb.groupby('id_round', sort=False)):
    # Attributes shared by every link of the round, read from the first row of the round.
    # 'money' is the round size divided by the investor count of that row.
    round_attrs = {'date': subcb2['date_y'].values[0],
                   'round': subcb2['round_simp'].values[0],
                   'money': subcb2['size_real'].values[0]/(subcb2['investor_numbers'].values[0]),
                   'investor_numbers': subcb2['investor_numbers'].values[0],
                   'id_target': subcb2["id_x"].values[0],
                   'country_target': subcb2["target_country"].values[0],
                   'target_name': subcb2['target_name'].values[0]}
    # combinations_with_replacement also yields (i, i), so every investor gets a self-loop.
    for couple in itertools.combinations_with_replacement(np.unique(subcb2['id_y'].values), 2):
        # Each link gets its own copy of the attribute dictionary.
        links.append(couple + (dict(round_attrs),))

In [None]:
# Investor-investor multigraph: one node per investor, one edge per link built above.
bidbid_stage_self = nx.MultiGraph()
bidbid_stage_self.add_nodes_from(nodes[nodes.bipartite==0].id)

# Copy the investor attributes onto the nodes (node-table column -> graph attribute name).
for column, attribute in (('node', 'name'),
                          ('country', 'country'),
                          ('continent', 'continent'),
                          ('industry', 'industry'),
                          ('foundation', 'foundation'),
                          ('bipartite', 'bipartite')):
    attribute_by_id = pd.Series(nodes_inv[column], index=nodes_inv.id).to_dict()
    nx.set_node_attributes(bidbid_stage_self, attribute_by_id, attribute)

bidbid_stage_self.add_edges_from(links)

# Drop investors without any link, then the nodes that do not identify a precise investor.
remove = [node for node, degree in bidbid_stage_self.degree() if degree == 0]
bidbid_stage_self.remove_nodes_from(remove)
bidbid_stage_self.remove_nodes_from(bad_nodes)

# NOTE(review): write_gpickle and nx.info are not available in NetworkX >= 3.0;
# pin networkx < 3 or migrate to pickle.dump / print(G).
nx.readwrite.write_gpickle(bidbid_stage_self, 'bidbid_stage_self_cb.gpickle')
nx.info(bidbid_stage_self)

In [None]:
#from multigraph to graph
# Collapse parallel edges: every investor pair keeps a single edge carrying the list of
# distinct deal years, the summed money, and the lists of distinct rounds and target names.
bidbid_stage_self_single = nx.Graph()
for u, v, data in tqdm(bidbid_stage_self.edges(data=True)):
    d = data['date']
    m = data['money']
    r = data['round']
    t = data['target_name']

    if bidbid_stage_self_single.has_edge(u, v):
        # Extend what has been accumulated so far on the collapsed edge. Rebuilding the
        # lists from the multigraph's key-0 edge plus the current edge dropped every
        # value seen in between.
        merged = bidbid_stage_self_single[u][v]
        if d not in merged['date']:
            # Kept sorted so that the first/last element is the earliest/latest year.
            merged['date'] = sorted(merged['date'] + [d])
        merged['money'] += m
        if r not in merged['rounds']:
            merged['rounds'].append(r)
        if t not in merged['target_name']:
            merged['target_name'].append(t)
    else:
        bidbid_stage_self_single.add_edge(u, v, date=[d], money=m, rounds=[r], target_name=[t])
        

In [None]:
# Persist the collapsed investor graph; the summary string is the cell output.
nx.write_gpickle(bidbid_stage_self_single, 'bidbid_stage_self_single.gpickle')
nx.info(bidbid_stage_self_single)

In [None]:
#create yearly snapshots
# Snapshot Y (stored as bidbid_stage_self_single_Y) holds every investor pair whose
# earliest recorded deal year is <= Y. min() is used because the order of the 'date'
# list is not guaranteed, so its last element is not a reliable reference year.
years = range(1995,2022)
for period in years:
    snapshot_edges = [(u, v, d)
                      for u, v, d in bidbid_stage_self_single.edges(data=True)
                      if min(d['date']) < period]
    # The snapshots are published as notebook-level variables; in a notebook cell
    # locals() is the module namespace.
    locals()["bidbid_stage_self_single_"+str(period-1)] = nx.Graph(snapshot_edges)
    print(period-1)

In [None]:
# Write every yearly snapshot to '<variable name>.gpickle' and print its summary.
for period in years:
    snapshot_name = "bidbid_stage_self_single_"+str(period-1)
    snapshot = locals()[snapshot_name]
    nx.readwrite.write_gpickle(snapshot, snapshot_name+'.gpickle')
    print(nx.info(snapshot))

# ORGORG: projection of the bipartite graph onto the target firms

In [None]:
from itertools import islice

def window(seq, n):
    """Yield successive overlapping tuples of length ``n`` taken from ``seq``.

    Parameters
    ----------
    seq : iterable
        Any iterable; it is consumed once, from left to right.
    n : int
        Width of the sliding window.

    Yields
    ------
    tuple
        ``(s0, ..., s[n-1])``, then ``(s1, ..., s[n])``, and so on.
        Nothing is yielded when ``seq`` has fewer than ``n`` items.
    """
    it = iter(seq)
    # Fill the first window; islice stops early if seq is shorter than n.
    result = tuple(islice(it, n))
    if len(result) == n:
        yield result
    # Slide by one: drop the oldest item, append the next one.
    for elem in it:
        result = result[1:] + (elem,)
        yield result

In [None]:
#projection among the firms
count=0
finestra = list(range(1990,2022))
# win[k] is the expanding window of years 1990 .. 1990+k.
win = [finestra[:i] for i in range(1, len(finestra)+1)]
# One (initially empty) firm-firm multigraph per year, published as orgorg_<year>.
for year in range(1979,2022):
    locals()["orgorg_"+str(year)] = nx.MultiGraph()

for node in tqdm(bid_nodes):
    # bid_nodes was collected before the nodes without a precise investor were removed
    # from B; skip those, otherwise nx.ego_graph raises because the node is missing.
    if node not in B:
        continue
    ego=nx.ego_graph(B, node)
    ndx=list(ego.nodes())
    ndx.remove(node)
    # Year of the first recorded deal between this investor and each of its targets.
    # Looked up once per investor instead of rescanning all ego edges for every
    # (period, neighbour) pair. Every edge of B joins an investor to a target, so the
    # first edge touching a neighbour is the key-0 edge between it and the investor.
    first_deal_year = {x: ego[node][x][0]['date'] for x in ndx}
    for period in win:
        subndx=[x for x in ndx if first_deal_year[x] in period]
        period_graph = locals()["orgorg_"+str(period[-1])]
        for couple in itertools.combinations(subndx, 2):
            year1 = first_deal_year[couple[0]]
            year2 = first_deal_year[couple[1]]
            # Link two firms only if the shared investor entered them at most 6 years apart.
            if year1 - year2 in range(-6,7):
                period_graph.add_edge(couple[0], couple[1], year = max(year1,year2))
    count+=1
    if count%1000==0:
        print(count, end=' ')

In [None]:
#from multi to single
# Collapse each yearly firm-firm multigraph into a simple graph: a pair keeps one edge
# whose 'date' is a one-element list with the year of the first parallel edge encountered.
for period in win:
    last_year = str(period[-1])
    collapsed = nx.Graph()
    for u, v, data in locals()["orgorg_"+last_year].edges(data=True):
        if not collapsed.has_edge(u, v):
            collapsed.add_edge(u, v, date=[data['year']])
    locals()["orgorg_single_"+last_year] = collapsed
    # The file name has no underscore before the year, unlike the variable name.
    nx.readwrite.write_gpickle(collapsed, "orgorg_single"+last_year+".gpickle")
    print(nx.info(collapsed))
    print(period[-1])