In [None]:
import numpy as np
import math
import statsmodels.api as sm
import imblearn
from tqdm.notebook import tqdm
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from networkx.algorithms import bipartite, community 
from numpy import nansum
from numpy import nanmean
from numpy.random import choice
import itertools
from networkx.algorithms import community
import community as community_louvain
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import networkx as nx
# Notebook display settings: show every column and up to 300 rows of a DataFrame.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 300)

## Bipartite network generation

### Data preparation

In [None]:
# Read the three raw CSV extracts; they are stacked into a single table in the next cell.
cb1, cb2, cb3 = (pd.read_csv(path) for path in ('cb1.csv', 'cb2.csv', 'cb3.csv'))

In [None]:
# Stack the three extracts into one table with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; ignore_index=True gives the same index the original
# code set by hand.
cb = pd.concat([cb1, cb2, cb3], ignore_index=True)

In [None]:
# Parse the deal date once; values that do not match MM/DD/YYYY are coerced to NaT.
_date_parsed = pd.to_datetime(cb['date'], format='%m/%d/%Y', errors='coerce')
cb['date_my'] = _date_parsed.dt.to_period('m')  # monthly period
cb['date_y'] = _date_parsed.dt.to_period('y')   # yearly period

# DESCRIZIONE

In [None]:
# Row counts per (target continent, investor continent) pair; missing pairs are shown as 0.
cb.groupby(['target_continent','investor_continent']).size().unstack(fill_value=0)

In [None]:
# Counts per (target continent, investor continent), then each row divided by its own total.
cb_a = cb.groupby(['target_continent','investor_continent']).size().unstack(fill_value=0)
# Row totals: axis=1 sums across the investor-continent columns. The previous
# axis=0 produced *column* totals, which were then aligned to the rows by label,
# so every row was divided by an unrelated total.
cb_a["sum"] = cb_a.sum(axis=1)
# The label slice keeps the continent columns and leaves the helper "sum" column out.
cb_a = cb_a.loc[:,"Africa":"South America"].div(cb_a["sum"], axis=0)
cb_a

In [None]:
# First non-null size_real for every (target, stage) pair ...
es1 = cb.groupby(['target_name','stage'], as_index=False)['size_real'].first()
# ... summed per target ...
es2 = es1.groupby('target_name', as_index=False)['size_real'].sum()
# ... keeping only targets whose total is not exactly zero.
es3 = es2.loc[es2['size_real'].ne(0)]
# Kernel density of the log totals with a narrow bandwidth (bw_method=0.01).
np.log(es3['size_real']).plot.kde(bw_method=0.01)

In [None]:
# Same density plot as the previous cell, using the default bandwidth.
np.log(es3['size_real']).plot.kde()

In [None]:
# The 300 targets with the largest summed size_real.
es3.sort_values("size_real", ascending=False).head(300)

In [None]:
# Display the combined table (output is capped by the display options set at the top).
cb

In [None]:
# Last 300 rows whose investor is recorded as the placeholder 'Public'.
cb[cb['investors_name']=='Public'].tail(300)

In [None]:
# Rank investors by the number of rows they appear in.

inv_list = list(cb['investors_name'].values)
inv_counter = {}
for investor in inv_list:
    inv_counter[investor] = inv_counter.get(investor, 0) + 1

# Most frequent first; ties keep first-appearance order because sorted() is stable.
popular_invs = sorted(inv_counter, key=inv_counter.get, reverse=True)

top = popular_invs[:20]

In [None]:
# Print any missing (NaN) investor name that made it into the ranking.
for investor in filter(pd.isna, popular_invs):
    print(investor)

In [None]:
# Number of rows attributed to the placeholder investor 'Dead' (raises KeyError if it never occurs).
inv_counter['Dead']

# GRAPHS

In [None]:
# First, minimal node table: one row per distinct target and per distinct investor.
# bipartite == 1 marks targets, bipartite == 0 marks investors.
nodes_org = cb[['target_name', 'target_ID']].drop_duplicates()
nodes_org['bipartite'] = 1
nodes_org = nodes_org.rename(columns={"target_name": "node", "target_ID": "index"})

nodes_inv = cb[['investors_name', 'investor_id']].drop_duplicates()
nodes_inv = nodes_inv.rename(columns={"investors_name": "node", "investor_id": 'index'})
nodes_inv['bipartite'] = 0

# pd.concat replaces DataFrame.append (removed in pandas 2.0).
nodes = pd.concat([nodes_org, nodes_inv], ignore_index=True)
# Graph node id = row position in the combined table.
nodes['id'] = nodes.index
nodes

In [None]:
# For every (target, stage) pair, count the rows that have a non-null investor name.
_per_stage_counts = cb.groupby(['target_name','stage'], as_index=False).count()
numbers = _per_stage_counts[['target_name','stage','investors_name']].rename(
    columns={"investors_name": "investor_numbers"}
)

In [None]:
# Attach the per-(target, stage) investor count to every row.
cb_new = cb.merge(numbers, how='left', on=['target_name','stage'])
# Attach the target node row, then the investor node row. Both node tables share
# column names, so after the second merge pandas suffixes the target-side copies
# with _x and the investor-side copies with _y (e.g. id_x / id_y).
_target_nodes = nodes[nodes['bipartite']== 1]
_investor_nodes = nodes[nodes['bipartite']== 0]
cb_new = cb_new.merge(_target_nodes, how='left',
                      left_on=['target_name','target_ID'], right_on=['node','index'])
cb_new = cb_new.merge(_investor_nodes, how='left',
                      left_on=['investors_name','investor_id'], right_on=['node','index'])


In [None]:
## Rebuild the node table as above (the first pass was simplified to save time),
## this time carrying the descriptive columns as well.
nodes_org = cb_new[['target_name', 'target_ID', 'target_country', 'target_continent',
               'industry','target_founded','target_status']].drop_duplicates()
nodes_org['bipartite'] = 1
nodes_inv = cb_new[['investors_name', 'investor_id', 'investor_country', 'investor_continent', 'investor_type',
                   'investor_founded']].drop_duplicates()
# Bring both sides onto a common schema; investor_type is stored under 'industry'.
nodes_org = nodes_org.rename(columns={"target_name": "node", "target_country": "country",
                                      "target_continent": "continent",
                                      "target_ID": "index", "target_founded": "foundation"})
nodes_inv = nodes_inv.rename(columns={"investors_name": "node", "investor_country": "country",
                                      "investor_continent": "continent", "investor_type": "industry",
                                      "investor_id": 'index', "investor_founded": "foundation"})
nodes_inv['bipartite'] = 0
# pd.concat replaces DataFrame.append (removed in pandas 2.0).
nodes = pd.concat([nodes_org, nodes_inv], ignore_index=True)
# Graph node id = row position in the combined table.
nodes['id'] = nodes.index
nodes.columns



In [None]:
# Bipartite investor-target multigraph: one edge per dated row of cb_new.
B = nx.MultiGraph(name='org-bid')
# Add nodes with the node attribute "bipartite"
B.add_nodes_from(nodes[nodes.bipartite==0].id)
B.add_nodes_from(nodes[nodes.bipartite==1].id)
# Every attribute is keyed by node id. 'continent' used to be keyed by the
# continent labels themselves, so it was never attached to any node.
_attributes_by_id = nodes.set_index('id')
for _attr, _column in (('name', 'node'), ('country', 'country'), ('continent', 'continent'),
                       ('industry', 'industry'), ('foundation', 'foundation'),
                       ('bipartite', 'bipartite'), ('target_status', 'target_status')):
    nx.set_node_attributes(B, _attributes_by_id[_column].to_dict(), _attr)
# Add edges only between nodes of opposite node sets
cb_new_dates = cb_new[pd.notna(cb_new['date_y'])].reset_index(drop=True)
# Walk the needed columns in lockstep instead of one .loc lookup per cell.
_edge_rows = zip(cb_new_dates['id_y'], cb_new_dates['id_x'], cb_new_dates['date_y'],
                 cb_new_dates['size_real'], cb_new_dates['investor_numbers'],
                 cb_new_dates['round_simp'])
for _investor, _target, _period, _size, _n_investors, _round in _edge_rows:
    B.add_edge(_investor, _target, date=_period.year, m_raised=_size,
               investors=_n_investors, round_simp=_round)
# Placeholder investors that do not identify a real entity.
_placeholder_names = ('Undisclosed Investors', 'Dead', 'Public',
                      'Undisclosed Angel Investors', 'Undisclosed Venture Investors')
brutti = [x for x, y in B.nodes(data=True) if y['name'] in _placeholder_names]
B.remove_nodes_from(brutti)
# Drop nodes left without any edge.
remove = [node for node, degree in B.degree() if degree == 0]
B.remove_nodes_from(remove)
bid_nodes = {n for n, d in B.nodes(data=True) if d["bipartite"] == 0}
org_nodes = set(B) - bid_nodes

In [None]:
## "Ugly" nodes: placeholder investor names collected in `brutti` and removed from the graphs
#'Undisclosed Investors',
# 'Public',
#'Undisclosed Angel Investors',
#'Undisclosed Venture Investors',
#Dead


In [None]:
# Split the surviving nodes of B into investors (bipartite == 0) and everything else.
bid_nodes = set(node for node, attrs in B.nodes(data=True) if attrs["bipartite"] == 0)
org_nodes = set(B).difference(bid_nodes)

# NEW GRAPHS

# BIDBID

In [None]:
# Sequential identifiers for every distinct (target, round), (target, year)
# and (target, stage) combination found in cb_new.
id_round = cb_new[['target_ID','round_simp']].drop_duplicates().reset_index(drop=True)
id_round['id_round'] = id_round.index

id_year = cb_new[['target_ID','date_y']].drop_duplicates().reset_index(drop=True)
id_year['id_year'] = id_year.index

id_stage = cb_new[['target_ID','stage']].drop_duplicates().reset_index(drop=True)
id_stage['id_stage'] = id_stage.index
id_stage

In [None]:
# Attach the (target, round) identifier to every row.
cb_new = cb_new.merge(id_round, how='left', on=['target_ID','round_simp'])


In [None]:
# Attach the (target, stage) identifier to every row.
cb_new = cb_new.merge(id_stage, how='left', on=['target_ID','stage'])

In [None]:
# Attach the (target, year) identifier to every row.
cb_new = cb_new.merge(id_year, how='left', on=['target_ID','date_y'])

In [None]:
# Re-split the full node table into targets (1) and investors (0).
nodes_org = nodes.loc[nodes['bipartite'].eq(1)]
nodes_inv = nodes.loc[nodes['bipartite'].eq(0)]

# FOLLOWERS

## STAGE

In [None]:
## Directed investor -> investor links: for every target, each investor seen in a
## later month is linked to each investor seen in an earlier month.
links = []
subcb = cb_new[['stage','date_my','id_y','size_real','investor_numbers','target_name']].drop_duplicates()
subcb = subcb[pd.notna(subcb['date_my'])]
subcb = subcb[pd.notna(subcb['size_real'])]
for i in tqdm(subcb['target_name'].unique()):
    subcb2 = subcb[subcb['target_name']==i]
    dates = subcb2['date_my'].unique()
    # Filter the rows of each month once instead of once per investor pair.
    rows_by_date = {}
    for date in dates:
        rows_by_date[date] = subcb2[subcb2['date_my']==date]
    for date1 in dates:
        list1 = list(rows_by_date[date1]['id_y'])
        for date2 in dates:
            if date2>date1:
                riferimento = rows_by_date[date2]
                list2 = list(riferimento['id_y'])
                # Edge attributes come from the first row of the later month.
                money = riferimento['size_real'].values[0]/riferimento['investor_numbers'].values[0]
                stage = riferimento['stage'].values[0]
                for couple in itertools.product(list2, list1):
                    links.append(couple + ({'money': money,
                                            'date': date2.year,
                                            'stage': stage,
                                            'target_name': i},))

In [None]:
# Directed investor multigraph built from the follower links.
bidbid_dir = nx.MultiDiGraph()
bidbid_dir.add_nodes_from(nodes[nodes.bipartite==0].id)
for attr_name, column in (('name', 'node'), ('country', 'country'), ('continent', 'continent'),
                          ('industry', 'industry'), ('foundation', 'foundation'),
                          ('bipartite', 'bipartite')):
    nx.set_node_attributes(bidbid_dir, pd.Series(nodes_inv[column], index=nodes_inv.id).to_dict(), attr_name)
bidbid_dir.add_edges_from(links)
# Drop isolated nodes, then the placeholder investors, then whatever that removal isolated.
remove = [node for node, degree in bidbid_dir.degree() if degree == 0]
bidbid_dir.remove_nodes_from(remove)
bidbid_dir.remove_nodes_from(brutti)
remove2 = [node for node, degree in bidbid_dir.degree() if degree == 0]
bidbid_dir.remove_nodes_from(remove2)
nx.info(bidbid_dir)

In [None]:
# Five-year sliding windows over 1993-2020, e.g. (1993, ..., 1997), (1994, ..., 1998), ...
# Built inline as a list because (a) the `window` helper is only defined further
# down the notebook, so calling it here fails on a fresh kernel, and (b) a
# generator would be exhausted by the first loop and leave later loops empty.
_anni = list(range(1993, 2021))
finestra = [tuple(_anni[k:k + 5]) for k in range(len(_anni) - 4)]

In [None]:
# One directed multigraph per window, stored as bidbid_dir_<last year of the window>.
for periodo in finestra:
    last_year = periodo[-1]
    window_edges = [(u, v, d) for u, v, d in bidbid_dir.edges(data=True) if d['date'] in periodo]
    locals()["bidbid_dir_"+str(last_year)] = nx.MultiDiGraph(window_edges)
    print(last_year)

In [None]:
# Collapse each windowed multigraph into a simple DiGraph: parallel edges are
# merged into one edge whose 'money' is the sum and whose 'date', 'stage' and
# 'target_name' are lists of the distinct values seen.
# The windows are rebuilt here as a list: the generator created earlier is
# already exhausted by the previous loop, which made this loop a silent no-op.
_anni = list(range(1993, 2021))
finestra = [tuple(_anni[k:k + 5]) for k in range(len(_anni) - 4)]
for periodo in finestra:
    multi = locals()["bidbid_dir_"+str(periodo[-1])]
    a = nx.DiGraph()
    for u, v, data in multi.edges(data=True):
        if a.has_edge(u, v):
            merged = a[u][v]
            merged['money'] += data['money']
            # Accumulate onto what was already merged. The previous code always
            # unioned with parallel edge 0 only, dropping earlier values.
            for key in ('date', 'stage', 'target_name'):
                if data[key] not in merged[key]:
                    merged[key].append(data[key])
        else:
            a.add_edge(u, v, date=[data['date']], money=data['money'],
                       stage=[data['stage']], target_name=[data['target_name']])

    locals()["bidbid_dir_single_"+str(periodo[-1])] = a
    print(periodo[-1])

In [None]:
# Finalise every windowed directed graph: attach investor attributes, prune, save.
# Windows are computed inline because `window` is defined later in the notebook.
_anni = list(range(1993, 2021))
finestra = [tuple(_anni[k:k + 5]) for k in range(len(_anni) - 4)]
# The attribute dictionaries do not depend on the window, so build them once.
_inv_by_id = nodes_inv.set_index('id')
_inv_attributes = {}
for _attr, _column in (('name', 'node'), ('country', 'country'), ('continent', 'continent'),
                       ('industry', 'industry'), ('foundation', 'foundation'),
                       ('bipartite', 'bipartite')):
    _inv_attributes[_attr] = _inv_by_id[_column].to_dict()
for periodo in finestra:
    g = locals()["bidbid_dir_single_"+str(periodo[-1])]
    g.add_nodes_from(nodes[nodes.bipartite==0].id)
    for _attr, _values in _inv_attributes.items():
        nx.set_node_attributes(g, _values, _attr)
    # Drop isolated nodes, then the placeholder investors, then whatever that removal isolated.
    remove = [node for node, degree in g.degree() if degree == 0]
    g.remove_nodes_from(remove)
    g.remove_nodes_from(brutti)
    remove2 = [node for node, degree in g.degree() if degree == 0]
    g.remove_nodes_from(remove2)
    nx.readwrite.write_gpickle(g, "bidbid_dir_single_"+str(periodo[-1])+'.gpickle')
    # nx.info returns a string; inside a loop it must be printed to be seen.
    print(nx.info(g))

# UNDI

## ANNO

In [None]:
## Investor-investor links within the same (target, year) group
subcb = cb_new[['id_year','date_y','id_y']].drop_duplicates()
subcb = subcb.dropna(subset=['date_y'])
links = []
for group_id in tqdm(subcb['id_year'].unique()):
    subcb2 = subcb[subcb['id_year']==group_id]
    investor_ids = np.unique(subcb2['id_y'].values)
    for couple in itertools.combinations(investor_ids, 2):
        attrs = {'date': subcb2['date_y'].values[0].year}
        links.append(couple + (attrs,))


In [None]:
# Undirected investor multigraph for the same-year links.
bidbid = nx.MultiGraph()
bidbid.add_nodes_from(nodes[nodes.bipartite==0].id)
for attr_name, column in (('name', 'node'), ('country', 'country'), ('continent', 'continent'),
                          ('industry', 'industry'), ('foundation', 'foundation'),
                          ('bipartite', 'bipartite')):
    nx.set_node_attributes(bidbid, pd.Series(nodes_inv[column], index=nodes_inv.id).to_dict(), attr_name)
bidbid.add_edges_from(links)
# Drop isolated nodes, then the placeholder investors, then whatever that removal isolated.
remove = [node for node, degree in bidbid.degree() if degree == 0]
bidbid.remove_nodes_from(remove)
bidbid.remove_nodes_from(brutti)
remove2 = [node for node, degree in bidbid.degree() if degree == 0]
bidbid.remove_nodes_from(remove2)
nx.readwrite.write_gpickle(bidbid,'bidbid_cb.gpickle')
nx.info(bidbid)

## ROUND

In [None]:
## Investor-investor links within the same (target, round) group
subcb = cb_new[['round_simp','id_year','date_y','id_y','size_real','id_round','investor_numbers','target_name']].drop_duplicates()
subcb = subcb.dropna(subset=['date_y'])
subcb = subcb.dropna(subset=['size_real'])
links = []
for group_id in tqdm(subcb['id_round'].unique()):
    subcb2 = subcb[subcb['id_round']==group_id]
    investor_ids = np.unique(subcb2['id_y'].values)
    for couple in itertools.combinations(investor_ids, 2):
        # Attributes are read from the first row of the group.
        attrs = {'date': subcb2['date_y'].values[0].year,
                 'round': subcb2['round_simp'].values[0],
                 'money': subcb2['size_real'].values[0]/(subcb2['investor_numbers'].values[0]-1),
                 'target_name': subcb2['target_name'].values[0]}
        links.append(couple + (attrs,))


In [None]:
# Undirected investor multigraph for the same-round links.
bidbid_round = nx.MultiGraph()
bidbid_round.add_nodes_from(nodes[nodes.bipartite==0].id)
for attr_name, column in (('name', 'node'), ('country', 'country'), ('continent', 'continent'),
                          ('industry', 'industry'), ('foundation', 'foundation'),
                          ('bipartite', 'bipartite')):
    nx.set_node_attributes(bidbid_round, pd.Series(nodes_inv[column], index=nodes_inv.id).to_dict(), attr_name)
bidbid_round.add_edges_from(links)
# Drop isolated nodes, then the placeholder investors, then whatever that removal isolated.
remove = [node for node, degree in bidbid_round.degree() if degree == 0]
bidbid_round.remove_nodes_from(remove)
bidbid_round.remove_nodes_from(brutti)
remove2 = [node for node, degree in bidbid_round.degree() if degree == 0]
bidbid_round.remove_nodes_from(remove2)
nx.readwrite.write_gpickle(bidbid_round,'bidbid_round_cb.gpickle')
nx.info(bidbid_round)

## STAGE

In [None]:
## Investor-investor links within the same (target, stage) group
subcb = cb_new[['stage','date_y','id_y','size_real','id_stage','investor_numbers','target_name']].drop_duplicates()
subcb = subcb.dropna(subset=['date_y'])
subcb = subcb.dropna(subset=['size_real'])
links = []
for group_id in tqdm(subcb['id_stage'].unique()):
    subcb2 = subcb[subcb['id_stage']==group_id]
    investor_ids = np.unique(subcb2['id_y'].values)
    for couple in itertools.combinations(investor_ids, 2):
        # Attributes are read from the first row of the group.
        attrs = {'date': subcb2['date_y'].values[0].year,
                 'stage': subcb2['stage'].values[0],
                 'money': subcb2['size_real'].values[0]/(subcb2['investor_numbers'].values[0]-1),
                 'target_name': subcb2['target_name'].values[0]}
        links.append(couple + (attrs,))

In [None]:
# Undirected investor multigraph for the same-stage links.
bidbid_stage = nx.MultiGraph()
bidbid_stage.add_nodes_from(nodes[nodes.bipartite==0].id)
for attr_name, column in (('name', 'node'), ('country', 'country'), ('continent', 'continent'),
                          ('industry', 'industry'), ('foundation', 'foundation'),
                          ('bipartite', 'bipartite')):
    nx.set_node_attributes(bidbid_stage, pd.Series(nodes_inv[column], index=nodes_inv.id).to_dict(), attr_name)
bidbid_stage.add_edges_from(links)
# Drop isolated nodes, then the placeholder investors, then whatever that removal isolated.
remove = [node for node, degree in bidbid_stage.degree() if degree == 0]
bidbid_stage.remove_nodes_from(remove)
bidbid_stage.remove_nodes_from(brutti)
remove2 = [node for node, degree in bidbid_stage.degree() if degree == 0]
bidbid_stage.remove_nodes_from(remove2)
nx.readwrite.write_gpickle(bidbid_stage,'bidbid_stage_cb.gpickle')
nx.info(bidbid_stage)

## REPLACEMENT STAGE

In [None]:
## Same-stage investor links, with self-pairs included (combinations with replacement)
subcb = cb_new[['stage','date_y','id_y','size_real','id_stage','investor_numbers','target_name']].drop_duplicates()
subcb = subcb.dropna(subset=['date_y'])
subcb = subcb.dropna(subset=['size_real'])
links = []
for group_id in tqdm(subcb['id_stage'].unique()):
    subcb2 = subcb[subcb['id_stage']==group_id]
    investor_ids = np.unique(subcb2['id_y'].values)
    for couple in itertools.combinations_with_replacement(investor_ids, 2):
        # Attributes are read from the first row of the group.
        attrs = {'date': subcb2['date_y'].values[0].year,
                 'stage': subcb2['stage'].values[0],
                 'money': subcb2['size_real'].values[0]/(subcb2['investor_numbers'].values[0]),
                 'target_name': subcb2['target_name'].values[0]}
        links.append(couple + (attrs,))

In [None]:
# Undirected investor multigraph for the same-stage links including self-loops.
bidbid_stage_self = nx.MultiGraph()
bidbid_stage_self.add_nodes_from(nodes[nodes.bipartite==0].id)
for attr_name, column in (('name', 'node'), ('country', 'country'), ('continent', 'continent'),
                          ('industry', 'industry'), ('foundation', 'foundation'),
                          ('bipartite', 'bipartite')):
    nx.set_node_attributes(bidbid_stage_self, pd.Series(nodes_inv[column], index=nodes_inv.id).to_dict(), attr_name)
bidbid_stage_self.add_edges_from(links)
# Drop isolated nodes, then the placeholder investors, then whatever that removal isolated.
remove = [node for node, degree in bidbid_stage_self.degree() if degree == 0]
bidbid_stage_self.remove_nodes_from(remove)
bidbid_stage_self.remove_nodes_from(brutti)
remove2 = [node for node, degree in bidbid_stage_self.degree() if degree == 0]
bidbid_stage_self.remove_nodes_from(remove2)
nx.readwrite.write_gpickle(bidbid_stage_self,'bidbid_stage_self.gpickle')
nx.info(bidbid_stage_self)

# SINGLE


## STAGE SINGLE

In [None]:
# Collapse the same-stage multigraph into a simple graph: parallel edges become
# one edge whose 'money' is the sum and whose 'date', 'stage' and 'target_name'
# are lists of the distinct values seen.
bidbid_stage_single = nx.Graph()
for u, v, data in bidbid_stage.edges(data=True):
    if bidbid_stage_single.has_edge(u, v):
        merged = bidbid_stage_single[u][v]
        merged['money'] += data['money']
        # Accumulate onto what was already merged. The previous code always
        # unioned with parallel edge 0 only, dropping earlier values.
        for key in ('date', 'stage', 'target_name'):
            if data[key] not in merged[key]:
                merged[key].append(data[key])
    else:
        bidbid_stage_single.add_edge(u, v, date=[data['date']], money=data['money'],
                                     stage=[data['stage']], target_name=[data['target_name']])
        

In [None]:
# Persist the collapsed same-stage graph and show its summary.
nx.readwrite.write_gpickle(bidbid_stage_single,'bidbid_stage_single_cb.gpickle')
nx.info(bidbid_stage_single)

## REPLACEMENT STAGE SINGLE

In [None]:
# Collapse the self-loop variant of the same-stage multigraph into a simple graph:
# parallel edges become one edge whose 'money' is the sum and whose 'date',
# 'stage' and 'target_name' are lists of the distinct values seen.
bidbid_stage_self_single = nx.Graph()
for u, v, data in bidbid_stage_self.edges(data=True):
    if bidbid_stage_self_single.has_edge(u, v):
        merged = bidbid_stage_self_single[u][v]
        merged['money'] += data['money']
        # Accumulate onto what was already merged. The previous code always
        # unioned with parallel edge 0 only, dropping earlier values.
        for key in ('date', 'stage', 'target_name'):
            if data[key] not in merged[key]:
                merged[key].append(data[key])
    else:
        bidbid_stage_self_single.add_edge(u, v, date=[data['date']], money=data['money'],
                                          stage=[data['stage']], target_name=[data['target_name']])
        

In [None]:
# Persist the collapsed self-loop variant and show its summary.
nx.readwrite.write_gpickle(bidbid_stage_self_single,'bidbid_stage_self_single.gpickle')
nx.info(bidbid_stage_self_single)

# WINDOW

In [None]:
from itertools import islice

def window(seq, n):
    """Yield a sliding window of width n over the items of seq.

    s -> (s0, s1, ..., s[n-1]), (s1, s2, ..., sn), ...

    This is a generator: it can be iterated only once. Nothing is yielded
    when seq holds fewer than n items.
    """
    source = iter(seq)
    current = tuple(islice(source, n))
    if len(current) == n:
        yield current
    for item in source:
        # Slide by one: drop the oldest item, append the newest.
        current = (*current[1:], item)
        yield current

In [None]:
# Five-year sliding windows over 1993-2021, materialised as a list: `window`
# returns a one-shot generator, and leaving it exhausted after this loop made
# the following cell's loop over `finestra` do nothing.
finestra = list(window(list(range(1993,2022)),5))
# One multigraph per window, stored as bidbid_stage_<last year of the window>.
for periodo in finestra:
    window_edges = [(u,v,d) for u,v,d in bidbid_stage.edges(data=True) if d['date'] in periodo]
    locals()["bidbid_stage_"+str(periodo[-1])] = nx.MultiGraph(window_edges)
    print(periodo[-1])

## STAGE SINGLE ANNO

In [None]:
# Collapse each windowed same-stage multigraph into a simple graph: parallel
# edges become one edge whose 'money' is the sum and whose 'date', 'stage' and
# 'target_name' are lists of the distinct values seen.
# The windows are rebuilt as a list so this loop does not depend on a generator
# that an earlier cell has already consumed.
finestra = list(window(list(range(1993,2022)),5))
for periodo in finestra:
    multi = locals()["bidbid_stage_"+str(periodo[-1])]
    a = nx.Graph()
    for u, v, data in multi.edges(data=True):
        if a.has_edge(u, v):
            merged = a[u][v]
            merged['money'] += data['money']
            # Accumulate onto what was already merged. The previous code always
            # unioned with parallel edge 0 only, dropping earlier values.
            for key in ('date', 'stage', 'target_name'):
                if data[key] not in merged[key]:
                    merged[key].append(data[key])
        else:
            a.add_edge(u, v, date=[data['date']], money=data['money'],
                       stage=[data['stage']], target_name=[data['target_name']])
    locals()["bidbid_stage_single_"+str(periodo[-1])] = a
    print(periodo[-1])

In [None]:
# Finalise every windowed same-stage graph: attach investor attributes, prune, save.
# `finestra` is materialised as a list so later cells can iterate over it again.
finestra = list(window(list(range(1993,2022)),5))
# The attribute dictionaries do not depend on the window, so build them once.
_inv_by_id = nodes_inv.set_index('id')
_inv_attributes = {}
for _attr, _column in (('name', 'node'), ('country', 'country'), ('continent', 'continent'),
                       ('industry', 'industry'), ('foundation', 'foundation'),
                       ('bipartite', 'bipartite')):
    _inv_attributes[_attr] = _inv_by_id[_column].to_dict()
for periodo in finestra:
    g = locals()["bidbid_stage_single_"+str(periodo[-1])]
    g.add_nodes_from(nodes[nodes.bipartite==0].id)
    for _attr, _values in _inv_attributes.items():
        nx.set_node_attributes(g, _values, _attr)
    # Drop isolated nodes, then the placeholder investors, then whatever that removal isolated.
    remove = [node for node, degree in g.degree() if degree == 0]
    g.remove_nodes_from(remove)
    g.remove_nodes_from(brutti)
    remove2 = [node for node, degree in g.degree() if degree == 0]
    g.remove_nodes_from(remove2)
    nx.readwrite.write_gpickle(g, "bidbid_stage_single_"+str(periodo[-1])+'.gpickle')
    # nx.info returns a string; inside a loop it must be printed to be seen.
    print(nx.info(g))

## REPLACEMENT STAGE SINGLE ANNO

In [None]:
# One multigraph per five-year window of the self-loop variant, stored as
# bidbid_stage_self_<last year of the window>.
# The windows are rebuilt as a list: the generator left in `finestra` by the
# previous cell is already exhausted, so this loop used to do nothing.
finestra = list(window(list(range(1993,2022)),5))
for periodo in finestra:
    window_edges = [(u,v,d) for u,v,d in bidbid_stage_self.edges(data=True) if d['date'] in periodo]
    locals()["bidbid_stage_self_"+str(periodo[-1])] = nx.MultiGraph(window_edges)
    print(periodo[-1])

In [None]:
# Collapse each windowed self-loop multigraph into a simple graph and save it:
# parallel edges become one edge whose 'money' is the sum and whose 'date',
# 'stage' and 'target_name' are lists of the distinct values seen.
# The windows are rebuilt as a list so this loop does not depend on a generator
# that an earlier cell has already consumed.
finestra = list(window(list(range(1993,2022)),5))
for periodo in finestra:
    multi = locals()["bidbid_stage_self_"+str(periodo[-1])]
    a = nx.Graph()
    for u, v, data in multi.edges(data=True):
        if a.has_edge(u, v):
            merged = a[u][v]
            merged['money'] += data['money']
            # Accumulate onto what was already merged. The previous code always
            # unioned with parallel edge 0 only, dropping earlier values.
            for key in ('date', 'stage', 'target_name'):
                if data[key] not in merged[key]:
                    merged[key].append(data[key])
        else:
            a.add_edge(u, v, date=[data['date']], money=data['money'],
                       stage=[data['stage']], target_name=[data['target_name']])
    locals()["bidbid_stage_self_single_"+str(periodo[-1])] = a
    nx.readwrite.write_gpickle(a, "bidbid_stage_self_single_"+str(periodo[-1])+".gpickle")
    print(periodo[-1])
    

# COMMUNITIES

In [None]:
# Reload the same-stage investor multigraph saved earlier in this notebook.
# NOTE(review): gpickle files are pickles - only load files you produced yourself.
bidbid_stage = nx.readwrite.read_gpickle('bidbid_stage_cb.gpickle')

In [None]:
# Louvain community detection: `partition` is used below as a mapping node -> community id.
partition = community_louvain.best_partition(bidbid_stage)
#group the values
# store the names (the keys of
#the new dict) as a set (keeps elements unique)
# NOTE(review): the assignment of `com` below is commented out, so `com` is not defined by this cell.
#com = set(partition.values())

In [None]:


# Distinct community ids. This assignment was previously left commented out in
# the cell above, so `com` was undefined and this cell raised NameError.
com = set(partition.values())
# Group the nodes by community in a single pass over the partition instead of
# rescanning every node once per community.
_members = {}
for _node, _community in partition.items():
    _members.setdefault(_community, []).append(_node)
d = {n: _members[n] for n in com}
# Keep only communities with at least 10 members.
e = {k: v for k, v in d.items() if len(v) >= 10}
f = list(e.items())

# PEOPLE

In [None]:
# Save the enriched deal table (the DataFrame index is written as the first column).
cb_new.to_csv('cb_new.csv')

In [None]:
import re


def _split_people(entries):
    """Split 'Name (Role)' strings into parallel lists of names and roles.

    The name is the text before the first ' ('; the role is the text after the
    first '(' up to the next ')'. An entry without a parenthesised role takes
    the role of the entry that follows it (e.g. 'Smith' followed by
    ' Jr. (CEO)'); the back-fill runs from the end so that runs of role-less
    entries are all filled. A trailing role-less entry keeps the marker 'not'.

    Returns (persona, carica), both the same length as `entries`.
    """
    persona = []
    carica = []
    for entry in entries:
        persona.append(entry.partition(' (')[0])
        found = re.search(r'\(([^)]+)', entry)
        carica.append(found.group(1) if found else 'not')
    # Walk backwards so each missing role sees an already-filled successor;
    # stopping at len - 2 avoids reading past the end of the list.
    for k in range(len(carica) - 2, -1, -1):
        if carica[k] == 'not':
            carica[k] = carica[k + 1]
    return persona, carica


##STARTUPS

# One row per (target, person): split the comma-separated people field.
nodi_target = cb_new[['target_name','target_people']].drop_duplicates()
nodi_target = nodi_target[pd.notna(nodi_target['target_people'])]
nodi_target = nodi_target.assign(target_persone=list(nodi_target.target_people.str.split(","))).explode('target_persone')
nodi_target = nodi_target[['target_name','target_persone']]
persona, carica = _split_people(list(nodi_target['target_persone']))
nodi_target = nodi_target.assign(carica = carica, persona = persona)
# ' Jr.' fragments come from splitting 'Name, Jr. (Role)' on the comma.
nodi_target = nodi_target[nodi_target['persona']!= ' Jr.']
nodi_target_single = nodi_target[['target_name','carica','persona']].rename(columns={'target_name':'name'})
nodi_target_single['type'] = 1

##INVESTORS
nodi_investor = cb_new[['investors_name','investor_ppl']].drop_duplicates()
nodi_investor = nodi_investor[pd.notna(nodi_investor['investor_ppl'])]
nodi_investor = nodi_investor.assign(investor_persone=list(nodi_investor.investor_ppl.str.split(","))).explode('investor_persone')
nodi_investor = nodi_investor[['investors_name','investor_persone']]
persona, carica = _split_people(list(nodi_investor['investor_persone']))
nodi_investor = nodi_investor.assign(carica = carica, persona = persona)
nodi_investor = nodi_investor[nodi_investor['persona']!= ' Jr.']
nodi_investor_single = nodi_investor[['investors_name','carica','persona']].rename(columns={'investors_name':'name'})
nodi_investor_single['type'] = 0

# pd.concat replaces DataFrame.append (removed in pandas 2.0); the index is kept as before.
nodi = pd.concat([nodi_target_single, nodi_investor_single])

# ORGORG

In [None]:
# Rebuild the bipartite investor-target multigraph: one edge per dated row of cb_new.
B = nx.MultiGraph(name='org-bid')
# Add nodes with the node attribute "bipartite"
B.add_nodes_from(nodes[nodes.bipartite==0].id)
B.add_nodes_from(nodes[nodes.bipartite==1].id)
# Every attribute is keyed by node id. 'continent' used to be keyed by the
# continent labels themselves, so it was never attached to any node.
_attributes_by_id = nodes.set_index('id')
for _attr, _column in (('name', 'node'), ('country', 'country'), ('continent', 'continent'),
                       ('industry', 'industry'), ('foundation', 'foundation'),
                       ('bipartite', 'bipartite'), ('target_status', 'target_status')):
    nx.set_node_attributes(B, _attributes_by_id[_column].to_dict(), _attr)
# Add edges only between nodes of opposite node sets
cb_new_dates = cb_new[pd.notna(cb_new['date_y'])].reset_index(drop=True)
# Walk the needed columns in lockstep instead of one .loc lookup per cell.
_edge_rows = zip(cb_new_dates['id_y'], cb_new_dates['id_x'], cb_new_dates['date_y'],
                 cb_new_dates['size_real'], cb_new_dates['investor_numbers'],
                 cb_new_dates['round_simp'])
for _investor, _target, _period, _size, _n_investors, _round in _edge_rows:
    B.add_edge(_investor, _target, date=_period.year, m_raised=_size,
               investors=_n_investors, round_simp=_round)
# Placeholder investors that do not identify a real entity.
_placeholder_names = ('Undisclosed Investors', 'Dead', 'Public',
                      'Undisclosed Angel Investors', 'Undisclosed Venture Investors')
brutti = [x for x, y in B.nodes(data=True) if y['name'] in _placeholder_names]
B.remove_nodes_from(brutti)
# Drop nodes left without any edge.
remove = [node for node, degree in B.degree() if degree == 0]
B.remove_nodes_from(remove)
bid_nodes = {n for n, d in B.nodes(data=True) if d["bipartite"] == 0}
org_nodes = set(B) - bid_nodes

## ORGORG

In [None]:
def project_multi(B, nodes, name, years=range(1970, 2022)):
    """Project the graph ``B`` through ``nodes``, linking their neighbours year by year.

    For every node in ``nodes`` the neighbours are grouped by the ``date`` of the
    first edge that touches them in the node's ego graph; every pair of
    neighbours falling in the same year is linked with an edge carrying that
    year in the ``anno`` attribute.

    Parameters
    ----------
    B : networkx graph
        Source graph whose edges carry a ``date`` attribute (a year).
    nodes : iterable
        Nodes of ``B`` to project through.
    name : str
        Name given to the returned graph.
    years : iterable of int, optional
        Years to consider, processed in the given order. Defaults to
        1970..2021, the range that used to be hard-coded.

    Returns
    -------
    networkx.MultiGraph
        The projected graph.

    Notes
    -----
    NOTE(review): a pair that is already linked is skipped, so each pair ends
    up with a single edge whose ``anno`` is the first year encountered. That
    depends on the iteration order of ``nodes`` (arbitrary for a set) and is
    not necessarily the earliest year. Confirm this is intended.
    """
    years = list(years)  # re-iterated for every node, so materialise once
    G = nx.MultiGraph(name=name)
    for count, node in enumerate(tqdm(nodes), start=1):
        ego = nx.ego_graph(B, node)
        neighbours = [x for x in ego.nodes() if x != node]

        # Data of the first edge touching each vertex, found in one pass over
        # the ego edges (previously rescanned per neighbour and per year).
        first_edge = {}
        for u, v, data in ego.edges(data=True):
            first_edge.setdefault(u, data)
            first_edge.setdefault(v, data)

        # Group neighbours by year, keeping their original relative order.
        by_year = {}
        for x in neighbours:
            by_year.setdefault(first_edge[x]['date'], []).append(x)

        for year in years:
            for first, second in itertools.combinations(by_year.get(year, ()), 2):
                if G.has_edge(first, second):
                    continue
                G.add_edge(first, second, anno=year)

        if count % 1000 == 0:
            print(count, end=' ')
    return G

In [None]:
# Project B through every node in bid_nodes: neighbours of the same bid node
# whose first edge falls in the same year get linked in the resulting graph.
orgorg = project_multi(B, bid_nodes, 'orgorg')

In [None]:
# Slice the projected multigraph into one MultiGraph per period produced by
# window(...); each slice is stored in the namespace as orgorg_<last year>.
finestra = window(list(range(1970,2022)), 10)
for periodo in finestra:
    end_year = periodo[-1]
    # Keep only the edges whose year belongs to the current period.
    edges_in_period = [
        (u, v, d)
        for u, v, d in orgorg.edges(data=True)
        if d['anno'] in periodo
    ]
    locals()[f"orgorg_{end_year}"] = nx.MultiGraph(edges_in_period)
    print(end_year)

In [None]:
# Turn each per-period multigraph into a simple graph (one edge per pair,
# carrying the year of the first parallel edge met) and save it to disk.
finestra = window(list(range(1970,2022)), 10)
for periodo in finestra:
    end_year = str(periodo[-1])
    single_graph = nx.Graph()
    for u, v, data in locals()["orgorg_" + end_year].edges(data=True):
        if single_graph.has_edge(u, v):
            continue  # pair already linked: later parallel edges are ignored
        single_graph.add_edge(u, v, date=[data['anno']])
    locals()["orgorg_single_" + end_year] = single_graph
    nx.readwrite.write_gpickle(single_graph, "orgorg_single" + end_year + ".gpickle")
    print(periodo[-1])

# ORGORG2

In [None]:
# Build one multigraph per period (named after the period's last year):
# neighbours of the same bid node are linked when the date of their first
# edge falls inside the period; the edge stores the later of the two dates.
count=0
for year in range(1979,2022):
    locals()["orgorg2_"+str(year)] = nx.MultiGraph()
for node in tqdm(bid_nodes):
    ego=nx.ego_graph(B, node)
    ndx=[x for x in ego.nodes() if x != node]

    # Date of the first edge touching each vertex, collected in a single pass
    # over the ego edges instead of rescanning them per neighbour and period.
    first_edge = {}
    for u, v, data in ego.edges(data=True):
        first_edge.setdefault(u, data)
        first_edge.setdefault(v, data)
    first_date = {x: first_edge[x]['date'] for x in ndx}

    # window(...) is called again for every node, as before.
    # NOTE(review): presumably it returns a one-shot iterator - confirm
    # before hoisting it out of the loop.
    finestra = window(list(range(1970,2022)), 10)
    for periodo in finestra: # bad trick to link depending on year
        subndx=[x for x in ndx if first_date[x] in periodo]
        for couple in itertools.combinations(subndx, 2):
            anno1 = ego[node][couple[0]][0]['date']
            anno2 = ego[node][couple[1]][0]['date']
            locals()["orgorg2_"+str(periodo[-1])].add_edge(couple[0], couple[1], anno = max(anno1,anno2))
    count+=1
    if count%1000==0:
        print(count, end=' ')

In [None]:
# Turn each per-period orgorg2 multigraph into a simple graph (one edge per
# pair, carrying the year of the first parallel edge met) and save it to disk.
finestra = window(list(range(1970,2022)), 10)
for periodo in finestra:
    end_year = str(periodo[-1])
    multi_graph = locals()["orgorg2_" + end_year]
    single_graph = nx.Graph()
    for u, v, data in multi_graph.edges(data=True):
        if not single_graph.has_edge(u, v):
            single_graph.add_edge(u, v, date=[data['anno']])
    locals()["orgorg2_single_" + end_year] = single_graph
    nx.readwrite.write_gpickle(single_graph, "orgorg2_single" + end_year + ".gpickle")
    print(periodo[-1])

# ORGORG3

In [None]:
# Build one multigraph per cumulative period 1979..Y (named after Y):
# neighbours of the same bid node are linked when the date of their first
# edge falls inside the period; the edge stores the later of the two dates.
# NOTE(review): the cumulative periods start in 1979, so edges dated before
# 1979 never enter any of them - confirm this is intended.
count=0
finestra = list(range(1979,2022))
# Growing prefixes of the year list: [1979], [1979, 1980], ...
win = [finestra[:i] for i in range(1,len(finestra)+1)]
for year in range(1979,2022):
    locals()["orgorg3_"+str(year)] = nx.MultiGraph()

for node in tqdm(bid_nodes):
    ego=nx.ego_graph(B, node)
    ndx=[x for x in ego.nodes() if x != node]

    # Date of the first edge touching each vertex, collected in a single pass
    # over the ego edges instead of rescanning them per neighbour and period.
    first_edge = {}
    for u, v, data in ego.edges(data=True):
        first_edge.setdefault(u, data)
        first_edge.setdefault(v, data)
    first_date = {x: first_edge[x]['date'] for x in ndx}

    for periodo in win: # bad trick to link depending on year
        subndx=[x for x in ndx if first_date[x] in periodo]
        for couple in itertools.combinations(subndx, 2):
            anno1 = ego[node][couple[0]][0]['date']
            anno2 = ego[node][couple[1]][0]['date']
            locals()["orgorg3_"+str(periodo[-1])].add_edge(couple[0], couple[1], anno = max(anno1,anno2))
    count+=1
    if count%1000==0:
        print(count, end=' ')

In [None]:
# Turn each cumulative orgorg3 multigraph into a simple graph (one edge per
# pair, carrying the year of the first parallel edge met) and save it to disk.
for periodo in win:
    end_year = str(periodo[-1])
    multi_graph = locals()["orgorg3_" + end_year]
    single_graph = nx.Graph()
    for u, v, data in multi_graph.edges(data=True):
        if single_graph.has_edge(u, v):
            continue  # pair already linked: later parallel edges are ignored
        single_graph.add_edge(u, v, date=[data['anno']])
    locals()["orgorg3_single_" + end_year] = single_graph
    nx.readwrite.write_gpickle(single_graph, "orgorg3_single" + end_year + ".gpickle")
    print(periodo[-1])

# COVARIATES

In [None]:
# Ids to build covariates for: column 'x' of xi.csv. They are matched against
# cb_new['target_ID'] and nodes['index'] in the cells below.
nodes_fda = pd.read_csv('xi.csv')['x']
# Per-node covariate table; the cells below look rows up by its 'id' column
# and read columns named '<metric>_single_<year>'.
covariates = pd.read_csv('CB_with_covariates.csv')

In [None]:
# For every target id in nodes_fda, summarise the network covariates of the
# investors (id_y) found on the rows sharing the date of the target's first
# row in cb_new. Covariates are read from the '<metric>_single_<year>' column
# of `covariates` for that date's year.
# NOTE(review): the "first" date is simply the first row for the target in
# cb_new's current order - confirm cb_new is sorted as intended.

# (output list name, column prefix in `covariates`, NaN-aware reducer)
metric_specs = [
    ('eigen_max', 'eigenvector_centrality_single_', np.nanmax),
    ('pages_max', 'pagerank_single_', np.nanmax),
    ('degcen_max', 'degree_centrality_single_', np.nanmax),
    ('bet_max', 'betweenness_centrality_single_', np.nanmax),
    ('avg_max', 'average_neighbor_degree_single_', np.nanmax),
    ('dis_mean', 'dispersion_single_', np.nanmean),
    ('clo_max', 'closeness_centrality_single_', np.nanmax),
    ('newman_max', 'newman_betweenness_centrality_single_', np.nanmax),
    ('vote_min', 'voterank_single_', np.nanmin),
]
summaries = {out_name: [] for out_name, _, _ in metric_specs}

# Column selection does not depend on the target, so do it once.
rounds = cb_new[['target_ID','date_y','id_y']]
for i in tqdm(nodes_fda):
    sub = rounds[rounds['target_ID']==i]
    first_date = sub['date_y'].values[0] if len(sub) else None
    if first_date is None or pd.isna(first_date):
        # Target absent from cb_new, or without a usable date: nothing to
        # summarise, record NaN for every metric instead of crashing.
        for out_name, _, _ in metric_specs:
            summaries[out_name].append(np.nan)
        continue

    anno = first_date.year
    investors = set(sub.loc[sub['date_y']==first_date, 'id_y'])

    collected = {out_name: [] for out_name, _, _ in metric_specs}
    for l in investors:
        # One lookup per investor (it used to be repeated for every metric).
        row = covariates[covariates['id']==l]
        for out_name, prefix, _ in metric_specs:
            column = prefix + str(anno)
            # Explicit checks replace the former bare `except:`: an unknown
            # investor or a missing year column yields NaN for that metric
            # only, and unrelated errors are no longer swallowed.
            if len(row) and column in row.columns:
                collected[out_name].append(row[column].values[0])
            else:
                collected[out_name].append(np.nan)

    for out_name, _, reducer in metric_specs:
        values = np.asarray(collected[out_name], dtype=float)
        if values.size == 0 or np.isnan(values).all():
            # Same result the reducers give on all-NaN input, without the
            # RuntimeWarning (and without the ValueError on empty input).
            summaries[out_name].append(np.nan)
        else:
            summaries[out_name].append(reducer(values))

# Expose the results under the names the following cells expect.
eigen_max = summaries['eigen_max']
pages_max = summaries['pages_max']
degcen_max = summaries['degcen_max']
bet_max = summaries['bet_max']
avg_max = summaries['avg_max']
dis_mean = summaries['dis_mean']
clo_max = summaries['clo_max']
newman_max = summaries['newman_max']
vote_min = summaries['vote_min']


In [None]:
# Rows of `nodes` (bipartite == 1) whose 'index' is one of the ids in nodes_fda.
fda_ids = list(nodes_fda)
covariate = nodes[(nodes['index'].isin(fda_ids))&(nodes['bipartite']==1)]

# The metric lists hold one value per entry of nodes_fda, in nodes_fda order.
# Attach them by id rather than by position: positional assignment silently
# mis-assigns values whenever `nodes` is ordered differently from nodes_fda
# (and fails outright when the lengths differ).
# clo_max was computed alongside the other metrics but never exported; it is
# added as the last column so the existing column positions do not move.
per_target = pd.DataFrame(
    {
        'eigen_max': eigen_max,
        'pages_max': pages_max,
        'degcen_max': degcen_max,
        'bet_max': bet_max,
        'avg_max': avg_max,
        'dis_mean': dis_mean,
        'newman_max': newman_max,
        'vote_min': vote_min,
        'clo_max': clo_max,
    },
    index=fda_ids,
)
# An id listed twice in nodes_fda yields identical values; keep one copy so
# the join cannot duplicate rows.
per_target = per_target[~per_target.index.duplicated(keep='first')]
covariate = covariate.join(per_target, on='index')


In [None]:
# Save the covariate table to covariate.csv; the DataFrame index is not written.
covariate.to_csv('covariate.csv', index = False)