In [None]:
import numpy as np
import math
import statsmodels.api as sm
import imblearn
from tqdm.notebook import tqdm
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from networkx.algorithms import bipartite, community 
from numpy import nansum
from numpy import nanmean
from numpy.random import choice
import itertools
from networkx.algorithms import community
import community as community_louvain
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import networkx as nx
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 300)

## Bipartite network generation

### Data preparation

In [None]:
# Load the three raw deal exports; they are stacked into a single frame in the next cell.
cb1 = pd.read_csv('cb1.csv')
cb2 = pd.read_csv('cb2.csv')
cb3 = pd.read_csv('cb3.csv')

In [None]:
# Stack the three exports into one frame with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; ignore_index=True gives the same renumbered index.
cb = pd.concat([cb1, cb2, cb3], ignore_index=True)

In [None]:
# Parse the deal date once (MM/DD/YYYY; anything else is coerced to NaT), then
# derive the month and year periods from the parsed values.
deal_dates = pd.to_datetime(cb['date'], format='%m/%d/%Y', errors='coerce')
cb['date_my'] = deal_dates.dt.to_period('m')
cb['date_y'] = deal_dates.dt.to_period('y')

# DESCRIZIONE

In [None]:
# Deal-row counts by target continent (rows) and investor continent (columns).
cb.groupby(['target_continent','investor_continent']).size().unstack(fill_value=0)

In [None]:
# Share of each target continent's deal rows coming from each investor continent:
# rows are target continents, columns investor continents, every row sums to 1.
cb_a = cb.groupby(['target_continent','investor_continent']).size().unstack(fill_value=0)
# Divide every row by its own total (axis=1). The previous sum(axis=0) produced
# *column* totals that were then aligned to the rows by continent label, so each
# row ended up divided by the total of an unrelated column.
cb_a = cb_a.div(cb_a.sum(axis=1), axis=0)
cb_a

In [None]:
# Total real size per target: keep one size_real per (target, stage) so a round
# listed on several investor rows is counted once, then sum over the stages.
es1 = cb.groupby(['target_name', 'stage'], as_index=False).agg({"size_real": 'first'})
es2 = es1[['target_name', 'size_real']].groupby(['target_name'], as_index=False).sum()
# Targets whose total is exactly 0 are left out before taking the logarithm.
has_size = es2['size_real'] != 0
es3 = es2[has_size]
log_size = np.log(es3['size_real'])
log_size.plot.kde(bw_method=0.01)

In [None]:
# Same density estimate, this time with pandas' default KDE bandwidth.
np.log(es3['size_real']).plot.kde()

In [None]:
# The 300 targets with the largest total size_real.
es3.sort_values("size_real",ascending=False)[0:300]

In [None]:
# Spot check: every row whose target_name is 'zumutor'.
cb[cb['target_name']=='zumutor']

In [None]:
# Display the combined deals frame.
cb

In [None]:
# Last 300 rows whose investors_name is 'Public'.
cb[cb['investors_name']=='Public'][-300:]

In [None]:
# Rank the investors by the number of deal rows they appear in

inv_list = list(cb['investors_name'].values)
inv_counter = {}
for name in inv_list:
    # Start at 0 the first time a name is seen, then add one per occurrence.
    inv_counter[name] = inv_counter.get(name, 0) + 1

# Most frequent first; names with equal counts keep their first-seen order.
popular_invs = sorted(inv_counter, key=lambda name: inv_counter[name], reverse=True)

top = popular_invs[:20]

In [None]:
# Print any missing-value (NaN) entries that ended up among the investor names.
for i in popular_invs:
    if pd.isna(i):
        print(i)

In [None]:
# Number of deal rows whose investors_name is 'Dead'.
inv_counter['Dead']

# GRAPHS

In [None]:
# Minimal node table (name + source id only). It is used to attach node ids to
# the deal rows; the table is rebuilt with more attributes further down.
nodes_org = cb[['target_name', 'target_ID']].drop_duplicates()
nodes_org['bipartite'] = 1   # 1 marks target nodes
nodes_inv = cb[['investors_name', 'investor_id']].drop_duplicates()
nodes_org = nodes_org.rename(columns={"target_name": "node", "target_ID": "index"})
nodes_inv = nodes_inv.rename(columns={"investors_name": "node", "investor_id": 'index'})
nodes_inv['bipartite'] = 0   # 0 marks investor nodes
# pd.concat replaces DataFrame.append (removed in pandas 2.0).
nodes = pd.concat([nodes_org, nodes_inv], ignore_index=True)
# The node id is the row position in the stacked table.
nodes['id'] = nodes.index
nodes

In [None]:
# Non-null investor rows per (target, stage), exposed as investor_numbers.
round_counts = cb.groupby(['target_name', 'stage'], as_index=False).count()
numbers = round_counts[['target_name', 'stage', 'investors_name']].rename(
    columns={"investors_name": "investor_numbers"})

In [None]:
# Enrich every deal row with its round's investor count, the target's node row
# and the investor's node row. The node columns clash in the last merge, so the
# target's get the _x suffix and the investor's the _y suffix.
target_rows = nodes[nodes['bipartite'] == 1]
investor_rows = nodes[nodes['bipartite'] == 0]
cb_new = cb.merge(numbers, how='left', on=['target_name', 'stage'])
cb_new = cb_new.merge(target_rows, how='left',
                      left_on=['target_name', 'target_ID'], right_on=['node', 'index'])
cb_new = cb_new.merge(investor_rows, how='left',
                      left_on=['investors_name', 'investor_id'], right_on=['node', 'index'])


In [None]:
## Repeat the node-table construction from above (kept minimal there to save
## time), this time carrying the descriptive attributes along
nodes_org = cb_new[['target_name', 'target_ID', 'target_country', 'target_continent',
               'industry','target_founded','target_status']].drop_duplicates()
nodes_org['bipartite'] = 1   # 1 marks target nodes
nodes_inv = cb_new[['investors_name', 'investor_id', 'investor_country', 'investor_continent', 'investor_type',
                   'investor_founded']].drop_duplicates()
# Bring both sides onto a shared set of column names.
nodes_org = nodes_org.rename(columns={"target_name": "node", "target_country": "country",
                                      "target_continent": "continent",
                                      "target_ID": "index", "target_founded": "foundation"})
nodes_inv = nodes_inv.rename(columns={"investors_name": "node", "investor_country": "country",
                                      "investor_continent": "continent", "investor_type": "industry",
                                      "investor_id": 'index', "investor_founded": "foundation"})
nodes_inv['bipartite'] = 0   # 0 marks investor nodes
# pd.concat replaces DataFrame.append (removed in pandas 2.0). Investor rows have
# no target_status column, so that value is missing for them.
nodes = pd.concat([nodes_org, nodes_inv], ignore_index=True)
# The node id is the row position in the stacked table.
nodes['id'] = nodes.index
nodes.columns



In [None]:
B = nx.MultiGraph(name='org-bid')
# Add both node sets: investors (bipartite == 0) and targets (bipartite == 1)
B.add_nodes_from(nodes[nodes.bipartite==0].id)
B.add_nodes_from(nodes[nodes.bipartite==1].id)
# Attach the node attributes, always keyed by node id. 'continent' used to be
# keyed by the continent values themselves, which match no node id, so
# set_node_attributes silently skipped it and the attribute was never set.
node_table = nodes.set_index('id')
for column, attribute in (('node', 'name'), ('country', 'country'), ('continent', 'continent'),
                          ('industry', 'industry'), ('foundation', 'foundation'),
                          ('bipartite', 'bipartite'), ('target_status', 'target_status')):
    nx.set_node_attributes(B, node_table[column].to_dict(), attribute)
# Add one edge per deal row, from the investor node (id_y) to the target node (id_x)
for i in range(len(cb_new)):
    B.add_edge(cb_new.loc[i,'id_y'], cb_new.loc[i,'id_x'], date=(cb_new.loc[i,'date_y']).year, m_raised=cb_new.loc[i,'size_real'], 
               investors = cb_new.loc[i,'investor_numbers'], round_simp =cb_new.loc[i,'round_simp'])

In [None]:
## "Ugly" nodes: placeholder investor names that get dropped from the graphs
#'Undisclosed Investors',
# 'Public',
#'Undisclosed Angel Investors',
#'Undisclosed Venture Investors',
#Dead


In [None]:
# Ids of the nodes carrying one of the placeholder names; they are removed from B.
placeholder_names = ('Undisclosed Investors', 'Dead', 'Public',
                     'Undisclosed Angel Investors', 'Undisclosed Venture Investors')
brutti = [node_id for node_id, attrs in B.nodes(data=True) if attrs['name'] in placeholder_names]
B.remove_nodes_from(brutti)

In [None]:
# Scratch cell: a bare string literal, shown as the cell output and nothing more.
'Grant - XIII'

In [None]:
# Node pairs joined by an edge whose round_simp is 'Portfolio Merger'.
biz = []
for src, dst, attrs in B.edges(data=True):
    if attrs['round_simp'] == 'Portfolio Merger':
        biz.append((src, dst))
biz

In [None]:
## (node, degree) pairs of B, highest degree first
sorted(B.degree, key=lambda x: x[1], reverse=True)

In [None]:
# Inspect the attribute dict of node 1119.
B.nodes[1119]

In [None]:
# Split B's nodes into investors (bipartite == 0) and all remaining nodes.
bid_nodes = set()
for node_id, attrs in B.nodes(data=True):
    if attrs["bipartite"] == 0:
        bid_nodes.add(node_id)
org_nodes = set(B).difference(bid_nodes)

# NEW GRAPHS

In [None]:
# TODO: still to be fixed (placeholder cell)


True

# BIDBID

In [None]:
def _number_groups(columns, id_column):
    """Return the distinct `columns` combinations of cb_new, numbered 0..k-1 in `id_column`."""
    groups = cb_new[columns].drop_duplicates()
    groups.index = range(len(groups))
    groups[id_column] = groups.index
    return groups


# One running id per (target, simplified round), (target, year) and (target, stage).
id_round = _number_groups(['target_ID', 'round_simp'], 'id_round')
id_year = _number_groups(['target_ID', 'date_y'], 'id_year')
id_stage = _number_groups(['target_ID', 'stage'], 'id_stage')
id_stage

In [None]:
# Attach the (target, round) id to every deal row.
cb_new = cb_new.merge(id_round, how='left', on=['target_ID', 'round_simp'])


In [None]:
# Attach the (target, stage) id to every deal row.
cb_new = cb_new.merge(id_stage, how='left', on=['target_ID', 'stage'])

In [None]:
# Attach the (target, year) id to every deal row.
cb_new = cb_new.merge(id_year, how='left', on=['target_ID', 'date_y'])

In [None]:
# Split the node table by side: targets (bipartite == 1), investors (bipartite == 0).
nodes_org = nodes.loc[nodes['bipartite'] == 1]
nodes_inv = nodes.loc[nodes['bipartite'] == 0]

In [None]:
# Display the investor node table.
nodes_inv

# FOLLOWERS

In [970]:
## Follower links: within one target, every investor of a later month points to
## every investor of each earlier month (edge = later investor -> earlier investor)
links = []
subcb = cb_new[['stage','date_my','id_y','size_real','investor_numbers','target_name']].drop_duplicates()
subcb = subcb[pd.notna(subcb['date_my'])]
subcb = subcb[pd.notna(subcb['size_real'])]
for i in tqdm(subcb['target_name'].unique()):
    subcb2 = subcb[subcb['target_name']==i]
    # Slice the target's rows by month once. The same filters used to be re-run
    # inside the innermost loop, i.e. once for every single investor pair.
    rows_by_month = {month: subcb2[subcb2['date_my']==month] for month in subcb2['date_my'].unique()}
    for date1, earlier_rows in rows_by_month.items():
        list1 = list(earlier_rows['id_y'])
        for date2, riferimento in rows_by_month.items():
            if date2>date1:
                list2 = list(riferimento['id_y'])
                # Edge attributes are read from the first row of the later month;
                # 'money' is that row's size_real split over its investor_numbers.
                attributes = {'money': riferimento['size_real'].values[0]/riferimento['investor_numbers'].values[0],
                              'date': date2.year,
                              'stage': riferimento['stage'].values[0],
                              'target_name':i}
                for couple in itertools.product(list2, list1):
                    # Every link gets its own copy of the attribute dict.
                    links.append(couple + (dict(attributes),))

HBox(children=(FloatProgress(value=0.0, max=27175.0), HTML(value='')))




In [971]:
# Directed multigraph on the investor nodes, built from the follower links.
bidbid_dir = nx.MultiDiGraph()
bidbid_dir.add_nodes_from(nodes[nodes.bipartite==0].id)
# Copy the investor attributes onto the nodes, keyed by node id.
for column, attribute in (('node', 'name'), ('country', 'country'), ('continent', 'continent'),
                          ('industry', 'industry'), ('foundation', 'foundation'),
                          ('bipartite', 'bipartite')):
    nx.set_node_attributes(bidbid_dir, pd.Series(nodes_inv[column], index=nodes_inv.id).to_dict(), attribute)
bidbid_dir.add_edges_from(links)
# Drop the isolated nodes, then the placeholder nodes in `brutti`, then whatever
# became isolated once those were gone.
remove = [node for node, degree in bidbid_dir.degree() if degree == 0]
bidbid_dir.remove_nodes_from(remove)
bidbid_dir.remove_nodes_from(brutti)
remove2 = [node for node, degree in bidbid_dir.degree() if degree == 0]
bidbid_dir.remove_nodes_from(remove2)
nx.readwrite.write_gpickle(bidbid_dir,'bidbid_dir_cb.gpickle')
nx.info(bidbid_dir)

'Name: \nType: MultiDiGraph\nNumber of nodes: 15354\nNumber of edges: 576889\nAverage in degree:  37.5726\nAverage out degree:  37.5726'

In [984]:
# (node, out-degree) pairs of bidbid_dir, highest out-degree first.
sorted(bidbid_dir.out_degree, key=lambda x: x[1], reverse=True)

[(77356, 6602),
 (50667, 4997),
 (51253, 4530),
 (50595, 3772),
 (51084, 3483),
 (51153, 3428),
 (50481, 3218),
 (50770, 3172),
 (50919, 2885),
 (50917, 2799),
 (50766, 2777),
 (51077, 2260),
 (50605, 2159),
 (50519, 2128),
 (51291, 2106),
 (50685, 2057),
 (51276, 2023),
 (51305, 1905),
 (50939, 1883),
 (57908, 1882),
 (50985, 1858),
 (51263, 1770),
 (52858, 1708),
 (50516, 1699),
 (52141, 1679),
 (50480, 1663),
 (55245, 1653),
 (50710, 1648),
 (55129, 1629),
 (54057, 1622),
 (50585, 1617),
 (51005, 1616),
 (50515, 1571),
 (56764, 1486),
 (51204, 1481),
 (50989, 1445),
 (51063, 1434),
 (50859, 1412),
 (50683, 1406),
 (54061, 1405),
 (50577, 1393),
 (51175, 1379),
 (50512, 1376),
 (50594, 1358),
 (50874, 1337),
 (52591, 1328),
 (51709, 1318),
 (52127, 1317),
 (50803, 1245),
 (50965, 1243),
 (50668, 1230),
 (50910, 1222),
 (51365, 1213),
 (50893, 1209),
 (52458, 1205),
 (52143, 1196),
 (56017, 1180),
 (55127, 1168),
 (56477, 1120),
 (50638, 1116),
 (50599, 1097),
 (51304, 1087),
 (54042,

In [987]:
# Inspect the attribute dict of node 50605.
bidbid_dir.nodes[50605]

{'name': 'Frazier Healthcare Partners',
 'country': 'United States',
 'continent': 'North America',
 'industry': 'Private Equity',
 'foundation': 1991.0,
 'bipartite': 0}

In [988]:
from itertools import islice

def window(seq, n):
    """Yield a sliding window (a tuple of width n) over the items of seq.

        s -> (s0, s1, ..., s[n-1]), (s1, s2, ..., sn), ...

    Nothing is yielded when seq has fewer than n items. This is a generator, so
    the result can be iterated only once; wrap it in list() to reuse it.

    Raises:
        ValueError: if n is smaller than 1 (raised when iteration starts).
    """
    # islice already rejects a negative n; n == 0 used to slip through and yield
    # an empty tuple followed by 1-item tuples instead of width-n windows.
    if n < 1:
        raise ValueError("window width n must be at least 1")
    it = iter(seq)
    result = tuple(islice(it, n))
    if len(result) == n:
        yield result
    for elem in it:
        # Slide by one: drop the oldest item, append the newest.
        result = result[1:] + (elem,)
        yield result

In [989]:
# Five-year sliding windows over 1993-2021, materialised as a list. window()
# returns a one-shot generator: the first loop over it would exhaust it and every
# later `for periodo in finestra` loop would silently iterate over nothing.
finestra = list(window(list(range(1993,2022)),5))

In [None]:
# One multigraph per window with the bidbid_dir edges dated inside that window,
# published as the notebook-level name bidbid_dir_<last year of the window>.
# NOTE(review): bidbid_dir is a MultiDiGraph, so building an nx.MultiGraph here
# drops the edge direction - confirm that this is intended.
for periodo in finestra:
    window_edges = [(u, v, d) for u, v, d in bidbid_dir.edges(data=True) if d['date'] in periodo]
    graph_name = "bidbid_dir_" + str(periodo[-1])
    globals()[graph_name] = nx.MultiGraph(window_edges)
    print(periodo[-1])

1997
1998
1999
2000
2001
2002
2003
2004
2005
2006


In [None]:
# Collapse every windowed multigraph into a simple graph: one edge per node pair,
# with money summed and date / stage / target_name collected into lists
for periodo in finestra:
    multi = globals()["bidbid_dir_"+str(periodo[-1])]
    a = nx.Graph()
    for u,v,data in multi.edges(data=True):
        d = data['date']
        m = data['money']
        s = data['stage']
        t = data['target_name']
        if a.has_edge(u,v):
            merged = a[u][v]
            merged['money'] += m
            # Extend the lists accumulated so far. They used to be rebuilt from
            # parallel edge 0 plus the current edge, which discarded the values
            # of every parallel edge in between.
            for key, value in (('date', d), ('stage', s), ('target_name', t)):
                if value not in merged[key]:
                    merged[key].append(value)
        else:
            a.add_edge(u, v, date=[d], money=m, stage=[s], target_name=[t])
    globals()["bidbid_dir_single_"+str(periodo[-1])] = a
    print(periodo[-1])

# UNDI

In [None]:
## Co-investment links: investors that backed the same target in the same year
links = []
subcb = cb_new[['id_year','date_y','id_y']].drop_duplicates()
subcb = subcb[subcb['date_y'].notna()]
for i in tqdm(subcb['id_year'].unique()):
    subcb2 = subcb[subcb['id_year']==i]
    # Every unordered pair of distinct investors of the group, tagged with the
    # year read from the group's first row.
    links.extend(
        couple + ({'date': subcb2['date_y'].values[0].year},)
        for couple in itertools.combinations(np.unique(subcb2['id_y'].values), 2)
    )


In [None]:
# Undirected multigraph on the investor nodes, built from the same-year links.
bidbid = nx.MultiGraph()
bidbid.add_nodes_from(nodes[nodes.bipartite==0].id)
# Copy the investor attributes onto the nodes, keyed by node id.
for column, attribute in (('node', 'name'), ('country', 'country'), ('continent', 'continent'),
                          ('industry', 'industry'), ('foundation', 'foundation'),
                          ('bipartite', 'bipartite')):
    nx.set_node_attributes(bidbid, pd.Series(nodes_inv[column], index=nodes_inv.id).to_dict(), attribute)
bidbid.add_edges_from(links)
# Drop the isolated nodes, then the placeholder nodes in `brutti`, then whatever
# became isolated once those were gone.
remove = [node for node, degree in bidbid.degree() if degree == 0]
bidbid.remove_nodes_from(remove)
bidbid.remove_nodes_from(brutti)
remove2 = [node for node, degree in bidbid.degree() if degree == 0]
bidbid.remove_nodes_from(remove2)
nx.readwrite.write_gpickle(bidbid,'bidbid_cb.gpickle')
nx.info(bidbid)

In [None]:
# Summary (type, node and edge counts, average degree) of bidbid.
nx.info(bidbid)

In [None]:
# Sanity check: list any nodes of bidbid that still have degree 0.
[node for node,degree in dict(bidbid.degree()).items() if degree == 0]

In [None]:
# Inspect the attribute dict of node 50481.
bidbid.nodes[50481]

In [None]:
# (node, degree) pairs of bidbid, highest degree first.
sorted(bidbid.degree, key=lambda x: x[1], reverse=True)

In [None]:
## Co-investment links: investors in the same (target, simplified round) group
links = []
subcb = cb_new[['round_simp','id_year','date_y','id_y','size_real','id_round','investor_numbers','target_name']].drop_duplicates()
subcb = subcb[subcb['date_y'].notna()]
subcb = subcb[subcb['size_real'].notna()]
for i in tqdm(subcb['id_round'].unique()):
    subcb2 = subcb[subcb['id_round']==i]
    # Every unordered pair of distinct investors of the group; the attributes are
    # read from the group's first row.
    # NOTE(review): investor_numbers is counted per (target_name, stage), not per
    # id_round; where it equals 1 the 'money' division has a zero denominator.
    links.extend(
        couple + ({'date': subcb2['date_y'].values[0].year,
                   'round': subcb2['round_simp'].values[0],
                   'money': subcb2['size_real'].values[0]/(subcb2['investor_numbers'].values[0]-1),
                   'target_name': subcb2['target_name'].values[0]},)
        for couple in itertools.combinations(np.unique(subcb2['id_y'].values), 2)
    )


In [None]:
# Undirected multigraph on the investor nodes, built from the same-round links.
bidbid_round = nx.MultiGraph()
bidbid_round.add_nodes_from(nodes[nodes.bipartite==0].id)
# Copy the investor attributes onto the nodes, keyed by node id.
for column, attribute in (('node', 'name'), ('country', 'country'), ('continent', 'continent'),
                          ('industry', 'industry'), ('foundation', 'foundation'),
                          ('bipartite', 'bipartite')):
    nx.set_node_attributes(bidbid_round, pd.Series(nodes_inv[column], index=nodes_inv.id).to_dict(), attribute)
bidbid_round.add_edges_from(links)
# Drop the isolated nodes, then the placeholder nodes in `brutti`, then whatever
# became isolated once those were gone.
remove = [node for node, degree in bidbid_round.degree() if degree == 0]
bidbid_round.remove_nodes_from(remove)
bidbid_round.remove_nodes_from(brutti)
remove2 = [node for node, degree in bidbid_round.degree() if degree == 0]
bidbid_round.remove_nodes_from(remove2)
nx.readwrite.write_gpickle(bidbid_round,'bidbid_round_cb.gpickle')
nx.info(bidbid_round)

In [None]:
## Co-investment links: investors in the same (target, stage) group
links = []
subcb = cb_new[['stage','date_y','id_y','size_real','id_stage','investor_numbers','target_name']].drop_duplicates()
subcb = subcb[subcb['date_y'].notna()]
subcb = subcb[subcb['size_real'].notna()]
for i in tqdm(subcb['id_stage'].unique()):
    subcb2 = subcb[subcb['id_stage']==i]
    # Every unordered pair of distinct investors of the group; the attributes are
    # read from the group's first row, with size_real split over the
    # investor_numbers - 1 partners of each investor.
    links.extend(
        couple + ({'date': subcb2['date_y'].values[0].year,
                   'stage': subcb2['stage'].values[0],
                   'money': subcb2['size_real'].values[0]/(subcb2['investor_numbers'].values[0]-1),
                   'target_name': subcb2['target_name'].values[0]},)
        for couple in itertools.combinations(np.unique(subcb2['id_y'].values), 2)
    )

In [None]:
# Undirected multigraph on the investor nodes, built from the same-stage links.
bidbid_stage = nx.MultiGraph()
bidbid_stage.add_nodes_from(nodes[nodes.bipartite==0].id)
# Copy the investor attributes onto the nodes, keyed by node id.
for column, attribute in (('node', 'name'), ('country', 'country'), ('continent', 'continent'),
                          ('industry', 'industry'), ('foundation', 'foundation'),
                          ('bipartite', 'bipartite')):
    nx.set_node_attributes(bidbid_stage, pd.Series(nodes_inv[column], index=nodes_inv.id).to_dict(), attribute)
bidbid_stage.add_edges_from(links)
# Drop the isolated nodes, then the placeholder nodes in `brutti`, then whatever
# became isolated once those were gone.
remove = [node for node, degree in bidbid_stage.degree() if degree == 0]
bidbid_stage.remove_nodes_from(remove)
bidbid_stage.remove_nodes_from(brutti)
remove2 = [node for node, degree in bidbid_stage.degree() if degree == 0]
bidbid_stage.remove_nodes_from(remove2)
nx.readwrite.write_gpickle(bidbid_stage,'bidbid_stage_cb.gpickle')
nx.info(bidbid_stage)

In [None]:
## Same-stage links again, this time with replacement (self-pairs included)

In [950]:
## Same-stage co-investment links, including each investor paired with itself
links = []
subcb = cb_new[['stage','date_y','id_y','size_real','id_stage','investor_numbers','target_name']].drop_duplicates()
subcb = subcb[subcb['date_y'].notna()]
subcb = subcb[subcb['size_real'].notna()]
for i in tqdm(subcb['id_stage'].unique()):
    subcb2 = subcb[subcb['id_stage']==i]
    # Unordered investor pairs with replacement, so (x, x) self-pairs are produced
    # too; here size_real is split over investor_numbers itself.
    links.extend(
        couple + ({'date': subcb2['date_y'].values[0].year,
                   'stage': subcb2['stage'].values[0],
                   'money': subcb2['size_real'].values[0]/(subcb2['investor_numbers'].values[0]),
                   'target_name': subcb2['target_name'].values[0]},)
        for couple in itertools.combinations_with_replacement(np.unique(subcb2['id_y'].values), 2)
    )

HBox(children=(FloatProgress(value=0.0, max=63035.0), HTML(value='')))




In [951]:
# Undirected multigraph on the investor nodes, built from the same-stage links
# that include self-pairs.
bidbid_stage_self = nx.MultiGraph()
bidbid_stage_self.add_nodes_from(nodes[nodes.bipartite==0].id)
# Copy the investor attributes onto the nodes, keyed by node id.
for column, attribute in (('node', 'name'), ('country', 'country'), ('continent', 'continent'),
                          ('industry', 'industry'), ('foundation', 'foundation'),
                          ('bipartite', 'bipartite')):
    nx.set_node_attributes(bidbid_stage_self, pd.Series(nodes_inv[column], index=nodes_inv.id).to_dict(), attribute)
bidbid_stage_self.add_edges_from(links)
# Drop the isolated nodes, then the placeholder nodes in `brutti`, then whatever
# became isolated once those were gone.
remove = [node for node, degree in bidbid_stage_self.degree() if degree == 0]
bidbid_stage_self.remove_nodes_from(remove)
bidbid_stage_self.remove_nodes_from(brutti)
remove2 = [node for node, degree in bidbid_stage_self.degree() if degree == 0]
bidbid_stage_self.remove_nodes_from(remove2)
nx.readwrite.write_gpickle(bidbid_stage_self,'bidbid_stage_self.gpickle')
nx.info(bidbid_stage_self)

'Name: \nType: MultiGraph\nNumber of nodes: 20413\nNumber of edges: 331456\nAverage degree:  32.4750'

In [966]:
# First rows of the enriched deals frame (all columns, including the merged ids).
cb_new.head()

Unnamed: 0,target_name,stage,size_MLN,date,investors_name,url,description,industry,target_country,target_collections,target_status,target_ID,target_founded,round_simp,target_continent,target_state,target_city,target_street,target_people,target_competitors,investor_name,investor_url,investor_description,investor_country,investor_type,investor_deals_now,investor_companies,investor_deals_total,investor_exit_12,investor_ppl,investor_deals_12,investor_collections,investor_id,investor_industry,investor_state,investor_continent,investor_city,investor_founded,investor_street,date_dt,DATE,CPIAUCSL,size_real,date_my,date_y,investor_numbers,node_x,index_x,bipartite_x,id_x,node_y,index_y,bipartite_y,id_y,id_round,id_year,id_stage
0,Shionogi & Co.,IPO,,05/16/1949,Public,shionogi.com,Shionogi & Co. (TYO: 4507) focuses on the rese...,Pharmaceuticals / Drugs,Japan,Pharma Startups,IPO / Went public,262824,1878.0,IPO,Asia,,Osaka,"1-8, Doshomachi 3-chome Chuo-ku…",,Sumitomo Dainippon Pharma,,,,,,,,,,,,,,,,,,,,1949-05,1949-05,23.91,,1949-05,1949,1,Shionogi & Co.,262824.0,1,0,Public,,0,50462,0,0,0
1,Astellas Pharma,IPO,,05/16/1949,Public,astellas.com,Astellas Pharma (TSE: 4503) specializes in the...,Pharmaceuticals / Drugs,Japan,"Digital Therapeutics, Pharma Startups, Cancer ...",IPO / Went public,243168,1923.0,IPO,Asia,,Tokyo,"2-5-1, Nihonbashi-Honcho, Chuo-ku…","Yoshihiko Hatanaka (CEO),Yoshirou Myyokawa (EV...",Sumitomo Dainippon Pharma,,,,,,,,,,,,,,,,,,,,1949-05,1949-05,23.91,,1949-05,1949,1,Astellas Pharma,243168.0,1,1,Public,,0,50462,1,1,1
2,Sumitomo Dainippon Pharma,IPO,,05/16/1949,Public,ds-pharma.com,Sumitomo Dainippon Pharma (TYO: 4506) is a pha...,Pharmaceuticals / Drugs,Japan,Digital Therapeutics and Pharma Startups…,IPO / Went public,447180,1897.0,IPO,Asia,,Osaka,"6-8, Doshomachi 2-chome Chuo-ku…",Masayo Tada (CEO),"Astellas Pharma,Chugai Pharmaceutical,Daiichi ...",,,,,,,,,,,,,,,,,,,,1949-05,1949-05,23.91,,1949-05,1949,1,Sumitomo Dainippon Pharma,447180.0,1,2,Public,,0,50462,2,2,2
3,Konica Minolta,IPO,,05/16/1949,Public,konicaminolta.com,"Konica Minolta (コニカミノルタ) (TYO: 4902), founded ...",Medical Devices & Equipment,Japan,Conference Exhibitors,IPO / Went public,57191,1936.0,IPO,Asia,,Tokyo,"JP Tower 2-7-2 Marunouchi, Chiyoda-ku…",Michael Mathé (SVP),Xerox,,,,,,,,,,,,,,,,,,,,1949-05,1949-05,23.91,,1949-05,1949,1,Konica Minolta,57191.0,1,3,Public,,0,50462,3,3,3
4,Takeda Pharmaceutical,IPO,,05/16/1949,Public,takeda.com,Takeda Pharmaceutical (TYO: 4502) (NYSE: TAK) ...,Pharmaceuticals / Drugs,Japan,"Poop Tech, Pharma Startups, Rare Diseases, Sle...",IPO / Went public,373007,1781.0,IPO,Asia,,Tokyo,"2-1-1 Nihonbashihonmachi, Chuo-ku…","Frank Morich (CEO),Yasuchika Hasegawa (CEO),Da...","Sumitomo Dainippon Pharma,Sunovion…",,,,,,,,,,,,,,,,,,,,1949-05,1949-05,23.91,,1949-05,1949,1,Takeda Pharmaceutical,373007.0,1,4,Public,,0,50462,4,4,4


In [965]:
# Inspect the attribute dict of node 50479.
bidbid_stage_self.nodes[50479]

{'name': 'Presidio Partners',
 'country': 'United States',
 'continent': 'North America',
 'industry': 'Venture Capital',
 'foundation': nan,
 'bipartite': 0}

In [None]:
# Print every edge of bidbid_stage together with its attribute dict.
for u,v,z in bidbid_stage.edges(data = True):
    print((u,v,z))

In [None]:
# Spot check: every enriched deal row for the target 'Evidation Health'.
cb_new[cb_new['target_name']=='Evidation Health']

# SINGLE


In [None]:
# Collapse bidbid_stage into a simple graph: one edge per node pair, with money
# summed and date / stage / target_name collected into lists.
bidbid_stage_single = nx.Graph()
for u,v,data in bidbid_stage.edges(data=True):
    d = data['date']
    m = data['money']
    s = data['stage']
    t = data['target_name']
    if bidbid_stage_single.has_edge(u,v):
        merged = bidbid_stage_single[u][v]
        merged['money'] += m
        # Extend the lists accumulated so far. They used to be rebuilt from
        # parallel edge 0 plus the current edge, which discarded the values of
        # every parallel edge in between.
        for key, value in (('date', d), ('stage', s), ('target_name', t)):
            if value not in merged[key]:
                merged[key].append(value)
    else:
        bidbid_stage_single.add_edge(u, v, date=[d], money=m, stage=[s], target_name=[t])
        

In [None]:
# Save the collapsed same-stage graph to disk and show its summary.
nx.readwrite.write_gpickle(bidbid_stage_single,'bidbid_stage_single_cb.gpickle')
nx.info(bidbid_stage_single)

In [None]:
# Inspect the merged attributes of the edge between nodes 50479 and 50480.
bidbid_stage_single[50479][50480]

In [None]:
## With-replacement variant: collapse the graph that includes self-pairs

In [952]:
# Collapse bidbid_stage_self into a simple graph: one edge per node pair, with
# money summed and date / stage / target_name collected into lists.
bidbid_stage_self_single = nx.Graph()
for u,v,data in bidbid_stage_self.edges(data=True):
    d = data['date']
    m = data['money']
    s = data['stage']
    t = data['target_name']
    if bidbid_stage_self_single.has_edge(u,v):
        merged = bidbid_stage_self_single[u][v]
        merged['money'] += m
        # Extend the lists accumulated so far. They used to be rebuilt from
        # parallel edge 0 plus the current edge, which discarded the values of
        # every parallel edge in between.
        for key, value in (('date', d), ('stage', s), ('target_name', t)):
            if value not in merged[key]:
                merged[key].append(value)
    else:
        bidbid_stage_self_single.add_edge(u, v, date=[d], money=m, stage=[s], target_name=[t])
        

In [954]:
# Save the collapsed self-pair graph to disk and show its summary.
nx.readwrite.write_gpickle(bidbid_stage_self_single,'bidbid_stage_self_single.gpickle')
nx.info(bidbid_stage_self_single)

'Name: \nType: Graph\nNumber of nodes: 20413\nNumber of edges: 164243\nAverage degree:  16.0920'

# WINDOW

In [None]:
from itertools import islice

def window(seq, n):
    """Yield a sliding window (a tuple of width n) over the items of seq.

        s -> (s0, s1, ..., s[n-1]), (s1, s2, ..., sn), ...

    Nothing is yielded when seq has fewer than n items. This is a generator, so
    the result can be iterated only once; wrap it in list() to reuse it.

    NOTE: a function with the same name and logic is already defined earlier in
    this notebook; this redefinition shadows it.

    Raises:
        ValueError: if n is smaller than 1 (raised when iteration starts).
    """
    # islice already rejects a negative n; n == 0 used to slip through and yield
    # an empty tuple followed by 1-item tuples instead of width-n windows.
    if n < 1:
        raise ValueError("window width n must be at least 1")
    it = iter(seq)
    result = tuple(islice(it, n))
    if len(result) == n:
        yield result
    for elem in it:
        # Slide by one: drop the oldest item, append the newest.
        result = result[1:] + (elem,)
        yield result

In [963]:
# Five-year sliding windows over 1993-2021, materialised as a list. window()
# returns a one-shot generator: the first loop over it would exhaust it and every
# later `for periodo in finestra` loop would silently iterate over nothing.
finestra = list(window(list(range(1993,2022)),5))

In [940]:
# One multigraph per window with the bidbid_stage edges dated inside that window,
# published as the notebook-level name bidbid_stage_<last year of the window>.
for periodo in finestra:
    window_edges = [(u, v, d) for u, v, d in bidbid_stage.edges(data=True) if d['date'] in periodo]
    graph_name = "bidbid_stage_" + str(periodo[-1])
    globals()[graph_name] = nx.MultiGraph(window_edges)
    print(periodo[-1])

1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021


In [None]:
# Debugging leftover: relies on periodo, u and v still being bound by earlier loops.
locals()["bidbid_stage_"+str(periodo[-1])][u][v]

In [942]:
# Collapse every windowed multigraph into a simple graph: one edge per node pair,
# with money summed and date / stage / target_name collected into lists
for periodo in finestra:
    multi = globals()["bidbid_stage_"+str(periodo[-1])]
    a = nx.Graph()
    for u,v,data in multi.edges(data=True):
        d = data['date']
        m = data['money']
        s = data['stage']
        t = data['target_name']
        if a.has_edge(u,v):
            merged = a[u][v]
            merged['money'] += m
            # Extend the lists accumulated so far. They used to be rebuilt from
            # parallel edge 0 plus the current edge, which discarded the values
            # of every parallel edge in between.
            for key, value in (('date', d), ('stage', s), ('target_name', t)):
                if value not in merged[key]:
                    merged[key].append(value)
        else:
            a.add_edge(u, v, date=[d], money=m, stage=[s], target_name=[t])
    globals()["bidbid_stage_single_"+str(periodo[-1])] = a
    print(periodo[-1])

1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021


In [943]:
# Summary of the collapsed graph for the window ending in 2021.
nx.info(bidbid_stage_single_2021)

'Name: \nType: Graph\nNumber of nodes: 10374\nNumber of edges: 65674\nAverage degree:  12.6613'

In [945]:
# Save every windowed collapsed graph as bidbid_stage_single_<last year>.gpickle.
for periodo in finestra:
    graph_name = "bidbid_stage_single_" + str(periodo[-1])
    nx.readwrite.write_gpickle(globals()[graph_name], graph_name + ".gpickle")
    print(periodo[-1])

1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021


In [None]:
## With-replacement variant: windowed graphs that include self-pairs

In [960]:
# One multigraph per window with the bidbid_stage_self edges dated inside that
# window, published as the notebook-level name bidbid_stage_self_<last year>.
for periodo in finestra:
    window_edges = [(u, v, d) for u, v, d in bidbid_stage_self.edges(data=True) if d['date'] in periodo]
    graph_name = "bidbid_stage_self_" + str(periodo[-1])
    globals()[graph_name] = nx.MultiGraph(window_edges)
    print(periodo[-1])

1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021


In [964]:
# Collapse every windowed self-pair multigraph into a simple graph (one edge per
# node pair, money summed, date / stage / target_name collected into lists) and
# save each result to disk
for periodo in finestra:
    multi = globals()["bidbid_stage_self_"+str(periodo[-1])]
    a = nx.Graph()
    for u,v,data in multi.edges(data=True):
        d = data['date']
        m = data['money']
        s = data['stage']
        t = data['target_name']
        if a.has_edge(u,v):
            merged = a[u][v]
            merged['money'] += m
            # Extend the lists accumulated so far. They used to be rebuilt from
            # parallel edge 0 plus the current edge, which discarded the values
            # of every parallel edge in between.
            for key, value in (('date', d), ('stage', s), ('target_name', t)):
                if value not in merged[key]:
                    merged[key].append(value)
        else:
            a.add_edge(u, v, date=[d], money=m, stage=[s], target_name=[t])
    globals()["bidbid_stage_self_single_"+str(periodo[-1])] = a
    nx.readwrite.write_gpickle(a, "bidbid_stage_self_single_"+str(periodo[-1])+".gpickle")
    print(periodo[-1])
    

1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021


# COMMUNITIES

In [None]:
# Louvain communities of bidbid. The algorithm visits nodes in random order, so a
# fixed random_state is passed to make the partition reproducible across runs.
partition = community_louvain.best_partition(bidbid, random_state=0)

In [None]:
# Group the nodes by community: d maps each community id to the list of its nodes.
# `com` holds the distinct community ids.
com = set(partition.values())

# Single pass over the partition instead of rescanning all of its keys once per
# community; the key order of d and the node order inside each list are unchanged.
d = {n: [] for n in com}
for k, community_id in partition.items():
    d[community_id].append(k)

In [None]:
# Display the community -> nodes mapping.
d

In [None]:
# Number of communities found.
len(d)

In [None]:
# Non-null value counts of every column, per stage.
cb_new.groupby('stage').count()

In [None]:
# Scratch note kept as bare string literals (presumably stage labels of interest -
# TODO confirm); it has no effect on later cells.
'Asset Sale' 'Convertible Note' 'Dead' 'Debt' 'Distressed & Special Situation' 'Leveraged Buyout' 'Line of Credit' 'Loan'
'Mezzanine' 'Merger' 'Revenue Finance' 'Project Finance' 'Acq-'

# FONDAZIONI

In [None]:

# Targets with a known foundation year, most recently founded first.
has_foundation = nodes_org['foundation'].notna()
nodes_org_f = nodes_org[has_foundation]
nodes_org_f.sort_values("foundation", ascending=False)


In [None]:
# Distinct target_status values among the targets founded in 2000 or later.
set(nodes_org_f[nodes_org_f['foundation']>=2000]['target_status'])