In [2]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import pandas_datareader as pdr

In [4]:
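# NOTE: disabled download of the S&P 500 index itself, kept for reference only.
# `web` is not imported in the cells shown here (pandas_datareader is imported as `pdr`),
# so these lines would need that alias before they could be re-enabled.
#sp_500_series= web.DataReader('^GSPC', 'yahoo', start='2013-01-01', end='2019-12-31') #['Adj Close']
#sp_500_df = sp_500_series.to_frame()
#sp_500_np = sp_500_df[['Adj Close']].values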

In [5]:
# Scrape the S&P 500 constituents page: read_html returns every table it finds
# on the page as a list of DataFrames, and the first one is kept and
# written to a local CSV snapshot.
SP500_WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
table = pd.read_html(SP500_WIKI_URL)
wiki_sp500 = table[0]
wiki_sp500.to_csv(path_or_buf='S&P500-Info.csv')

In [6]:
# 'Symbol' column of the scraped Wikipedia table; passed further down as the
# list of symbols for the price download.
# NOTE(review): symbols are taken verbatim from Wikipedia (share classes are
# presumably written with a dot, e.g. "BRK.B") -- confirm the data source
# downstream accepts that spelling.
sp_500_symbols = wiki_sp500['Symbol']

In [8]:
# Download daily price history for every S&P 500 constituent over the study window.
start_date = pd.Timestamp('2013-01-01')
end_date = pd.Timestamp('2019-12-31')

# Wikipedia writes share classes with a dot (e.g. "BRK.B", "BF.B") whereas
# Yahoo Finance tickers use a dash ("BRK-B", "BF-B"). Requesting the dotted
# form fails for those symbols, so normalise the separator for the request only;
# `sp_500_symbols` itself is left untouched for any other consumer.
# The symbol labels in the returned frame therefore use the dashed spelling.
yahoo_symbols = sp_500_symbols.str.replace('.', '-', regex=False)

historical_prices = pdr.get_data_yahoo(symbols=yahoo_symbols, start=start_date, end=end_date)



In [11]:
# Keep only the 'Adj Close' field of the downloaded data.
# presumably this yields one adjusted-close column per ticker, indexed by
# trading date -- TODO confirm against the shape of `historical_prices`.
prices = historical_prices['Adj Close']

Date
2013-01-02     75.612129
2013-01-03     75.524376
2013-01-04     76.082802
2013-01-07     76.178520
2013-01-08     76.186501
                 ...    
2019-12-24    168.945236
2019-12-26    168.859161
2019-12-27    169.499847
2019-12-30    168.132446
2019-12-31    168.696625
Name: MMM, Length: 1762, dtype: float64

In [28]:
# Load the four relation tables from data/. The 'item' and 'itemLabel' columns
# are removed from the wiki-relations and board-member tables right at load
# time (no in-place mutation afterwards); the other two tables are kept as read.
_dropped_cols = ['item', 'itemLabel']
wiki_relations = pd.read_csv('data/wiki_relations.csv').drop(columns=_dropped_cols)
board_members = pd.read_csv('data/board_members.csv').drop(columns=_dropped_cols)
members = pd.read_csv('data/member.csv')
industry_owned = pd.read_csv('data/industry_owned.csv')

In [29]:
# Restrict every relation table to rows whose exchangeLabel is NASDAQ or the
# New York Stock Exchange.
_kept_exchanges = ['NASDAQ', 'New York Stock Exchange']
wiki_filtered = wiki_relations[wiki_relations['exchangeLabel'].isin(_kept_exchanges)]
board_filtered = board_members[board_members['exchangeLabel'].isin(_kept_exchanges)]
member_filtered = members[members['exchangeLabel'].isin(_kept_exchanges)]
io_filtered = industry_owned[industry_owned['exchangeLabel'].isin(_kept_exchanges)]

In [33]:
# Distinct tickerLabel values of each filtered table, in order of first appearance.
unique_wiki, unique_board, unique_member, unique_io = (
    filtered['tickerLabel'].unique()
    for filtered in (wiki_filtered, board_filtered, member_filtered, io_filtered)
)

In [20]:
# Per-ticker industry lists (the input for the industry graph): one row per
# tickerLabel, with that ticker's industryLabel values gathered into a list
# stored in the 'industries' column.
industries = (
    io_filtered
    .groupby('tickerLabel')['industryLabel']
    .apply(list)
    .reset_index(name='industries')
)

In [21]:
# Per-ticker owner lists (the input for the owned-by graph): one row per
# tickerLabel, with that ticker's ownedbyLabel values gathered into a list
# stored in the 'ownedby' column.
owned_by = (
    io_filtered
    .groupby('tickerLabel')['ownedbyLabel']
    .apply(list)
    .reset_index(name='ownedby')
)

In [31]:
# One row per tickerLabel, with that ticker's memberLabel values gathered into
# a list stored in the 'members' column.
member = (
    member_filtered
    .groupby('tickerLabel')['memberLabel']
    .apply(list)
    .reset_index(name='members')
)

In [34]:
# One row per tickerLabel, with that ticker's boardmemberLabel values gathered
# into a list stored in the 'board_members' column.
board_member = (
    board_filtered
    .groupby('tickerLabel')['boardmemberLabel']
    .apply(list)
    .reset_index(name='board_members')
)

In [22]:
# Persist the grouped per-ticker tables; to_csv is called with its defaults,
# so the positional row index is written as the first column.
owned_by.to_csv(path_or_buf='data/owned_by.csv')
industries.to_csv(path_or_buf='data/industries.csv')

In [40]:
# Persist the grouped per-ticker member and board-member tables.
member.to_csv('data/members.csv')

# 'data/board_members.csv' is the raw input this notebook reads near the top.
# Writing the grouped table to that same path replaced the raw file, after which
# a re-run fails when it tries to drop the 'item'/'itemLabel' columns that the
# grouped table no longer has. The grouped output therefore gets its own file.
# NOTE(review): anything that consumed the grouped table from the old path must
# be pointed at 'data/board_members_grouped.csv'.
board_member.to_csv('data/board_members_grouped.csv')

In [25]:
# Industry adjacency matrix. Node k is row k of `industries`; entry [i, j]
# with i < j is set to 1 when the two tickers share at least one industry
# label. Only the upper triangle is filled (the matrix is not symmetrised).
#
# The label sets are built once up front instead of re-reading a DataFrame row
# and rebuilding a set on every inner iteration.
# NOTE(review): missing labels (NaN) inside the lists are treated like any
# other label -- confirm whether two tickers that both lack a label should be linked.
industry_sets = [set(labels) for labels in industries['industries']]

# Size the matrix from the frame that is actually indexed: groupby leaves out
# rows whose ticker is missing while unique() keeps them, so len(unique_io)
# can exceed the number of grouped rows and make the loop index past the end.
num_nodes = len(industry_sets)
industries_graph = np.zeros((num_nodes, num_nodes))
for i, i_set in enumerate(industry_sets):
    for j in range(i + 1, num_nodes):
        if not i_set.isdisjoint(industry_sets[j]):
            industries_graph[i, j] = 1


In [27]:
# Owned-by adjacency matrix. Node k is row k of `owned_by`; entry [i, j]
# with i < j is set to 1 when the two tickers share at least one owner label.
# Only the upper triangle is filled (the matrix is not symmetrised).
#
# The label sets are built once up front instead of re-reading a DataFrame row
# and rebuilding a set on every inner iteration.
# NOTE(review): missing labels (NaN) inside the lists are treated like any
# other label -- confirm whether two tickers that both lack an owner should be linked.
owner_sets = [set(labels) for labels in owned_by['ownedby']]

# Size the matrix from the frame that is actually indexed: groupby leaves out
# rows whose ticker is missing while unique() keeps them, so len(unique_io)
# can exceed the number of grouped rows and make the loop index past the end.
num_nodes = len(owner_sets)
ownedby_graph = np.zeros((num_nodes, num_nodes))
for i, i_set in enumerate(owner_sets):
    for j in range(i + 1, num_nodes):
        if not i_set.isdisjoint(owner_sets[j]):
            ownedby_graph[i, j] = 1

In [35]:
# Board adjacency matrix. Node k is row k of `board_member`; entry [i, j]
# with i < j is set to 1 when the two tickers share at least one board-member
# label. Only the upper triangle is filled (the matrix is not symmetrised).
#
# The label sets are built once up front instead of re-reading a DataFrame row
# and rebuilding a set on every inner iteration.
# NOTE(review): missing labels (NaN) inside the lists are treated like any
# other label -- confirm that is intended.
board_sets = [set(labels) for labels in board_member['board_members']]

# Size the matrix from the frame that is actually indexed: groupby leaves out
# rows whose ticker is missing while unique() keeps them, so len(unique_board)
# can exceed the number of grouped rows and make the loop index past the end.
num_nodes = len(board_sets)
board_graph = np.zeros((num_nodes, num_nodes))
for i, i_set in enumerate(board_sets):
    for j in range(i + 1, num_nodes):
        if not i_set.isdisjoint(board_sets[j]):
            board_graph[i, j] = 1

In [36]:
# Membership adjacency matrix. Node k is row k of `member`; entry [i, j]
# with i < j is set to 1 when the two tickers share at least one member label.
# Only the upper triangle is filled (the matrix is not symmetrised).
#
# The label sets are built once up front instead of re-reading a DataFrame row
# and rebuilding a set on every inner iteration.
# NOTE(review): missing labels (NaN) inside the lists are treated like any
# other label -- confirm that is intended.
member_sets = [set(labels) for labels in member['members']]

# Size the matrix from the frame that is actually indexed: groupby leaves out
# rows whose ticker is missing while unique() keeps them, so len(unique_member)
# can exceed the number of grouped rows and make the loop index past the end.
num_nodes = len(member_sets)
member_graph = np.zeros((num_nodes, num_nodes))
for i, i_set in enumerate(member_sets):
    for j in range(i + 1, num_nodes):
        if not i_set.isdisjoint(member_sets[j]):
            member_graph[i, j] = 1

In [39]:
# Grand total of every entry in each adjacency matrix. The matrices hold only
# 0/1 values, so this is the number of entries set to 1.
# The trailing numbers are presumably the totals observed when the notebook was last run.
ind_sum = industries_graph.sum()  # 106
own_sum = ownedby_graph.sum()  # 876
board_sum = board_graph.sum()  # 6
member_sum = member_graph.sum()  # 1397

In [41]:
# Write each adjacency matrix to data/ in NumPy's .npy format. np.save opens
# (and closes) the target itself when handed a path; every path already ends
# in '.npy', so no extension is appended.
np.save('data/industry_graph.npy', industries_graph)
np.save('data/ownedby_graph.npy', ownedby_graph)
np.save('data/member_graph.npy', member_graph)
np.save('data/board_graph.npy', board_graph)