In [None]:
# Create a correlation-based network for 30 Dow Jones companies.
# Compute correlations using the closing stock prices for each of 30 companies.

In [2]:
## Preliminary arrangements: which companies and dates to choose?
import pandas as pd
import numpy as np
dfsym=pd.read_csv('data/companylist.csv',float_precision='high')
dfsym=dfsym[dfsym['MarketCap'] > 1000000]
companies=dfsym['Symbol'].tolist()

In [8]:
from pandas_datareader import data, wb
import traceback
def get_data(i):
    print (i)
    try:
        raw_data = data.DataReader(i, 'google', start='01/01/2012', end='30/06/2014')
    except:
        traceback.print_exc()
    if len(raw_data) > 0:
        print('here')
        data_df = raw_data
        data_df['Key'] = i
        data_df.reset_index(inplace=True) 
        return data_df

In [None]:
## Download the data
from multiprocessing import Pool
data_list = []

p = Pool(20)
data_list=p.map(get_data, companies)
df_data = pd.concat(data_list)

In [9]:
O=get_data('AAPL')
O

AAPL


Traceback (most recent call last):
  File "<ipython-input-8-5ace500cc628>", line 6, in get_data
    raw_data = data.DataReader(i, 'google', start='01/01/2012', end='30/06/2014')
  File "/opt/conda/lib/python3.6/site-packages/pandas_datareader/data.py", line 133, in DataReader
    session=session).read()
  File "/opt/conda/lib/python3.6/site-packages/pandas_datareader/base.py", line 157, in read
    params=self._get_params(self.symbols))
  File "/opt/conda/lib/python3.6/site-packages/pandas_datareader/base.py", line 74, in _read_one_data
    out = self._read_url_as_StringIO(url, params=params)
  File "/opt/conda/lib/python3.6/site-packages/pandas_datareader/base.py", line 85, in _read_url_as_StringIO
    response = self._get_response(url, params=params)
  File "/opt/conda/lib/python3.6/site-packages/pandas_datareader/base.py", line 120, in _get_response
    raise RemoteDataError('Unable to read URL: {0}'.format(url))
pandas_datareader._utils.RemoteDataError: Unable to read URL: http://w

UnboundLocalError: local variable 'raw_data' referenced before assignment

In [None]:
import scipy.signal
data_detrd={}
for key in data_dict:
    data_detrd[key] = pd.Series(scipy.signal.detrend(data_dict[key]))
data_dict = data_detrd

In [None]:
data_dict

In [None]:
## A quick visualization: stock prices for each Dow Jones company

import pylab
import random as rn

colors = 'bcgmry'
rn.seed = len(companies)  # for choosing random colors
pylab.subplot('111')  # all time series on a single figure

for i in data_dict:
    data_dict[i].plot(style=colors[rn.randint(0, len(colors) - 1)])
pylab.show()

In [None]:
## Compute correlation matrix

import numpy as np
n = len(data_dict)
corr_matrix = np.zeros((n, n))
cdict=list(data_dict.keys())

for i in range(0, n):
    for j in range(0, n):
        if i < j:
            corr_matrix[i][j] = data_dict[cdict[i]].corr(
                                                    data_dict[cdict[j]],
                                                    method='pearson')

# Output
np.set_printoptions(precision=2)
print (corr_matrix[0])

In [None]:
## Remove weak correlations to construct a graph
threshold = 0.8

def elength(x):
    if abs(x) < threshold:
        return 0
    else:
        return round((((1-x)*2)**0.5))

vfunc = np.vectorize(elength)
corr_matrixo=vfunc(corr_matrix)

# Output
print (corr_matrixo[0])

In [None]:
# Constructing a graph
import networkx as nx
G = nx.Graph(corr_matrixo)

In [None]:
## Explore graph properties

nodes, edges = G.order(), G.size()
print (nx.info(G))

In [None]:
%matplotlib inline
nx.draw(G)

In [None]:
from networkx.readwrite import json_graph
G.remove_nodes_from(nx.isolates(G)) 
data = json_graph.node_link_data(G)

In [None]:
from IPython.core.display import display,HTML
import d3_lib
HTML(d3_lib.set_styles(['basic_axis','basic_line','basic_scatter','force_directed_graph','day-hr-heatmap']))
HTML('<script src="lib/d3/d3.min.js"></script>')

In [None]:
keys = ['nodes', 'links']

filtered_d = dict((k, data[k]) for k in keys if k in data)


In [None]:
display(HTML(d3_lib.draw_graph('force_directed_graph',{'data': filtered_d})))

In [None]:
for j in filtered_d['links']:
    j['value'] = j.pop('weight')

In [None]:
filtered_d['links']

In [None]:
from pandas_datareader import data, wb
df=data.DataReader('AAPL', 'google', start='01/01/2000', end='30/06/2017')
df['key'] = 'AAPL'
df2=df.reset_index()

In [None]:
df=data.DataReader('AAPL', 'google', start='01/01/2000', end='30/06/2017')
df['key'] = 'AAPL'
df3=df.reset_index()

In [None]:
pd.concat([df2,df3])