In [1]:
# p.102, Section 4.7: Detecting subgroups in the stock market
# using the Affinity Propagation model
import datetime
import json
import os

import numpy as np
import pandas as pd
import pandas_datareader.data as pdd
from sklearn import covariance, cluster

In [None]:
# JSON file mapping each ticker symbol to the company's display name.
input_file = 'company_symbol_mapping.json'

with open(input_file, 'r') as f:
    company_symbols_map = json.load(f)

symbols = company_symbols_map.keys()

# Quandl API key (see https://www.quandl.com/account/profile).
# The QUANDL_API_KEY environment variable takes precedence so that a real key
# never has to be saved inside the notebook; the placeholder is the fallback.
QUANDL_API_KEY = os.environ.get('QUANDL_API_KEY', 'XXXXXXXXXXXXXXXXXXXX')

# Date range requested for every symbol.
start_date = datetime.datetime(2003, 7, 3)
end_date = datetime.datetime(2007, 5, 4)

# quotes[i] holds the data returned for the company names[i]; both lists are
# only appended to together, so they stay index-aligned.
quotes = []
names = []

for symbol in symbols:
    print('Loading ', symbol, end='...')
    try:
        d = pdd.DataReader('WIKI/' + symbol, 'quandl', start_date,
                           end_date,
                           access_key=QUANDL_API_KEY)
    except Exception as exc:
        # Best effort: skip symbols that cannot be fetched, but say why.
        # Catching Exception instead of using a bare `except:` lets
        # KeyboardInterrupt / SystemExit stop the loop as expected.
        print('not found. ({}: {})'.format(type(exc).__name__, exc))
        continue
    print('done')
    quotes.append(d)
    names.append(company_symbols_map[symbol])

# NumPy array so the names can later be selected with a boolean mask.
names = np.array(names)

In [7]:
# Stack the 'Open' and 'Close' columns of every downloaded quote.
# np.float64 replaces the `np.float` alias, which was removed in NumPy 1.24
# (it was only an alias for the builtin float, so the dtype is unchanged).
opening_quotes = np.array([quote['Open'] for quote in quotes]).astype(np.float64)
closing_quotes = np.array([quote['Close'] for quote in quotes]).astype(np.float64)

# Open-to-close price change; one row per entry of `quotes`.
quotes_diff = closing_quotes - opening_quotes

# Transpose so each entry of `quotes` becomes a column, then scale every
# column by its own standard deviation.
X = quotes_diff.copy().T
X /= X.std(axis=0)

# Sparse covariance estimator; its regularisation is chosen by 3-fold CV.
edge_model = covariance.GraphicalLassoCV(cv=3)

# Silence NumPy "invalid value" floating-point warnings raised during fitting.
with np.errstate(invalid='ignore'):
    edge_model.fit(X)

# Cluster on the estimated covariance matrix; the first return value
# (indices of the cluster centers) is not needed here.
_, labels = cluster.affinity_propagation(edge_model.covariance_)
num_labels = labels.max()

# Labels run from 0 to num_labels inclusive; print them 1-based.
for i in range(num_labels + 1):
    print("Cluster", i+1, "==>", ','.join(names[labels == i]))

Cluster 1 ==> Exxon,Chevron,ConocoPhillips,Valero Energy
Cluster 2 ==> Yahoo,Amazon,Apple
Cluster 3 ==> Ford,Navistar,Caterpillar
Cluster 4 ==> Kraft Foods
Cluster 5 ==> Coca Cola,Pepsi,Kellogg,Procter Gamble,Colgate-Palmolive,Kimberly-Clark
Cluster 6 ==> Comcast,Mc Donalds,Marriott,Wells Fargo,JPMorgan Chase,AIG,American express,Bank of America,Goldman Sachs,Xerox,Wal-Mart,Home Depot,Pfizer,Ryder
Cluster 7 ==> Microsoft,IBM,Time Warner,HP,3M,General Electrics,Cisco,Texas instruments
Cluster 8 ==> Walgreen,CVS
Cluster 9 ==> Northrop Grumman,Boeing,General Dynamics,Raytheon
