In [None]:
import datetime
import json

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import covariance, cluster
from matplotlib.finance import quotes_historical_yahoo_ochl as quotes_yahoo

In [None]:
# JSON file whose entries pair each stock symbol with a company name
input_file = "company_symbol_mapping.json"

In [None]:
# Parse the symbol -> company-name mapping straight from the open file handle
with open(input_file, 'r') as mapping_file:
    company_symbols_map = json.load(mapping_file)

In [None]:
# Turn the mapping into a 2-column array of (key, value) rows, then split it
# column-wise: the keys become `symbols`, the values become `names`.
symbol_name_pairs = np.array(list(company_symbols_map.items()))
symbols, names = symbol_name_pairs.T

In [None]:
# Fetch the historical quotes of every symbol between start_date and end_date.
# With asobject=True each result exposes its fields as attributes; the cells
# below read `.open` and `.close` from them.
# NOTE(review): quotes_yahoo is imported from matplotlib.finance, which newer
# matplotlib releases no longer ship -- confirm the environment provides it.
start_date = datetime.datetime(year=2003, month=7, day=3)
end_date = datetime.datetime(year=2007, month=5, day=4)
quotes = [
    quotes_yahoo(ticker, start_date, end_date, asobject=True)
    for ticker in symbols
]

In [None]:
# Extract opening and closing quotes as float arrays, one row per entry of
# `quotes`.
# np.float64 is used instead of the `np.float` alias: the alias was merely the
# builtin float (i.e. float64), was deprecated in NumPy 1.20 and removed in
# 1.24, so `np.float` raises AttributeError on current NumPy. The resulting
# dtype is unchanged.
opening_quotes = np.array([quote.open for quote in quotes]).astype(np.float64)
closing_quotes = np.array([quote.close for quote in quotes]).astype(np.float64)

In [None]:
# Element-wise change from open to close (close minus open) for every entry
# of the opening/closing quote arrays; positive values mean the close was
# higher than the open.
quotes_diff = closing_quotes - opening_quotes

In [None]:
# Build the data matrix from a transposed copy of the differences (so
# quotes_diff itself is left untouched), then scale every column by its own
# standard deviation. Only the scale is normalized; the mean is not removed.
X = quotes_diff.copy().T
per_column_std = X.std(axis=0)
X /= per_column_std

In [None]:
# Create the graph (sparse covariance) model.
# scikit-learn renamed GraphLassoCV to GraphicalLassoCV in 0.20 and removed
# the old name in 0.22, so prefer the current name and fall back to the
# legacy one only when running against an older scikit-learn.
try:
    edge_model = covariance.GraphicalLassoCV()
except AttributeError:
    edge_model = covariance.GraphLassoCV()

In [None]:
# Train the model on the scaled data matrix X.
# NumPy "invalid value" floating-point warnings are suppressed only for the
# duration of the fit; the previous error-state settings are restored when
# the `with` block exits.
with np.errstate(invalid='ignore'):
    edge_model.fit(X)

In [None]:
# Run affinity propagation on the covariance matrix learned by the edge model.
# Only the second returned value (the label array) is kept; the first is
# discarded into `_`.
affinity_result = cluster.affinity_propagation(edge_model.covariance_)
_, labels = affinity_result
# Despite its name, num_labels is the largest label value, not a count; the
# printing cell iterates over range(num_labels + 1).
num_labels = labels.max()

In [None]:
# Report the clusters: one line per label value from 0 to num_labels, shown
# with 1-based numbering and listing the company names that carry that label.
print('\nClustering of stocks based on difference in opening and closing quotes:\n')
for cluster_id in range(num_labels + 1):
    members = names[labels == cluster_id]
    print("Cluster", cluster_id + 1, "==>", ', '.join(members))