## FOC-CRISIS school, 24-27 Oct 2012, Lucca
##
## Tutorial: Analysing financial networks in Python
## by Michelangelo Puliga (ETH Zurich)
##
## http://nbviewer.ipython.org/3950921/

## Preliminary arrangements: which companies and dates to choose?

# UNH replaced KFT on 24 Sep 2012; the first change since 2009.
# http://www.bloomberg.com/news/2012-09-14/unitedhealth-replaces-kraft-in-dow-jones-industrial-average-1-.html
# => We can take the data e.g. from 01 Jan 2010 to 23 Sep 2012,
# when the Dow Jones companies stayed the same

# Dow Jones companies
# Ticker symbols of the 30 Dow Jones Industrial Average components before
# UNH replaced KFT.  The list order matters: positions in this list are the
# node ids used for the correlation matrix and the graph further down.
companies = [
    'MMM', 'AA', 'AXP', 'T', 'BAC',
    'BA', 'CAT', 'CVX', 'CSCO', 'DD',
    'XOM', 'GE', 'HPQ', 'HD', 'INTC',
    'IBM', 'JNJ', 'JPM', 'MCD', 'MRK',  # MCD = McDonald's (was mistyped 'MDC')
    'MSFT', 'PFE', 'PG', 'KO', 'TRV',
    'UTX', 'VZ', 'WMT', 'DIS', 'KFT',
]

## Download the data
# NOTE: pandas.io.data only exists in old pandas releases; the remote data
# readers were later moved to the separate pandas-datareader package.
from pandas.io.data import DataReader

# ISO dates avoid the day/month ambiguity of strings such as '23/09/2012'.
# NOTE(review): the preliminary notes at the top of the file suggest starting
# on 01 Jan 2010, while the code starts on 01 Jan 2009 -- confirm which one
# is intended.  The original value is kept here.
start_date = '2009-01-01'
end_date = '2012-09-23'

# Maps each ticker symbol to its series of daily closing prices.
data = dict()
for ticker in companies:
    raw_data = DataReader(ticker, 'yahoo', start=start_date, end=end_date)
    data[ticker] = raw_data['Close']  # we need closing prices only

## A quick visualization
import pylab
import random as rn

colors = 'bcgmry'  # single-letter matplotlib color codes
# Use a dedicated, seeded generator so the colors are repeatable.  Assigning
# a number to `rn.seed` (instead of calling it) would only overwrite the
# function and leave the generator unseeded.
color_rng = rn.Random(len(companies))
pylab.subplot(111)  # all time series on a single figure
for ticker in companies:
    data[ticker].plot(style=color_rng.choice(colors))
pylab.show()

## Compute correlation matrix
import numpy as np

n = len(companies)
corr_matrix = np.zeros((n, n))
# Only the strict upper triangle (row < col) is filled: each unordered pair
# of companies is correlated once, and the diagonal and the lower triangle
# stay zero.
for row in range(n):
    for col in range(row + 1, n):
        corr_matrix[row][col] = data[companies[row]].corr(
            data[companies[col]],
            method='pearson')
# Output
np.set_printoptions(precision=2)
print(corr_matrix[0])

## Remove weak correlations to construct a graph
threshold = 0.7
# Zero every entry whose absolute correlation is below the threshold; the
# boolean mask indexes the matrix directly, so np.where is not needed.
corr_matrix[np.abs(corr_matrix) < threshold] = 0
# Output
print(corr_matrix[0])

# Constructing a graph
import networkx as nx

# The thresholded correlation matrix is handed to networkx as the graph's
# input data; node ids are the row/column indices, i.e. positions in
# `companies`.
G = nx.Graph(corr_matrix)

# Connected components: color them differently
# A dedicated, seeded generator keeps the colors repeatable without relying
# on the module-level `rn.seed`.
component_rng = rn.Random(5)
# Materialise the components as a list: newer networkx versions return a
# one-shot generator, and `components` is iterated again further down.
components = list(nx.connected_components(G))
for members in components:
    component = G.subgraph(members)
    # NOTE(review): draw_graphviz is only available in older networkx
    # releases and needs Graphviz bindings -- confirm for the installed
    # version.
    nx.draw_graphviz(
        component,
        node_color=component_rng.choice(colors),
        # Node size grows with the node's degree inside its component.
        node_size=[component.degree(node) * 100 + 15
                   for node in component.nodes()],
        # Only the upper triangle of corr_matrix is filled, so look the edge
        # up as (smaller index, larger index) regardless of endpoint order.
        edge_color=[corr_matrix[min(u, v)][max(u, v)] * 0.5
                    for (u, v) in component.edges()],
        with_labels=True,
        labels={node: companies[node] for node in component.nodes()},
    )
pylab.show()
print("Smallest components (size < 5):")
# Ask networkx for the components again instead of reusing an earlier
# result, which may be an already-consumed generator.
for members in nx.connected_components(G):
    if len(members) < 5:
        print([companies[node] for node in members])
print("Companies with degrees < 5:")
# The degree mapping must exist before it is used here; dict() gives a plain
# {node: degree} mapping whether G.degree() returns a dict or a view.
degrees = dict(G.degree())
print([(companies[node], degrees[node])
       for node in range(n) if degrees[node] < 5])

## Explore graph properties
nodes, edges = G.order(), G.size()
print("Number of nodes: %s" % nodes)
print("Number of edges: %s" % edges)
# Every undirected edge adds one to the degree of both of its endpoints, so
# the mean degree is 2 * (number of edges) / (number of nodes).
print("Average degree: %s" % (2.0 * edges / nodes))

## Count degrees
from collections import Counter

# dict() gives a plain {node: degree} mapping whether G.degree() returns a
# dict or a view object.
degrees = dict(G.degree())
# Tally how many nodes have each degree in a single pass.
degree_counts = Counter(degrees.values())
values = sorted(degree_counts)  # distinct degrees, ascending
counts = [degree_counts[x] for x in values]  # node count per degree, same order

# Generate colors -
# http://stackoverflow.com/questions/876853/generating-color-ranges-in-python
import colorsys

# One color per distinct degree: hues spread evenly over [0, 1) with fixed
# saturation and value.
ncolors = len(values)
HSV_tuples = [(x * 1.0 / ncolors, 0.5, 0.5) for x in range(ncolors)]
# hsv_to_rgb takes h, s, v as three separate arguments, so unpack each tuple.
# A list (not a lazy map object) is built so it can be passed on as a color
# sequence.
RGB_tuples = [colorsys.hsv_to_rgb(*hsv) for hsv in HSV_tuples]

# Plot degree distribution
# One bar per distinct degree (x) showing how many nodes have it (height),
# each bar drawn in its own color.
pylab.xlabel('Degree')
pylab.ylabel('Number of nodes')
pylab.title('Dow Jones network: degree distribution')
pylab.bar(values, counts, color=RGB_tuples)
pylab.show()
# `values` holds the distinct degrees, so its maximum is the highest degree.
print("Highest degree: %s" % max(values))

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

cnetwork.md

cnetwork.md

Files

cnetwork.md

Latest commit

History

cnetwork.md

File metadata and controls