In [56]:
import datetime as dt
import os
import pandas as pd
import pickle
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np

In [57]:
# Apply the ggplot look to every figure created in this notebook.
plt.style.use('ggplot')

## Unione dati aziende S&P 500

In [58]:
def compile_data(tickers_path='sp500ticker.pickle',
                 data_dir='stock_dfs',
                 output_path='sp500_joined_closes.csv'):
    """Join every ticker's adjusted close into one date-indexed CSV.

    Loads the ticker list pickled at ``tickers_path``, reads the ``Date`` and
    ``Adj Close`` columns of ``<data_dir>/<ticker>.csv`` for each ticker,
    aligns all series on the union of their dates (a date missing for a
    ticker becomes NaN), prints the first rows of the result and writes it
    to ``output_path`` with ``Date`` as the index column and one column per
    ticker.

    Parameters
    ----------
    tickers_path : str
        Pickle file holding an iterable of ticker symbols.
    data_dir : str
        Directory containing one ``<ticker>.csv`` per symbol.
    output_path : str
        Destination of the joined CSV.

    Returns
    -------
    None
        The result is only printed (head) and written to disk.

    Raises
    ------
    FileNotFoundError
        If the pickle file or one of the ticker CSVs does not exist.
    ValueError
        Raised by pandas if a CSV lacks the ``Date`` or ``Adj Close`` column.
    """
    # NOTE(security): pickle.load can execute arbitrary code. Only load a
    # ticker file that was produced locally by a trusted script.
    with open(tickers_path, 'rb') as f:
        tickers = pickle.load(f)

    closes = []
    for count, ticker in enumerate(tickers):
        csv_path = os.path.join(data_dir, '{}.csv'.format(ticker))
        # Read only the two columns that are kept, instead of loading
        # everything and dropping Open/High/Low/Close/Volume afterwards.
        prices = pd.read_csv(csv_path, usecols=['Date', 'Adj Close'],
                             index_col='Date')
        series = prices['Adj Close'].rename(ticker)
        # A repeated date would make the alignment below ambiguous, so keep
        # only the last row recorded for each date.
        series = series[~series.index.duplicated(keep='last')]
        closes.append(series)

        # Lightweight progress indicator: one line every 100 tickers.
        if count % 100 == 0:
            print(count)

    if closes:
        # One outer alignment over all tickers instead of re-joining (and
        # re-copying) the growing frame once per ticker.
        main_df = pd.concat(closes, axis=1).sort_index()
        main_df.index.name = 'Date'
    else:
        main_df = pd.DataFrame()

    print(main_df.head())
    main_df.to_csv(output_path)

In [59]:
# Build the joined table; this writes sp500_joined_closes.csv and prints a progress counter plus the first rows.
compile_data()

0
100
200
300
400
500
                  MMM       ABT  ABBV      ABMD  ACN      ATVI       ADBE  \
Date                                                                        
2000-01-03  26.346825  6.592210   NaN  18.25000  NaN  1.235628  16.274673   
2000-01-04  25.299936  6.403859   NaN  17.81250  NaN  1.198042  14.909401   
2000-01-05  26.032759  6.392088   NaN  18.00000  NaN  1.202740  15.204173   
2000-01-06  28.126556  6.615753   NaN  18.03125  NaN  1.179249  15.328290   
2000-01-07  28.684900  6.686383   NaN  17.93750  NaN  1.212137  16.072983   

               AMD  AAP        AES  ...  WYNN       XEL        XRX       XLNX  \
Date                                ...                                         
2000-01-03  15.500  NaN  28.582567  ...   NaN  7.782174  52.591202  33.437664   
2000-01-04  14.625  NaN  27.449125  ...   NaN  7.961369  50.145103  32.677715   
2000-01-05  15.000  NaN  27.744810  ...   NaN  8.268555  52.727116  31.962463   
2000-01-06  16.000  NaN  27.96656

### Correlazione di Pearson

In [60]:
# Load the joined closes written by compile_data(); 'Date' is read as an ordinary column here.
df = pd.read_csv('sp500_joined_closes.csv')

Correlazione classica (basata sul prezzo)

In [61]:
# Number of leading ticker columns kept in the correlation matrix, so the heatmap stays readable.
num_of_stocks = 10

In [62]:
# Price-level correlation, left disabled for reference.
# df_corr = df.corr()

# Keep only the first 10 so the plot does not get too complicated
#df_corr = df.corr().iloc[:num_of_stocks,:num_of_stocks]

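Più corretto del blocco precedente, perché confrontiamo le variazioni percentuali da un periodo al successivo e non i livelli di prezzo come fatto in precedenza (due serie di prezzi che crescono entrambe nel tempo risultano correlate anche quando i loro movimenti di breve periodo non lo sono).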

In [63]:
# Index the prices by date. The guard keeps this cell re-runnable: after the
# first run 'Date' is the index rather than a column, and an unconditional
# set_index('Date') would raise KeyError.
if 'Date' in df.columns:
    df.set_index('Date', inplace=True)

# Pearson correlation of row-over-row percentage changes. Only the first
# num_of_stocks columns are needed, so slice before computing: each pairwise
# coefficient depends only on its own two columns, which makes the result
# equal to the top-left corner of the full correlation matrix.
df_corr = df.iloc[:, :num_of_stocks].pct_change().corr(method='pearson')

In [64]:
df_corr.head(10)

Unnamed: 0,MMM,ABT,ABBV,ABMD,ACN,ATVI,ADBE,AMD,AAP,AES
MMM,1.0,0.371056,0.355435,0.239829,0.405725,0.284419,0.367514,0.275526,0.318081,0.267034
ABT,0.371056,1.0,0.431787,0.199452,0.298373,0.195455,0.228945,0.169157,0.237997,0.178901
ABBV,0.355435,0.431787,1.0,0.223734,0.326878,0.215362,0.319212,0.158979,0.200883,0.21221
ABMD,0.239829,0.199452,0.223734,1.0,0.251986,0.196238,0.257796,0.192561,0.166518,0.159884
ACN,0.405725,0.298373,0.326878,0.251986,1.0,0.328469,0.385512,0.301062,0.260642,0.279881
ATVI,0.284419,0.195455,0.215362,0.196238,0.328469,1.0,0.334226,0.287719,0.236782,0.217308
ADBE,0.367514,0.228945,0.319212,0.257796,0.385512,0.334226,1.0,0.364875,0.26492,0.243266
AMD,0.275526,0.169157,0.158979,0.192561,0.301062,0.287719,0.364875,1.0,0.178758,0.229645
AAP,0.318081,0.237997,0.200883,0.166518,0.260642,0.236782,0.26492,0.178758,1.0,0.20608
AES,0.267034,0.178901,0.21221,0.159884,0.279881,0.217308,0.243266,0.229645,0.20608,1.0


### Show heatmap plot

In [65]:
#%matplotlib auto

# Plain array of coefficients: rows follow df_corr.index, columns follow df_corr.columns.
data = df_corr.values
n_rows, n_cols = data.shape
fig, ax = plt.subplots()

heatmap = ax.pcolor(data, cmap=plt.cm.RdYlGn) # Red Yellow Green
fig.colorbar(heatmap)

# One tick per cell, shifted by 0.5 so it sits at the cell centre.
# The x axis runs over columns and the y axis over rows.
ax.set_xticks(np.arange(n_cols) + 0.5, minor=False)
ax.set_yticks(np.arange(n_rows) + 0.5, minor=False)

ax.invert_yaxis() # first row at the top, so row i / column i line up on the diagonal
ax.xaxis.tick_top() # x ticks on top instead of at the bottom


column_labels = df_corr.columns
row_labels = df_corr.index

# Rotate the x labels by 90 degrees directly on this Axes rather than through
# pyplot's "current axes" state.
ax.set_xticklabels(column_labels, rotation=90)
ax.set_yticklabels(row_labels)

# Pin the colour scale to the full correlation range so colours are comparable across plots.
heatmap.set_clim(-1,1)

plt.tight_layout()
plt.show()

### Metodo più veloce per disegnare la heatmap (seaborn)

In [67]:
#%matplotlib auto
import seaborn as sns

# Alternative stylings, left disabled:
#sns.heatmap(df_corr,annot=True,cmap=plt.cm.RdYlGn)
#sns.heatmap(df_corr,annot=True,fmt='.2g',vmin=-1, vmax=1, center= 0, cbar_kws= {'orientation': 'horizontal'})

# Draw on a dedicated Axes: without ax=..., seaborn uses pyplot's current
# axes, which can still be the previous heatmap when that figure is open.
fig_sns, ax_sns = plt.subplots()
sns.heatmap(df_corr, annot=True, fmt='.2g', vmin=-1, vmax=1, center=0, ax=ax_sns)
plt.show()