# CAC40
### This article discusses the CAC40 index and its components. We will analyze their correlations and their trends.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import pandas_datareader.data as web
import datetime
import seaborn as sns 
from scipy import stats
from scipy.stats import norm
from scipy.stats import skew
from sklearn.preprocessing import StandardScaler
sns.set(style = "whitegrid", color_codes = True)
sns.set(font_scale = 1)
from astropy.table import Table, Column

# Yahoo Finance ticker -> display name for every CAC40 component.
# One mapping drives both the download list and the column renaming, so the
# two cannot drift apart.
# NOTE(review): 'SOLB', 'SAN' and 'UL' carry no exchange suffix, unlike every
# other symbol here; on Yahoo the bare symbols may resolve to listings of
# other companies -- confirm these are the intended tickers.
COMPONENT_NAMES = {
    'SOLB': 'Solvay',
    'LHN.PA': 'LafargeHolcim',
    'NOKIA.PA': 'Nokia',
    'ACA.PA': 'Credit Agricole',
    'ATO.PA': 'Atos',
    'SAF.PA': 'Safran',
    'AI.PA': 'Air Liquide',
    'CA.PA': 'Carrefour',
    'FP.PA': 'Total',
    'OR.PA': 'Loreal',
    'AC.PA': 'Accor Hotels',
    'EN.PA': 'Bouygues',
    'SAN': 'Sanofi',
    'CS.PA': 'Axa',
    'BN.PA': 'Danone',
    'RI.PA': 'Pernod Ricard',
    'MC.PA': 'Lvmh',
    'SW.PA': 'Sodexo',
    'ML.PA': 'Michelin',
    'KER.PA': 'Kering',
    'UG.PA': 'Peugeot',
    'EI.PA': 'Essilor Intl',
    'SU.PA': 'Schneider Electric',
    'VIE.PA': 'Veolia Environ',
    'UL': 'Unibail-Rodamco',
    'SGO.PA': 'Saint Gobain',
    'CAP.PA': 'Cap Gemini',
    'DG.PA': 'Vinci',
    'VIV.PA': 'Vivendi',
    'PUB.PA': 'Publicis Groupe',
    'GLE.PA': 'Societe Generale',
    'BNP.PA': 'Bnp Paribas',
    'RNO.PA': 'Renault',
    'ORA.PA': 'Orange',
    'ENGI.PA': 'Engie',
    'LR.PA': 'Legrand SA',
    'FR.PA': 'Valeo',
    'FTI.PA': 'TechnipFMC',
    'MT.PA': 'Arcelor Mittal',
    'AIR.PA': 'Airbus',
}

# list of all components of CAC40 (tickers, in the order declared above)
components = list(COMPONENT_NAMES)

# index Cac40
Cac = ['^FCHI']
# Display names for index tickers; only the symbols listed in `Cac` are
# downloaded, the remaining entries are simply ignored by rename().
INDEX_NAMES = {
    '^FCHI': 'CAC40',
    '^STOXX50E': 'Euro Stoxx 50',
    '^VIX': 'VIX',
    '^GSPC': 'S&P 500',
}

# Choose Close Value
ls_key = 'Close'
start = datetime.datetime(2016, 12, 1)
end = datetime.datetime(2017, 4, 26)

# Component prices: one column per ticker, renamed to the company name.
df_component = web.DataReader(components, 'yahoo', start, end)
# `.ix` was removed in pandas 1.0; select the closing prices by key instead.
# rename() returns a new frame, so the downloaded data is never mutated.
DataComponent = df_component[ls_key].rename(columns=COMPONENT_NAMES)
print(DataComponent.head())

# Index level over the same date range.
df_cac = web.DataReader(Cac, 'yahoo', start, end)
DataCac = df_cac[ls_key].rename(columns=INDEX_NAMES)
print(DataCac.head())

# Components and index side by side; the 'CAC40' column is the target used
# by the later cells.
train = pd.concat([DataComponent, DataCac], axis=1)
print(train.shape)


In [None]:
# Missing-data report: per-column NaN count and NaN fraction, largest first.
null_mask = train.isnull()
total = null_mask.sum().sort_values(ascending=False)
# The mean of a boolean mask is the fraction of True values, i.e. NaNs / rows.
percent = null_mask.mean().sort_values(ascending=False)
missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])

print("Missing data in training test:")
print(missing_data.head(10))
print("-" * 25)

# Impute every remaining NaN with the mean of its own column.
# NOTE(review): mean imputation on a price time series uses values from the
# whole window; confirm this is intended rather than e.g. a forward fill.
column_means = train.mean()
train = train.fillna(value=column_means)


In [None]:
# Correlation between features
# Select numeric columns with 'number' rather than the capitalised 'Float64':
# in recent pandas that spelling denotes the nullable extension dtype and does
# not match ordinary NumPy float64 price columns.
# Every column is kept: `train` has no leading id column to skip, so slicing
# off the first column would silently drop one component from the matrix.
corrMatrix = train.select_dtypes(include='number').corr()

plt.figure(figsize=(12, 12))
# vmax=.8 caps the colour scale so strong correlations saturate.
sns.heatmap(
    corrMatrix,
    vmax=.8,
    linewidths=0.01,
    square=True,
    cmap='viridis',
    linecolor='white',
)
plt.title('Correlation between features');


In [None]:
# Correlation of every column with the CAC40 index (the target).
corr = train.corr()['CAC40']
print("Correlation Coefficients")
# Sort by value, strongest positive correlation first. sort_values avoids
# indexing a label-indexed Series with integer positions from np.argsort,
# which relies on deprecated positional fallback and mis-places NaN entries;
# here NaN coefficients are listed last.
print(corr.sort_values(ascending=False))

In [None]:
# Scatter plot: Cap Gemini on the x-axis against the CAC40 on the y-axis.
var = 'Cap Gemini'
# Two-column frame (target first, then the component) used only for plotting.
data_CapGemini = train.loc[:, ['CAC40', var]]
data_CapGemini.plot.scatter(
    x=var,
    y='CAC40',
    ylim=(3000, 7000),
);

In [None]:
# Scatter plot: Bouygues on the x-axis against the CAC40 on the y-axis.
var = 'Bouygues'
# Keep only the target and the selected component, in that column order.
data_Bouygues = train[['CAC40', var]]
data_Bouygues.plot.scatter(y='CAC40', x=var, ylim=(3000, 7000));

In [None]:
# Distribution of the CAC40 column: density histogram with a KDE overlay.
cac40_close = train['CAC40']
if hasattr(sns, 'histplot'):
    # distplot is deprecated since seaborn 0.11; histplot with a density
    # histogram and KDE is its replacement. cut=3 extends the KDE past the
    # data range the way distplot did.
    sns.histplot(cac40_close, kde=True, stat='density', kde_kws={'cut': 3})
else:
    # Older seaborn releases that predate histplot.
    sns.distplot(cac40_close)