![](https://www.syl.ru/misc/i/ai/425996/2860180.jpg)

In [None]:
import numpy as np 
import pandas as pd
import plotly as py
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)
import seaborn as sns

import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

# Never elide columns when a frame is displayed.
pd.options.display.max_columns = None

# Read the three CSV files of the stock-exchange dataset, in this order:
# the raw index data, the "processed" file, and the per-index info table.
df, df_p, info = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/stock-exchange-data/indexData.csv',
        '../input/stock-exchange-data/indexProcessed.csv',
        '../input/stock-exchange-data/indexInfo.csv',
    )
)

# Basic information

In [None]:
# Display the per-index table loaded from indexInfo.csv.
info

In [None]:
# Preview the first rows of the data loaded from indexData.csv.
df.head()

In [None]:
# Print the column dtypes and non-null counts of the loaded frame.
df.info()

# Preprocessing

In [None]:
# Discard every row that contains a missing value, then renumber the
# remaining rows from 0 without keeping the old index as a column.
df = df.dropna().reset_index(drop = True)

In [None]:
# Change features, computed for each row against the row directly above it
# when both rows belong to the same index:
#   p_change   Close - Open of the row itself
#   close-1    Close minus the previous row's Close
#   close-1%   the same change as a percentage of the previous Close
#   volume-1   Volume minus the previous row's Volume
#   volume-1%  the same change as a percentage of the previous Volume
# Rows without a same-index predecessor (the very first row and the first row
# of every index) keep 0.0 in all five columns, p_change included.
#
# The computation is vectorized on purpose: writing element by element through
# chained indexing (`df[col][k] = value`) is documented by pandas as
# unreliable and does nothing at all under Copy-on-Write.
new_features = ['p_change', 'close-1', 'close-1%', 'volume-1', 'volume-1%']

prev_close = df['Close'].shift()
prev_volume = df['Volume'].shift()

# True where the row directly above carries the same index symbol.
same_index = df['Index'].eq(df['Index'].shift())
# Volume changes are only filled in when both rows report a non-zero volume.
has_volume = same_index & df['Volume'].ne(0) & prev_volume.ne(0)

df['p_change'] = (df['Close'] - df['Open']).where(same_index, 0.0)
df['close-1'] = (df['Close'] - prev_close).where(same_index, 0.0)
df['close-1%'] = (((df['Close'] / prev_close) * 100) - 100).where(same_index, 0.0)
df['volume-1'] = (df['Volume'] - prev_volume).where(has_volume, 0.0)
df['volume-1%'] = (((df['Volume'] / prev_volume) * 100) - 100).where(has_volume, 0.0)

# Keep the new columns as floats regardless of the source column dtypes.
df[new_features] = df[new_features].astype('float')

# Calendar helpers used for filtering by period.
df['Date'] = pd.to_datetime(df['Date'])
df['year'] = df['Date'].dt.year
df['month'] = df['Date'].dt.month

df.head()

# EDA

In [None]:
def _plot_series(data, column, y_label, color):
    """Draw one thin line chart of ``column`` over ``Date`` on the current axes.

    Adds the dotted horizontal grid, sets the y-axis label, blanks the x-axis
    label, and applies the shared spine styling (top/right hidden,
    bottom/left thickened).

    Parameters
    ----------
    data : DataFrame
        Rows to plot; must contain ``Date`` and ``column``.
    column : str
        Name of the column drawn on the y axis.
    y_label : str
        Text of the y-axis label.
    color : str or None
        Line colour handed to seaborn; ``None`` when the caller has no
        palette entry for the index.

    Returns
    -------
    The axes object returned by ``sns.lineplot``.
    """
    plt.grid(color = 'gray', linestyle = ':', axis = 'y', alpha = 0.8, zorder = 0,  dashes = (1,7))
    ax = sns.lineplot(x = "Date", y = column, data = data, color = color, linewidth = 0.5)
    plt.ylabel(y_label, size = 14, fontname = 'monospace')
    plt.xlabel('')
    plt.yticks(size = 12, fontname = 'monospace')

    for side in ['right', 'top']:
        ax.spines[side].set_visible(False)
    for side in ['bottom', 'left']:
        ax.spines[side].set_linewidth(1.3)

    return ax


def eda():
    """Show the close-price history and the change charts of every index.

    For each value in ``info['Index']`` two figures are shown: a wide chart of
    ``Close`` over ``Date``, followed by a 2x2 grid with ``close-1``,
    ``close-1%``, ``volume-1`` and ``volume-1%``. Reads the module-level
    ``df`` and ``info`` frames and returns nothing.
    """
    # Fixed line colour per index symbol; symbols missing here yield None.
    colors = {'NYA': '#41729F',
             'IXIC': '#5885AF',
             'HSI': '#274472',
             '000001.SS': '#C3E0E5',
             'N225': '#145DA0',
             'N100': '#0C2D48',
             '399001.SZ': '#2E8BC0',
             'GSPTSE': '#B1D4E0',
             'NSEI': '#BFD7ED',
             'GDAXI': '#60A3D9',
             'KS11': '#0074B7',
             'SSMI': '#003B73',
             'TWII': '#0E86D4',
             'J203.JO': '#68BBE3'}

    # (column, y-axis label) of the four panels, in subplot order 1..4.
    change_panels = [
        ('close-1', 'Price changes'),
        ('close-1%', 'Price changes %'),
        ('volume-1', 'Volume changes'),
        ('volume-1%', 'Volume changes %'),
    ]

    for stock in info['Index'].tolist():
        # Filter once per index; every chart below reuses the same rows.
        stock_rows = df.query("Index == @stock")
        line_color = colors.get(stock)

        # Figure 1: close price over time, titled with the index symbol.
        plt.figure(figsize = (15, 7))
        plt.title(stock, size = 35, y = 1.03, fontname = 'monospace')
        _plot_series(stock_rows, 'Close', 'Close price', line_color)
        plt.show()

        # Figure 2: the four change series on a 2x2 grid.
        plt.figure(figsize = (15, 15))
        for position, (column, y_label) in enumerate(change_panels, start = 1):
            plt.subplot(2, 2, position)
            _plot_series(stock_rows, column, y_label, line_color)

        # White-on-white text just under the figure; presumably a spacer
        # between this figure and the next one in the output.
        plt.figtext(0.5, -0.001, 'whitespace', color = 'white')

        plt.show()

In [None]:
# Render the close-price and change charts for every index listed in `info`.
eda()

In [None]:
# Close prices from 2012 onwards: one row per date, one column per index.
# Pivoting on Date guarantees that each row holds prices of the same calendar
# day. Filling the columns from position-reset Series instead pairs up
# different dates across markets and clips or pads every column to the row
# count of the first index, which distorts the correlations.
corr_map = (
    df.query("year >= 2012")
    .pivot_table(index = 'Date', columns = 'Index', values = 'Close', aggfunc = 'last')
    .reindex(columns = info['Index'].tolist())   # same column order as `info`
    .rename_axis(columns = None)                 # no axis-name labels on the heatmap
)

# Pairwise correlation; each pair uses only the dates on which both indices
# have a close price.
corr_matrix = corr_map.corr()

# Mask the upper triangle (diagonal included) so each pair is shown once.
matrix = np.triu(corr_matrix)
plt.figure(figsize = (12, 10))
sns.heatmap(corr_matrix, annot = True, cmap = 'Blues', fmt=".2f", mask = matrix, vmin = -1, vmax = 1, linewidths = 0.1, linecolor = 'white', cbar = False, annot_kws = {'fontsize': 11})
plt.xticks(size = 10, fontname = 'monospace')
plt.yticks(size = 11, fontname = 'monospace')
# Caption placed in the empty (masked) upper-right area of the figure.
plt.figtext(0.88, 0.65, '''Correlation map from
2012 to 2021
for all
stocks''', fontsize = 40, fontname = 'monospace', ha = 'right', color = '#4897d8')
plt.show()