# Basic EDA

In [None]:
# Install and import the Yahoo Finance client used later to download prices.
!pip install -q yfinance
import yfinance as yf

# Install and import missingno for the missing-value plots.
!pip install -q missingno
import missingno as msno

import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import numpy as np

# NOTE(review): `impute` is not referenced by name in the cells visible here;
# KNNImputer is imported directly in the cell that uses it.
from sklearn import impute

# Silences every warning for the rest of the session, so deprecation notices
# from the libraries above will not be shown either.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the ratios table from the Kaggle input directory, parse the Date
# column into datetimes, and discard columns that hold no values at all.
data = (
    pd.read_csv(r'/kaggle/input/stoxx50financialratios/db.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .dropna(axis='columns', how='all')
)

# Distinct tickers, in order of first appearance in the file.
tickers = data['Ticker'].unique().tolist()

In [None]:
# Fill gaps inside each ticker's own history: carry the last known value
# forward, then back-fill whatever is still missing at the start.
# Grouping by Ticker only (not Date + Ticker) is what makes the fill effective:
# a (Date, Ticker) group holds a single observation, so there is nothing to
# fill from. Sorting first guarantees the fill runs in chronological order.
data = data.sort_values(['Date', 'Ticker'])
_value_cols = [col for col in data.columns if col not in ('Date', 'Ticker')]
# transform keeps the original row index, so the filled values line up with
# the untouched Date and Ticker columns on assignment.
data[_value_cols] = (
    data.groupby('Ticker')[_value_cols]
    .transform(lambda col: col.ffill().bfill())
)
data = data.set_index('Date')

In [None]:
# Render the table in the notebook output for a visual sanity check.
display(data)

In [None]:
# Summary statistics with describe()'s defaults; as the last expression of the
# cell, the result is shown as the cell output.
data.describe()

# Checking missing values

In [None]:
# Nullity matrix for the columns at positions 1-47.
msno.matrix(
    data.iloc[:, 1:48],
    figsize=(30, 15),
)

In [None]:
# Nullity-correlation heatmap for the columns at positions 1-47.
msno.heatmap(
    data.iloc[:, 1:48],
    figsize=(30, 15),
)

In [None]:
# Bar chart of per-column completeness for the columns at positions 1-47.
msno.bar(
    data.iloc[:, 1:48],
    figsize=(30, 15),
    color='brown',
)

# Imputing with KNN Imputer

In [None]:
from sklearn.impute import KNNImputer
from sklearn.preprocessing import LabelEncoder

# One-hot encode the non-numeric columns so the imputer receives a purely
# numeric matrix.
encoded = pd.get_dummies(data)

# Every original column except the ticker label is a feature to keep.
# NOTE(review): assumes Ticker is the only non-numeric column in `data`;
# any other would be one-hot encoded above and fail the lookup below.
feature_cols = data.columns.drop('Ticker')

# Label the imputed matrix with the encoded column names and pick the features
# back out by name, rather than relying on a fixed column count and on Ticker
# being the last column of `data`.
data_ = pd.DataFrame(
    KNNImputer().fit_transform(encoded),
    index=data.index,
    columns=encoded.columns,
)[feature_cols].copy()

# Re-attach the ticker label as the last column.
data_['Ticker'] = data.Ticker

# Correlation heatmaps

In [None]:
# Feature-correlation heatmaps for the first four tickers on a 2x2 grid,
# filled row by row.
fig, ax = plt.subplots(2, 2, figsize=(30, 25))
for panel, ticker in zip(ax.ravel(), tickers[:4]):
    # Leave out the last column (the ticker label) before correlating.
    features = data_[data_.Ticker == ticker].iloc[:, :-1]
    sns.heatmap(features.corr(), ax=panel)
    panel.set_title(ticker)

fig.tight_layout()
plt.show()

# Features most positively and negatively correlated with closing prices

In [None]:
# Download prices for every ticker from 2000-01-01 onward and keep the Close
# field. Then put the series on a daily calendar frequency and reshape from
# one column per ticker to long form (Ticker, px), keeping the index.
px = (
    yf.download(tickers=tickers, start='2000-01-01')['Close']
    .asfreq('D')
    .melt(var_name='Ticker', value_name='px', ignore_index=False)
)

In [None]:
# Attach the imputed ratios to every (Date, Ticker) price row; dates without
# a ratio record come back as NaN from the left join.
px = px.reset_index().merge(data_.reset_index(), on=['Date', 'Ticker'], how='left')

# Fill the gaps within each ticker only. The long frame stacks one ticker
# after another, so a frame-wide ffill/bfill would carry one ticker's prices
# and ratios across the boundary into the neighbouring ticker.
_fill_cols = [col for col in px.columns if col not in ('Date', 'Ticker')]
px[_fill_cols] = (
    px.groupby('Ticker')[_fill_cols]
    .transform(lambda col: col.ffill().bfill())
)
px = px.set_index('Date')

In [None]:
# fs1[ticker]: feature with the highest correlation to that ticker's price.
# fs2[ticker]: feature with the lowest (most negative) correlation.
fs1 = {}
fs2 = {}

for ticker in tickers:
    # Keep numeric columns only: recent pandas versions raise when corr() is
    # given the string Ticker column instead of silently skipping it.
    numeric = px[px.Ticker == ticker].select_dtypes(include='number')

    # Compute the correlation matrix once per ticker and drop the price's
    # self-correlation by label, so a feature tied at 1.0 cannot cause the
    # price itself to be selected.
    px_corr = numeric.corr()['px'].drop('px').dropna()

    # idxmax/idxmin raise ValueError if no feature has a defined correlation.
    fs1[ticker] = px_corr.idxmax()
    fs2[ticker] = px_corr.idxmin()

In [None]:
# Positive correlations: each ticker's price with its most positively
# correlated feature overlaid on a secondary y-axis.

fig, ax = plt.subplots(10, 5, figsize=(30, 40))

for i, ticker in enumerate(tickers):
    # Fill the 10x5 grid row by row.
    row, col = divmod(i, 5)
    panel = ax[row][col]
    subset = px[px.Ticker == ticker]
    feature = fs1[ticker]

    panel.plot(subset['px'])
    # Twin axis so the feature keeps its own scale next to the price.
    ax_ = panel.twinx()
    ax_.plot(subset[feature], color='green')
    panel.set_title(ticker + ', ' + feature)

plt.tight_layout()
plt.show()

In [None]:
# Negative correlations: each ticker's price with its most negatively
# correlated feature overlaid on a secondary y-axis.

fig, ax = plt.subplots(10, 5, figsize=(30, 40))

for i, ticker in enumerate(tickers):
    # Fill the 10x5 grid row by row.
    row, col = divmod(i, 5)
    panel = ax[row][col]
    subset = px[px.Ticker == ticker]
    feature = fs2[ticker]

    panel.plot(subset['px'])
    # Twin axis so the feature keeps its own scale next to the price.
    ax_ = panel.twinx()
    ax_.plot(subset[feature], color='brown')
    panel.set_title(ticker + ', ' + feature)

plt.tight_layout()
plt.show()