In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

import chart_studio.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import iplot, init_notebook_mode
from collections import OrderedDict

In [None]:
# CSV file names for the IT companies (HCL, Infosys, TCS, Tech Mahindra, Wipro),
# followed by the NIFTY 50 index file as the last entry.
it_datasets = [
    'HCLTECH__EQ__NSE__NSE__MINUTE.csv',
    'INFY__EQ__NSE__NSE__MINUTE.csv',
    'TCS__EQ__NSE__NSE__MINUTE.csv',
    'TECHM__EQ__NSE__NSE__MINUTE.csv',
    'WIPRO__EQ__NSE__NSE__MINUTE.csv',
    'NIFTY_50__EQ__INDICES__NSE__MINUTE.csv',
]

In [None]:
# Resample price/volume rows to a coarser time frequency (hour, day, month, ...).
def change_df(df, freq='1d'):
    """Aggregate OHLCV rows of `df` into buckets of width `freq`.

    Parameters
    ----------
    df : DataFrame
        Must contain a 'timestamp' column (used to build the datetime index)
        and 'open', 'high', 'low', 'close', 'volume' columns.
    freq : str, default '1d'
        Frequency string handed to ``DataFrame.resample``.

    Returns
    -------
    DataFrame
        One row per bucket with columns open (first value), high (max),
        low (min), close (last value) and volume (sum), in that order.
    """
    ohlcv_rules = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }
    by_time = df.set_index(pd.DatetimeIndex(df['timestamp']))
    return by_time.resample(freq).agg(ohlcv_rules)

In [None]:
# Candlestick chart of the data after resampling it to the requested frequency.
def plot_candlestick(df,freq='1d'):
    """Resample `df` via `change_df(df, freq)` and show it as a Plotly candlestick figure.

    Parameters
    ----------
    df : DataFrame
        Passed unchanged to `change_df`.
    freq : str, default '1d'
        Resampling frequency forwarded to `change_df`.
    """
    resampled = change_df(df, freq)
    candles = go.Candlestick(
        x=resampled.index,
        open=resampled['open'],
        high=resampled['high'],
        low=resampled['low'],
        close=resampled['close'],
    )
    figure = go.Figure(data=[candles])
    figure.show()

In [None]:

# NOTE(review): this cell repeats the `plot_candlestick` definition from the
# previous cell. Being later in the notebook, it is the definition in effect
# after a top-to-bottom run. Candidate for removal.
def plot_candlestick(df,freq='1d'):
    """Resample `df` with `change_df(df, freq)` and show a Plotly candlestick figure."""
    new_data = change_df(df,freq)
    fig = go.Figure(data=[go.Candlestick(x=new_data.index,
                    open=new_data['open'],
                    high=new_data['high'],
                    low=new_data['low'],
                    close=new_data['close'])])
    fig.show()

In [None]:
# Plot candlesticks for a given dataframe at hour, day, week, month and quarter frequencies
def plot_all_candlesticks(df):
    """Draw one candlestick chart of `df` per frequency in a fixed list.

    Parameters
    ----------
    df : DataFrame
        Passed unchanged to `plot_candlestick` for every frequency.

    Each chart is preceded by a printed line naming its frequency.
    """
    # Only valid offset aliases belong here: an empty string makes
    # `resample` raise ValueError ("Invalid frequency") before anything is drawn.
    time_periods = ['1H', '1D', '1W', '1M', '1Q']
    for period in time_periods:
        print("Time period: ", period)
        plot_candlestick(df, period)

In [None]:
# Minute-interval candlesticks, drawn from only the first 5000 rows of the data
def plot_all_candlesticks_minutes(df):
    """Draw candlestick charts of the first 5000 rows of `df` at several minute intervals.

    Parameters
    ----------
    df : DataFrame
        Source rows; only `df.head(5000)` is handed to `plot_candlestick`.

    Each chart is preceded by a printed line naming its frequency.
    """
    first_rows = df.head(5000)
    for period in ('1T', '5T', '10T', '15T', '30T'):
        print("Time period: ", period)
        plot_candlestick(first_rows, period)

In [None]:
# HCL: minute-interval candlesticks
plot_all_candlesticks_minutes(df=pd.read_csv(it_datasets[0]))

In [None]:
# Infosys: minute-interval candlesticks
plot_all_candlesticks_minutes(df=pd.read_csv(it_datasets[1]))

In [None]:
# TCS: minute-interval candlesticks
plot_all_candlesticks_minutes(df=pd.read_csv(it_datasets[2]))

In [None]:
# Tech Mahindra: minute-interval candlesticks
plot_all_candlesticks_minutes(df=pd.read_csv(it_datasets[3]))

In [None]:
# Wipro: minute-interval candlesticks
plot_all_candlesticks_minutes(df=pd.read_csv(it_datasets[4]))

In [None]:
# HCL: candlesticks at every frequency covered by plot_all_candlesticks
plot_all_candlesticks(df=pd.read_csv(it_datasets[0]))

In [None]:
# Infosys: candlesticks at every frequency covered by plot_all_candlesticks
plot_all_candlesticks(df=pd.read_csv(it_datasets[1]))

In [None]:
# TCS: candlesticks at every frequency covered by plot_all_candlesticks
plot_all_candlesticks(df=pd.read_csv(it_datasets[2]))

In [None]:
# Tech Mahindra: candlesticks at every frequency covered by plot_all_candlesticks
plot_all_candlesticks(df=pd.read_csv(it_datasets[3]))

In [None]:
# Wipro: candlesticks at every frequency covered by plot_all_candlesticks
plot_all_candlesticks(df=pd.read_csv(it_datasets[4]))

In [None]:
# Daily HCL closing prices as a Series named 'HCL'.
# `['close']` selects a single Series, and Series.rename takes the new name
# directly; it has no `columns` keyword (passing one raises TypeError).
change_df(pd.read_csv(it_datasets[0]))['close'].rename('HCL')

In [None]:
# Daily closing price of each of the five IT stocks, one column per company,
# so their trends can be compared on a single chart.
concat_df = pd.concat(
    [change_df(pd.read_csv(csv_name))['close'] for csv_name in it_datasets[:5]],
    axis=1,
)

In [None]:
# Label each column with its company: HCL, Infosys, TCS, Tech Mahindra, Wipro
concat_df.columns = [
    'HCL',
    'Infosys',
    'TCS',
    'TechMahindra',
    'Wipro',
]

In [None]:
# Preview the first rows of the combined closing-price frame
concat_df.head(n=5)

In [None]:
# All five closing-price series on one line chart
concat_df.plot(kind='line', figsize=(10, 10))

In [None]:
# One figure per company, each showing that company's closing-price series.
for col in concat_df.columns:
    plt.figure()
    concat_df[col].plot()
    plt.title('Closing Prices')
    plt.xticks(rotation=70)
    # Wrap the name in a list: a bare string is iterated character by
    # character, so the legend would show only the first letter.
    plt.legend([col])

In [None]:
# Daily traded volume of each of the five IT stocks, one column per company
concat_volume = pd.concat(
    [change_df(pd.read_csv(csv_name))['volume'] for csv_name in it_datasets[:5]],
    axis=1,
)

In [None]:
# Label each column with its company: HCL, Infosys, TCS, Tech Mahindra, Wipro
concat_volume.columns = [
    'HCL',
    'Infosys',
    'TCS',
    'TechMahindra',
    'Wipro',
]

In [None]:
# Keep only the rows where every company has a volume value, then preview
concat_volume = concat_volume.dropna()
concat_volume.head(n=5)

In [None]:
# Chart the volume frame built in the preceding cells. The closing-price
# frame already has its own identical chart earlier in the notebook.
concat_volume.plot(figsize=(10,10))

In [None]:
# Keep only the rows where every company has a closing price, then preview
concat_df = concat_df.dropna()
concat_df.head(n=5)

In [None]:
# Row(s) on which HCL's closing price is at its maximum
concat_df.loc[concat_df['HCL'] == concat_df['HCL'].max()]

In [None]:
# Row(s) on which HCL's closing price is at its minimum
concat_df.loc[concat_df['HCL'] == concat_df['HCL'].min()]

In [None]:
# Load the NIFTY 50 file, parsing 'timestamp' as dates and using it as the index
nifty_50 = pd.read_csv(
    "NIFTY_50__EQ__INDICES__NSE__MINUTE.csv",
    index_col="timestamp",
    parse_dates=["timestamp"],
)

In [None]:
# Preview the first rows of the NIFTY 50 data
nifty_50.head(n=5)

In [None]:
# Count of missing values per column, before cleaning
nifty_50.isna().sum(axis=0)

In [None]:
# Discard every row that has a missing value
nifty_50 = nifty_50.dropna()

In [None]:
# Re-count missing values per column after dropping incomplete rows
nifty_50.isna().sum(axis=0)

In [None]:
# Summary statistics (count, mean, spread, quartiles) of the NIFTY 50 columns
nifty_50.describe(percentiles=[0.25, 0.5, 0.75])

In [None]:
# Largest value found in each NIFTY 50 column
nifty_50.max(axis=0)

In [None]:
# Daily closing price for every file in it_datasets: the five IT stocks
# followed by the NIFTY 50 index, one column each.
concat_df_nifty_50 = pd.concat(
    [change_df(pd.read_csv(csv_name))['close'] for csv_name in it_datasets],
    axis=1,
)

In [None]:
# Label the columns: HCL, Infosys, TCS, Tech Mahindra, Wipro and the NIFTY 50 index
concat_df_nifty_50.columns = ['HCL','Infosys','TCS','TechMahindra','Wipro',"Nifty_50"]
# Preview as the cell's last expression so the notebook renders it,
# and after labelling so the readable column names are shown.
concat_df_nifty_50.head()

In [None]:
# Compare the NIFTY 50 trend with the five stocks on a single line chart
concat_df_nifty_50.plot(kind='line', figsize=(10, 10))

In [None]:
# Start from an empty frame; its columns are filled in by the next cell
combo = pd.DataFrame(data=None)

In [None]:
# Copy the index first (stored under the key 'nift50'), then each stock,
# from the combined closing-price frame.
combo['nift50'] = concat_df_nifty_50['Nifty_50']
combo['HCL'] = concat_df_nifty_50['HCL']
combo['Infosys'] = concat_df_nifty_50['Infosys']
combo['TCS'] = concat_df_nifty_50['TCS']
combo['TechMahindra'] = concat_df_nifty_50['TechMahindra']
combo['Wipro'] = concat_df_nifty_50['Wipro']

In [None]:
# Keep only the rows where the index and every stock have a value
combo = combo.dropna()

In [None]:
import seaborn as sns

In [None]:
# Overlay the closing-price distribution of the index and each stock on one axis.
for col in ['nift50', 'HCL', 'Infosys', 'TCS', 'TechMahindra', 'Wipro']:
    sns.distplot(combo[col], kde_kws={'label': col})
# Draw the legend explicitly so each labelled curve can be identified.
plt.legend();


In [None]:
# Pairwise scatter plots between the index and every stock
sns.pairplot(data=combo)

In [None]:
# Pearson correlation matrix of the closing prices
combo.corr(method='pearson')

In [None]:
# Correlation matrix as an annotated heatmap
sns.heatmap(data=combo.corr(), annot=True)

In [None]:
# Correlation matrix as an annotated, hierarchically clustered heatmap
sns.clustermap(data=combo.corr(), annot=True)