In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

import chart_studio.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import iplot, init_notebook_mode
from collections import OrderedDict

In [None]:
# Minute-level NSE price files: five oil & gas companies
# (BPCL, GAIL, IOC, ONGC, Reliance) followed by the NIFTY 50 index.
datasets = [
    "BPCL__EQ__NSE__NSE__MINUTE.csv",
    "GAIL__EQ__NSE__NSE__MINUTE.csv",
    "IOC__EQ__NSE__NSE__MINUTE.csv",
    "ONGC__EQ__NSE__NSE__MINUTE.csv",
    "RELIANCE__EQ__NSE__NSE__MINUTE.csv",
    "NIFTY_50__EQ__INDICES__NSE__MINUTE.csv",
]

In [None]:
# Re-bucket price rows into coarser bars (hourly, daily, monthly, ...).
def change_df(df, freq='1d'):
    """Resample OHLCV rows to the bar size given by ``freq``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a 'timestamp' column plus 'open', 'high', 'low',
        'close' and 'volume' columns. It is not modified.
    freq : str, default '1d'
        Offset alias passed straight to ``DataFrame.resample``.

    Returns
    -------
    pandas.DataFrame
        Indexed by the resampled timestamps, with columns open (first value
        in the bucket), high (max), low (min), close (last) and volume (sum).
    """
    # Aggregation applied to each column within a bucket, in output order.
    bar_rules = OrderedDict()
    bar_rules['open'] = 'first'
    bar_rules['high'] = 'max'
    bar_rules['low'] = 'min'
    bar_rules['close'] = 'last'
    bar_rules['volume'] = 'sum'

    time_index = pd.DatetimeIndex(df['timestamp'])
    return df.set_index(time_index).resample(freq).agg(bar_rules)

In [None]:
# Candlestick chart of a price frame at the requested bar size.
def plot_candlestick(df, freq='1d'):
    """Resample ``df`` with ``change_df(df, freq)`` and show it as a Plotly candlestick chart.

    The figure is displayed with ``show()``; nothing is returned.
    """
    bars = change_df(df, freq)
    candles = go.Candlestick(
        x=bars.index,
        open=bars['open'],
        high=bars['high'],
        low=bars['low'],
        close=bars['close'],
    )
    go.Figure(data=[candles]).show()

In [None]:
# Candlesticks for one dataframe at hour, day, week, month and quarter bar sizes.
def plot_all_candlesticks(df):
    """Print each frequency alias and draw the matching candlestick chart for ``df``."""
    # NOTE(review): newer pandas releases appear to deprecate the 'H', 'M' and 'Q'
    # aliases -- confirm against the installed version before upgrading.
    for period in ('1H', '1D', '1W', '1M', '1Q'):
        print("Time period: ", period)
        plot_candlestick(df, period)

In [None]:
# Minute-scale candlesticks, drawn on a leading slice of the data only.
def plot_all_candlesticks_minutes(df, max_rows=5000):
    """Plot 1-, 5-, 10-, 15- and 30-minute candlesticks for the start of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame in the layout expected by ``plot_candlestick``.
    max_rows : int, default 5000
        Only the first ``max_rows`` rows of ``df`` are plotted.

    Returns
    -------
    None
        Each frequency is printed and its chart displayed as a side effect.
    """
    # The slice does not depend on the frequency, so take it once.
    sample = df.head(max_rows)
    # 'min' is used instead of 'T', which newer pandas deprecates as a minute alias.
    for period in ('1min', '5min', '10min', '15min', '30min'):
        print("Time period: ", period)
        plot_candlestick(sample, period)

In [None]:
# BPCL: minute-scale candlesticks (1, 5, 10, 15 and 30-minute bars)
plot_all_candlesticks_minutes(pd.read_csv(datasets[0]))

In [None]:
# GAIL: minute-scale candlesticks (1, 5, 10, 15 and 30-minute bars)
plot_all_candlesticks_minutes(pd.read_csv(datasets[1]))

In [None]:
# IOC: minute-scale candlesticks (1, 5, 10, 15 and 30-minute bars)
plot_all_candlesticks_minutes(pd.read_csv(datasets[2]))

In [None]:
# ONGC: minute-scale candlesticks (1, 5, 10, 15 and 30-minute bars)
plot_all_candlesticks_minutes(pd.read_csv(datasets[3]))

In [None]:
# Reliance: minute-scale candlesticks (1, 5, 10, 15 and 30-minute bars)
plot_all_candlesticks_minutes(pd.read_csv(datasets[4]))

In [None]:
# BPCL: candlesticks at hourly, daily, weekly, monthly and quarterly bars
plot_all_candlesticks(pd.read_csv(datasets[0]))

In [None]:
# GAIL: candlesticks at hourly, daily, weekly, monthly and quarterly bars
plot_all_candlesticks(pd.read_csv(datasets[1]))

In [None]:
# IOC: candlesticks at hourly, daily, weekly, monthly and quarterly bars
plot_all_candlesticks(pd.read_csv(datasets[2]))

In [None]:
# ONGC: candlesticks at hourly, daily, weekly, monthly and quarterly bars
plot_all_candlesticks(pd.read_csv(datasets[3]))

In [None]:
# Reliance: candlesticks at hourly, daily, weekly, monthly and quarterly bars
plot_all_candlesticks(pd.read_csv(datasets[4]))

In [None]:
# Daily BPCL closing prices as a Series named 'BPCL'.
# A Series is renamed by passing the new name directly; `columns=` is a
# DataFrame-only keyword and raises TypeError on a Series.
change_df(pd.read_csv(datasets[0]))['close'].rename('BPCL')

In [None]:
# Concatenate the daily closing prices of the five companies into a single
# DataFrame, one column per company (same order as the first five `datasets`).
concat_df = pd.concat(
    [change_df(pd.read_csv(csv_path))['close'] for csv_path in datasets[:5]],
    axis=1,
)

In [None]:
# Name the columns after the companies, in the order their series were
# concatenated: BPCL, Gail, IOC, ONGC, Reliance
concat_df.columns = ['BPCL','Gail','IOC','ONGC','Reliance']

In [None]:
# Preview the first rows of the combined closing-price table
concat_df.head()

In [None]:
# Plot every company's closing price on one set of axes to compare trends
concat_df.plot(figsize=(10,10))

In [None]:
# Draw each company's closing-price series in its own figure.
for col in concat_df.columns:
    plt.figure()
    concat_df[col].plot()
    plt.title('Closing Prices')
    plt.xticks(rotation=70)
    # legend() takes a sequence of labels; a bare string would be split into
    # characters, so only the first letter of the name would be shown.
    plt.legend([col])

In [None]:
# Daily traded volume of the five oil & gas companies, one column per company
concat_volume = pd.concat(
    [change_df(pd.read_csv(csv_path))['volume'] for csv_path in datasets[:5]],
    axis=1,
)

In [None]:
# Name the columns after the companies, in the order their series were
# concatenated: BPCL, Gail, IOC, ONGC, Reliance
concat_volume.columns = ['BPCL','Gail','IOC','ONGC','Reliance']

In [None]:
# Keep only the dates on which every company has a volume value, then preview
concat_volume = concat_volume.dropna()
concat_volume.head()

In [None]:
# Closing prices of all five companies on one set of axes.
# NOTE(review): this repeats the earlier closing-price plot; since it sits in the
# volume section, `concat_volume` may have been intended -- confirm.
concat_df.plot(figsize=(10,10))

In [None]:
# Keep only the dates on which every company has a closing price, then preview
concat_df = concat_df.dropna()
concat_df.head()

In [None]:
# Row(s) where BPCL's closing price is at its maximum
concat_df.loc[concat_df['BPCL'] == concat_df['BPCL'].max()]

In [None]:
# Row(s) where BPCL's closing price is at its minimum
concat_df.loc[concat_df['BPCL'] == concat_df['BPCL'].min()]

In [None]:
# Load the NIFTY 50 minute data, parsing 'timestamp' as datetimes and using it as the index
nifty_50 = pd.read_csv(
    "NIFTY_50__EQ__INDICES__NSE__MINUTE.csv",
    index_col="timestamp",
    parse_dates=["timestamp"],
)

In [None]:
# Preview the first rows of the NIFTY 50 data
nifty_50.head()

In [None]:
# Count missing values in each column
nifty_50.isna().sum()

In [None]:
# Discard every row that contains a missing value
nifty_50 = nifty_50.dropna()

In [None]:
# Re-count missing values per column after the drop
nifty_50.isnull().sum()

In [None]:
# Summary statistics for the NIFTY 50 columns
nifty_50.describe()

In [None]:
# Maximum value of each NIFTY 50 column
nifty_50.max()

The NIFTY 50 is a benchmark Indian stock market index that represents the weighted average of 50 of the largest Indian companies listed on the National Stock Exchange (NSE). It is one of the two main stock indices used in India, the other being the BSE SENSEX.

In [None]:
# Daily closing prices of the five companies plus the NIFTY 50 index, side by side
concat_df_nifty_50 = pd.concat(
    [change_df(pd.read_csv(csv_path))['close'] for csv_path in datasets[:6]],
    axis=1,
)

In [None]:
# Name the columns in concatenation order: the five oil & gas companies
# (BPCL, Gail, IOC, ONGC, Reliance) followed by the NIFTY 50 index.
concat_df_nifty_50.columns = ['BPCL','Gail','IOC','ONGC','Reliance','Nifty_50']
# Preview last: only a cell's final expression is rendered, and this way the
# preview shows the readable column names.
concat_df_nifty_50.head()

In [None]:
# Compare the NIFTY 50 closing-price trend with the closing prices of the
# companies in the sector under study, all on one set of axes
concat_df_nifty_50.plot(figsize=(10,10))

In [None]:
# Start from an empty frame; its columns are filled in by a later cell
combo = pd.DataFrame()

In [None]:
# Copy the index and company closing prices into `combo`.
# Keys are the target column names in `combo`; values are the source columns.
combo_sources = {
    'nift50': 'Nifty_50',
    'BPCL': 'BPCL',
    'Gail': 'Gail',
    'IOC': 'IOC',
    'ONGC': 'ONGC',
    'Reliance': 'Reliance',
}
for target_col, source_col in combo_sources.items():
    combo[target_col] = concat_df_nifty_50[source_col]


In [None]:
# Keep only the dates on which every series has a value
combo = combo.dropna()

In [None]:
import seaborn as sns

In [None]:
# A distplot (distribution plot) shows how the values of each series are distributed.
# NOTE(review): sns.distplot is flagged as deprecated in newer seaborn releases --
# confirm before upgrading.
for series_name in ('nift50', 'BPCL', 'Gail', 'IOC', 'ONGC', 'Reliance'):
    sns.distplot(combo[series_name], kde_kws={'label': series_name})


In [None]:
# Pair plot: pairwise relationships between the closing-price series in `combo`
sns.pairplot(combo)

In [None]:
# Pairwise correlation matrix of the series in `combo`; the following cells
# visualise it as a heatmap
combo.corr()

In [None]:
# Heatmap of the correlation matrix, with each coefficient written in its cell
sns.heatmap(combo.corr(),annot=True)

In [None]:
# Cluster map of the same correlation matrix, with each coefficient written in its cell
sns.clustermap(combo.corr(),annot=True)