# #So We had a lot of csv files containing monthly 1 minute data from 2008 to 2021
 ## We needed to merge it into one single file, so that it's easy to analyse the data as a whole
 ## rather than reading each single file

In [1]:
import pandas as pd
import glob
import os
import datetime

In [2]:
# merging the files
# We used regex to merge all file with names like 2010 MAR BNF.csv, 2021 APR BNF.csv etc, 
#we used regex *BNF.csv to select all of them


joined_files_bnf = os.path.join("*BNF.csv") 
joined_files_nf = os.path.join("*NIFTY.csv")

In [3]:
joined_list_bnf = glob.glob(joined_files_bnf)
joined_list_nf = glob.glob(joined_files_nf)

In [4]:
print(len(joined_list_bnf))
print(len(joined_list_nf))

104
104


In [5]:
df_from_each_file_bnf = (pd.read_csv(f, sep=',', names = ['name', 'date', 'time', 'open', 'high', 'low', 'close', 'farzi', 'farzi2']) for f in joined_list_bnf)

df_from_each_file_nf = (pd.read_csv(f, sep=',', names = ['name', 'date', 'time', 'open', 'high', 'low', 'close', 'farzi', 'farzi2']) for f in joined_list_nf)

In [6]:
bnf_df   = pd.concat(df_from_each_file_bnf, axis= 0, ignore_index=True)
nf_df   = pd.concat(df_from_each_file_nf, axis= 0, ignore_index=True)


In [7]:
bnf_df

Unnamed: 0,name,date,time,open,high,low,close,farzi,farzi2
0,BANKNIFTY,20141001,09:16,15377.60,15377.60,15362.30,15366.90,0.0,
1,BANKNIFTY,20141001,09:17,15361.55,15366.45,15340.95,15340.95,0.0,
2,BANKNIFTY,20141001,09:18,15343.25,15343.25,15324.65,15336.80,0.0,
3,BANKNIFTY,20141001,09:19,15336.15,15338.80,15325.30,15328.50,0.0,
4,BANKNIFTY,20141001,09:20,15329.60,15351.65,15326.80,15349.00,0.0,
...,...,...,...,...,...,...,...,...,...
1037382,BANKNIFTY,20170228,15:27,20623.20,20639.90,20621.50,20639.50,58.0,
1037383,BANKNIFTY,20170228,15:28,20639.30,20641.20,20633.10,20639.10,59.0,
1037384,BANKNIFTY,20170228,15:29,20641.30,20641.30,20625.20,20628.80,60.0,
1037385,BANKNIFTY,20170228,15:30,20623.90,20639.00,20609.20,20609.20,60.0,


In [8]:
nf_df

Unnamed: 0,name,date,time,open,high,low,close,farzi,farzi2
0,NIFTY,20121101,09:16,5615.45,5615.55,5608.75,5613.10,,
1,NIFTY,20121101,09:17,5611.90,5613.05,5609.60,5610.80,,
2,NIFTY,20121101,09:18,5610.60,5610.85,5606.10,5606.80,,
3,NIFTY,20121101,09:19,5606.60,5607.70,5605.10,5605.10,,
4,NIFTY,20121101,09:20,5605.45,5609.05,5605.45,5608.45,,
...,...,...,...,...,...,...,...,...,...
1209024,NIFTY,20130430,15:26,5929.00,5929.00,5925.95,5925.95,,
1209025,NIFTY,20130430,15:27,5925.50,5926.05,5924.15,5924.95,,
1209026,NIFTY,20130430,15:28,5925.05,5925.40,5922.35,5923.15,,
1209027,NIFTY,20130430,15:29,5921.90,5925.00,5921.90,5924.35,,


In [9]:
bnf_df['date'] = pd.to_datetime(bnf_df.date, format = '%Y%m%d')
bnf_df['time'] = pd.to_datetime(bnf_df.time, format='%H:%M').dt.time

nf_df['date'] = pd.to_datetime(nf_df.date, format = '%Y%m%d')
nf_df['time'] = pd.to_datetime(nf_df.time, format='%H:%M').dt.time

In [10]:
bnf_df = bnf_df.sort_values(by = ['date', 'time'])
nf_df = nf_df.sort_values(by = ['date', 'time'])

In [11]:
bnf_df = bnf_df.drop(['farzi', 'farzi2'], axis = 1)
nf_df = nf_df.drop(['farzi', 'farzi2'], axis = 1)

In [12]:
bnf_df.tail(20)

Unnamed: 0,name,date,time,open,high,low,close
860758,BANKNIFTY,2021-04-30,15:13:00,32803.25,32810.0,32798.1,32810.0
860759,BANKNIFTY,2021-04-30,15:14:00,32813.2,32840.5,32813.2,32819.0
860760,BANKNIFTY,2021-04-30,15:15:00,32813.35,32826.25,32812.15,32824.15
860761,BANKNIFTY,2021-04-30,15:16:00,32819.95,32826.45,32810.85,32819.7
860762,BANKNIFTY,2021-04-30,15:17:00,32814.95,32815.05,32798.3,32798.3
860763,BANKNIFTY,2021-04-30,15:18:00,32798.4,32812.0,32790.35,32790.35
860764,BANKNIFTY,2021-04-30,15:19:00,32791.85,32799.2,32752.65,32767.3
860765,BANKNIFTY,2021-04-30,15:20:00,32757.8,32784.65,32757.8,32774.95
860766,BANKNIFTY,2021-04-30,15:21:00,32772.05,32772.95,32704.4,32714.25
860767,BANKNIFTY,2021-04-30,15:22:00,32709.85,32719.95,32663.4,32680.55


In [22]:
bnf_df[(bnf_df.date.dt.date == datetime.date(2021, 4, 30)) & (bnf_df.time == datetime.date(2021, 4, 30))]

Unnamed: 0,name,date,time,open,high,low,close
860400,BANKNIFTY,2021-04-30,09:08:00,33112.40,33112.40,33112.40,33112.40
860401,BANKNIFTY,2021-04-30,09:16:00,33214.95,33214.95,33028.60,33111.15
860402,BANKNIFTY,2021-04-30,09:17:00,33108.80,33138.00,33042.50,33131.95
860403,BANKNIFTY,2021-04-30,09:18:00,33124.20,33209.05,33106.95,33113.60
860404,BANKNIFTY,2021-04-30,09:19:00,33094.10,33113.75,33052.75,33083.50
...,...,...,...,...,...,...,...
860773,BANKNIFTY,2021-04-30,15:28:00,32737.45,32741.95,32722.90,32725.95
860774,BANKNIFTY,2021-04-30,15:29:00,32730.90,32733.65,32713.35,32718.80
860775,BANKNIFTY,2021-04-30,15:30:00,32715.90,32726.20,32706.25,32714.35
860776,BANKNIFTY,2021-04-30,15:31:00,32724.90,32724.90,32724.90,32724.90


In [29]:
bnf_df[(bnf_df.date.dt.date == datetime.date(2021, 4, 30)) & (bnf_df.time == datetime.time(15,15))]

Unnamed: 0,name,date,time,open,high,low,close
860760,BANKNIFTY,2021-04-30,15:15:00,32813.35,32826.25,32812.15,32824.15


In [34]:
bnf_df[(bnf_df.time > datetime.time(15,30)) ]

Unnamed: 0,name,date,time,open,high,low,close
1014357,BANKNIFTY,2014-10-23,18:24:00,16508.40,16508.40,16508.40,16508.40
1014358,BANKNIFTY,2014-10-23,18:31:00,16508.90,16524.90,16490.95,16490.95
1014359,BANKNIFTY,2014-10-23,18:32:00,16486.75,16497.15,16483.85,16489.15
1014360,BANKNIFTY,2014-10-23,18:33:00,16485.05,16490.70,16485.05,16486.50
1014361,BANKNIFTY,2014-10-23,18:34:00,16489.65,16493.80,16474.60,16474.60
...,...,...,...,...,...,...,...
860021,BANKNIFTY,2021-04-28,15:32:00,33717.50,33722.80,33717.50,33722.80
860398,BANKNIFTY,2021-04-29,15:31:00,33665.05,33665.05,33665.05,33665.05
860399,BANKNIFTY,2021-04-29,15:32:00,33708.30,33714.50,33708.30,33714.50
860776,BANKNIFTY,2021-04-30,15:31:00,32724.90,32724.90,32724.90,32724.90


In [15]:
bnf_df

Unnamed: 0,name,date,time,open,high,low,close
240351,BANKNIFTY,2010-03-02,09:01:00,8826.90,8856.95,8826.90,8839.80
240352,BANKNIFTY,2010-03-02,09:02:00,8840.75,8853.90,8832.80,8852.30
240353,BANKNIFTY,2010-03-02,09:03:00,8855.70,8855.70,8844.65,8846.65
240354,BANKNIFTY,2010-03-02,09:04:00,8843.85,8850.70,8843.75,8846.70
240355,BANKNIFTY,2010-03-02,09:05:00,8846.40,8846.40,8837.75,8841.60
...,...,...,...,...,...,...,...
860773,BANKNIFTY,2021-04-30,15:28:00,32737.45,32741.95,32722.90,32725.95
860774,BANKNIFTY,2021-04-30,15:29:00,32730.90,32733.65,32713.35,32718.80
860775,BANKNIFTY,2021-04-30,15:30:00,32715.90,32726.20,32706.25,32714.35
860776,BANKNIFTY,2021-04-30,15:31:00,32724.90,32724.90,32724.90,32724.90


In [35]:
#bnf_df.to_csv('bank_nifty.csv', index = None)
#nf_df.to_csv('nifty.csv', index = None)

In [48]:
bnf_df['Year'] = bnf_df.date.dt.year
bnf_df['Month'] = bnf_df.date.dt.month
bnf_df['m_day'] = bnf_df.date.dt.day

In [49]:
bnf_df

Unnamed: 0,name,date,time,open,high,low,close,Year,Month,m_day
240351,BANKNIFTY,2010-03-02,09:01:00,8826.90,8856.95,8826.90,8839.80,2010,3,2
240352,BANKNIFTY,2010-03-02,09:02:00,8840.75,8853.90,8832.80,8852.30,2010,3,2
240353,BANKNIFTY,2010-03-02,09:03:00,8855.70,8855.70,8844.65,8846.65,2010,3,2
240354,BANKNIFTY,2010-03-02,09:04:00,8843.85,8850.70,8843.75,8846.70,2010,3,2
240355,BANKNIFTY,2010-03-02,09:05:00,8846.40,8846.40,8837.75,8841.60,2010,3,2
...,...,...,...,...,...,...,...,...,...,...
860773,BANKNIFTY,2021-04-30,15:28:00,32737.45,32741.95,32722.90,32725.95,2021,4,30
860774,BANKNIFTY,2021-04-30,15:29:00,32730.90,32733.65,32713.35,32718.80,2021,4,30
860775,BANKNIFTY,2021-04-30,15:30:00,32715.90,32726.20,32706.25,32714.35,2021,4,30
860776,BANKNIFTY,2021-04-30,15:31:00,32724.90,32724.90,32724.90,32724.90,2021,4,30


In [52]:
bnf_df[(bnf_df.Year == 2021) & (bnf_df.Month == 4) & (bnf_df.m_day == 30) & (bnf_df.time == datetime.time(15,30)) ]

Unnamed: 0,name,date,time,open,high,low,close,Year,Month,m_day
860775,BANKNIFTY,2021-04-30,15:30:00,32715.9,32726.2,32706.25,32714.35,2021,4,30
