In [34]:
from fyers_api import fyersModel
from fyers_api import accessToken
from login import login
import credentials.credentials as cred
import pandas as pd
import datetime
import os

In [35]:
# Instrument to download and candle resolution
# (presumably minutes: the CSV file name written later appends "m" to it).
symbol, interval = 'NSE:RELIANCE-EQ', '15'

# First and last calendar day of the full download window.
startDate = datetime.date(year=2020, month=8, day=1)
endDate = datetime.date(year=2022, month=8, day=3)

In [36]:
# Returns a pandas DataFrame with OHLCV data.
def get_historical_data(symbol, interval, start_date, end_date, fyers=None):
    """Download candles for one symbol and return them as a DataFrame.

    Parameters
    ----------
    symbol : str
        Instrument identifier sent as the request's "symbol" field.
    interval : str or int
        Candle resolution sent as the request's "resolution" field.
    start_date, end_date : str or datetime.date
        Bounds of the requested range. Each is interpolated with ``str()``
        into the request; this notebook passes 'YYYY-MM-DD' strings and
        ``datetime.date`` objects.
    fyers : object, optional
        An already-authenticated client exposing ``history(data)``. When
        omitted, a new client is created via ``login()`` on every call, which
        is the original behaviour. Pass one in to avoid logging in again for
        each chunk of a long download.

    Returns
    -------
    pandas.DataFrame or None
        Columns 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', with 'Date'
        as timezone-naive Asia/Kolkata wall-clock time. An 'ok' response with
        no candles yields an empty frame with the same columns. ``None`` is
        returned (after printing the response) for any non-'ok' status.
    """
    columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']

    if fyers is None:
        access_token = login()
        fyers = fyersModel.FyersModel(client_id=cred.client_id, token=access_token, log_path="./log/")

    data = {
            "symbol": f"{symbol}",
            "resolution": f"{interval}",
            "date_format": "1",
            "range_from": f"{start_date}",
            "range_to": f"{end_date}",
            "cont_flag": "1"
            }

    response = fyers.history(data)

    # NOTE(review): assumes the response is a dict-like object with an 's'
    # status field; .get avoids a KeyError when the field is missing.
    if response.get('s') != 'ok':
        # Covers 'error' as well as any other status, so a failed request is
        # always visible instead of silently returning None.
        print(response)
        return None

    candle_data = response.get('candles') or []
    if not candle_data:
        # Assigning six column names to an empty, column-less frame raises a
        # length mismatch, so the empty frame is built with its columns.
        print("Length of downloaded data:", 0)
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(candle_data, columns=columns)
    # Timestamps are parsed as epoch seconds (UTC), shifted to IST and then
    # stripped of tz info so the column holds plain local wall-clock times.
    df['Date'] = pd.to_datetime(df['Date'], unit='s')
    df['Date'] = df['Date'].dt.tz_localize('UTC').dt.tz_convert('Asia/Kolkata')
    df['Date'] = df['Date'].dt.tz_localize(None)
    print("Length of downloaded data:", len(df))
    return df

In [37]:
# Smoke test: fetch a short three-day window and look at the last candles.
testStartDate, testEndDate = '2020-08-01', '2020-08-03'

data = get_historical_data(
    symbol=symbol,
    interval=interval,
    start_date=testStartDate,
    end_date=testEndDate,
)
data.tail()

token is valid
Length of downloaded data: 25


Unnamed: 0,Date,Open,High,Low,Close,Volume
20,2020-08-03 14:15:00,2029.5,2034.0,2025.0,2025.95,507975
21,2020-08-03 14:30:00,2026.85,2032.0,2023.0,2024.05,438004
22,2020-08-03 14:45:00,2023.6,2024.1,2018.0,2020.45,558415
23,2020-08-03 15:00:00,2020.5,2022.0,2003.95,2007.0,1737021
24,2020-08-03 15:15:00,2007.9,2010.0,2003.1,2007.0,1148422


In [38]:
# Calendar-day distance between the first and last day of the window.
span = endDate - startDate
difference = span.days
print("Number of days:", difference)

Number of days: 732


In [39]:
# Download the full window in consecutive chunks and stitch them together.
#
# The chunk bounds are derived from startDate/endDate on every run, so this
# cell no longer consumes (and mutates) the global `difference`; re-running
# it on its own gives the same requests each time.

# Largest number of calendar days (inclusive) requested in one call.
# NOTE(review): 100 mirrors the chunk size the loop already used; presumably
# a per-request history limit of the API - confirm against the broker docs.
MAX_DAYS_PER_REQUEST = 100

chunks = []
tempStartDate = startDate

while tempStartDate <= endDate:
    # Cap every chunk at MAX_DAYS_PER_REQUEST inclusive days and never run
    # past endDate. The old branch for the final chunk could span one day
    # more than the others when exactly 100 days were left.
    tempEndDate = min(
        tempStartDate + datetime.timedelta(days=MAX_DAYS_PER_REQUEST - 1),
        endDate,
    )

    print(tempStartDate, "to", tempEndDate, "difference:", (tempEndDate - tempStartDate).days)
    data = get_historical_data(symbol, interval, tempStartDate, tempEndDate)
    if data is None:
        # A failed request would otherwise leave an unreported hole in the data.
        print("WARNING: no data returned for", tempStartDate, "to", tempEndDate)
    else:
        chunks.append(data)

    # The next chunk starts the day after this one ended.
    tempStartDate = tempEndDate + datetime.timedelta(days=1)

# Concatenate once at the end instead of growing the frame inside the loop.
final_df = pd.concat(chunks) if chunks else pd.DataFrame()

print("Length of final dataframe:", len(final_df))


2020-08-01 to 2020-11-08 difference: 99
token is valid
Length of downloaded data: 1725
2020-11-09 to 2021-02-16 difference: 99
token is valid
Length of downloaded data: 1706
2021-02-17 to 2021-05-27 difference: 99
token is valid
Length of downloaded data: 1642
2021-05-28 to 2021-09-04 difference: 99
token is valid
Length of downloaded data: 1726
2021-09-05 to 2021-12-13 difference: 99
token is valid
Length of downloaded data: 1654
2021-12-14 to 2022-03-23 difference: 99
token is valid
Length of downloaded data: 1725
2022-03-24 to 2022-07-01 difference: 99
token is valid
Length of downloaded data: 1725
2022-07-02 to 2022-08-03 difference: 32
token is valid
Length of downloaded data: 575
Length of final dataframe: 12478


In [40]:
# Replace the per-chunk row labels (which repeat after concatenation) with a
# single 0..n-1 running index named 'Count'.
final_df = final_df.reset_index(drop=True).rename_axis('Count')
final_df

Unnamed: 0_level_0,Date,Open,High,Low,Close,Volume
Count,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,2020-08-03 09:15:00,2051.30,2054.00,2035.0,2041.75,1986174
1,2020-08-03 09:30:00,2041.50,2046.60,2032.3,2039.00,1044597
2,2020-08-03 09:45:00,2039.10,2041.00,2026.7,2028.90,1051054
3,2020-08-03 10:00:00,2028.95,2031.35,2015.2,2018.50,1617555
4,2020-08-03 10:15:00,2018.20,2023.40,2009.0,2020.40,1655045
...,...,...,...,...,...,...
12473,2022-08-03 14:15:00,2590.00,2595.00,2587.0,2591.05,194596
12474,2022-08-03 14:30:00,2591.05,2597.00,2589.0,2596.40,190933
12475,2022-08-03 14:45:00,2596.65,2599.00,2594.3,2597.05,180075
12476,2022-08-03 15:00:00,2597.20,2610.00,2597.2,2608.55,810202


In [41]:
# Save the combined data to a CSV file.
# makedirs(..., exist_ok=True) also creates missing parent directories
# (os.mkdir fails when '../../data' does not exist yet) and removes the
# separate "check, then create" step.
# NOTE(review): `symbol` contains ':' which is not allowed in Windows file
# names; the path is kept unchanged because a later cell reads it back.
output_dir = f'../../data/{symbol}'
os.makedirs(output_dir, exist_ok=True)

final_df.to_csv(f'{output_dir}/{symbol} {startDate} to {endDate} {interval}m.csv')

In [42]:
# # verifying the date, check if the date difference between 2 consecutive rows is 1
# series = final_df.index
# print(type(series[0]))

# for i in range(len(series)):
#     date0 = series[i].to_pydatetime().date()
#     date1 = series[i+1].to_pydatetime().date()

#     sub = (date1 - date0).days
#     if sub not in [0, 1]:
#         print(date0, date1)

In [43]:
# visualising the downloaded data
# NOTE(review): `from datetime import datetime` below rebinds the name
# `datetime` from the module (imported in the first cell) to the class.
# Once this cell has run, re-running earlier cells that call
# `datetime.date(...)` or `datetime.timedelta(...)` fails until the first
# import cell is executed again. Neither `datetime` (the class) nor
# `make_subplots` is used in the plotting cells shown in this notebook.
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime

In [44]:
# Read back the CSV written by the save cell (same path template).
csv_path = f'../../data/{symbol}/{symbol} {startDate} to {endDate} {interval}m.csv'
df = pd.read_csv(csv_path)

In [45]:
# Plot a fixed-size window of candles from the loaded data.
st = 10400          # position of the first row to plot
n_candles = 350     # number of rows in the plotted window

# Build the slice without in-place mutation: .iloc makes the positional
# intent explicit and reset_index() returns a new frame whose index runs
# 0..len-1 (the previous row labels are kept in an 'index' column, as before).
dfpl = df.iloc[st:st + n_candles].reset_index()

# The x axis is the row position inside the slice, not the timestamp.
fig = go.Figure(data=[go.Candlestick(x=dfpl.index,
                open=dfpl['Open'],
                high=dfpl['High'],
                low=dfpl['Low'],
                close=dfpl['Close']),
               ])
fig.update_layout(
    title=f"{symbol} candles, rows {st} to {st + len(dfpl) - 1}",
    xaxis_title="Candle number within slice",
    yaxis_title="Price",
)
fig.show()