In [1]:
# import libraries for stock analysis
import pandas as pd
import holidays
import matplotlib.pyplot as plt
import scienceplots
from datetime import date, datetime, timedelta
import os
from IPython.display import display
import numpy as np



# NSE client from nsetools; `nse.is_valid_code` is used further down to validate tickers
from nsetools import Nse
nse = Nse()

# `get_history` is the downloader called by get_stock_data below
from nsepy import get_history

# pandas display options: no cap on the number of columns/rows shown, wide output,
# centered column headers and 3 digits of float precision
# NOTE(review): max_rows=None means any displayed DataFrame is rendered in full
pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)
pd.set_option('display.width',1000)
pd.set_option('display.colheader_justify','center')
pd.set_option('display.precision',3)

# matplotlib style sheet named 'science' (presumably registered by the scienceplots import above)
plt.style.use('science')
# make DataFrame.plot / Series.plot use plotly rather than matplotlib
pd.options.plotting.backend = "plotly"

# NOTE(review): two matplotlib backend magics are issued back to back; presumably only
# the second (inline) remains in effect -- confirm which backend is intended and drop the other
%matplotlib widget
%matplotlib inline

# silence every warning category for the rest of the session
# NOTE(review): this also hides deprecation warnings from pandas and other libraries
import warnings
warnings.filterwarnings("ignore")

In [2]:
# import libraries for dask
from dask.distributed import Client, LocalCluster
import dask.dataframe as dd
from dask.delayed import delayed

# create dask local cluster: 4 workers, 1 thread per worker, memory_limit of '2GB'
# (dask documents memory_limit as a per-worker limit)
cluster = LocalCluster(n_workers=4, threads_per_worker=1, memory_limit='2GB')

# connect a client to the cluster created above
client = Client(cluster)

# print dask dashboard link
print(client.dashboard_link)

http://127.0.0.1:8787/status


# Download historical stock data

In [3]:
# report whether today's date is listed in the India holiday calendar
def check_market_holiday():
    """Tell whether today's local date appears in ``holidays.India()``.

    Returns:
        bool: True when today's date is contained in the calendar object
        returned by ``holidays.India()``, False otherwise.
    """
    current_day = pd.to_datetime('today').date()
    # the membership test already yields the boolean the caller expects
    return current_day in holidays.India()

# get the stock data via nsepy's get_history if its not a market holiday
def get_stock_data(stock_code, latest=True):
    """Return historical data for ``stock_code``, topping up a local Excel cache.

    The cache is looked up at ``historical/<stock_code>.xlsx``. When it holds
    rows, only the days after its last index entry are requested from
    ``get_history``; otherwise the last two years (730 days) are requested.

    Args:
        stock_code: Ticker symbol; validated with ``nse.is_valid_code``.
        latest: When falsy and the cache holds rows, the cached frame is
            returned as-is without requesting newer data.

    Returns:
        pandas.DataFrame with the cached rows followed by the newly fetched
        rows, or None when ``check_market_holiday()`` is true or the stock
        code is not valid.

    Note:
        This function only reads the Excel cache, it never writes it.
        NOTE(review): presumably another cell saves the frame -- confirm.
    """
    if check_market_holiday():
        print('Market Holiday')
        return None

    if not nse.is_valid_code(stock_code):
        print('Invalid Stock Code')
        return None

    # os.path.join avoids the invalid '\{' escape and the Windows-only separator
    stock_file = os.path.join('historical', f'{stock_code}.xlsx')
    today = date.today()

    stock_data = pd.DataFrame()
    start_date = None

    if os.path.exists(stock_file):
        # load existing stock data from excel file
        stock_data = pd.read_excel(stock_file, index_col='Date')

        if len(stock_data) == 0:
            print(f'No Stock Data for {stock_code}')
        else:
            if not latest:
                return stock_data

            # normalise the last index entry to datetime.date: comparing a
            # pandas Timestamp with a date never evaluates equal, which made
            # the "up to date" shortcut unreachable
            last_date = pd.Timestamp(stock_data.index[-1]).date()

            if last_date >= today:
                print(f'{stock_code}: Stock Data is up to date')
                return stock_data

            # resume from the day after the last cached row
            start_date = last_date + timedelta(days=1)

    # nothing cached (or an empty cache): fall back to a 2 year window
    if start_date is None:
        start_date = today - timedelta(days=365 * 2)

    new_stock_data = get_history(symbol=stock_code, start=start_date, end=today)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    # With nothing cached there is nothing to merge.
    if len(stock_data) == 0:
        return new_stock_data
    return pd.concat([stock_data, new_stock_data])
            

# get_stock_data('SBIN')

# Download all stocks

In [25]:
# path of the Excel workbook listing the stock codes; built with os.path.join so it
# does not rely on the invalid '\s' escape or on a Windows-only path separator
STOCK_FILE = os.path.join('data', 'stock_codes.xlsx')

# report whether get_stock_data produced at least one row for a stock code
def is_stock_data_loaded(stock_code, latest=True):
    """Tell whether ``get_stock_data`` yields a non-empty result.

    Args:
        stock_code: Ticker symbol forwarded to ``get_stock_data``.
        latest: Forwarded unchanged as the ``latest`` keyword.

    Returns:
        bool: False when ``get_stock_data`` returns None or something of
        length zero, True otherwise.
    """
    frame = get_stock_data(stock_code, latest=latest)
    return frame is not None and len(frame) != 0

# read the stock code list and flag which codes have historical data
def load_stock_codes(stock_file=STOCK_FILE):
    """Read the stock code workbook and mark which tickers have data.

    Every value in the ``ticker`` column is passed to ``is_stock_data_loaded``
    and the result is stored in a new ``loaded`` column.

    Args:
        stock_file: Path of the Excel workbook to read; it must contain a
            ``ticker`` column.

    Returns:
        pandas.DataFrame: the workbook contents plus the boolean ``loaded``
        column. The frame used to be discarded; callers that ignore the
        return value are unaffected.
    """
    # read stock_file into pandas dataframe
    df_stock_codes = pd.read_excel(stock_file)

    # display the count of stock_codes
    print(f'Total Stock Codes: {len(df_stock_codes)}')

    # one lookup per ticker; astype(bool) keeps the dtype boolean even for an empty sheet
    df_stock_codes['loaded'] = (
        df_stock_codes['ticker'].apply(is_stock_data_loaded).astype(bool)
    )

    # print the number of loaded codes rather than a whole value_counts() Series repr
    print(f'Total Stock Codes Loaded: {int(df_stock_codes["loaded"].sum())}')

    return df_stock_codes


# run the loader against the default STOCK_FILE workbook
load_stock_codes()


Unnamed: 0,ticker,company_name,loaded
0,20MICRONS,20 Microns Limited,False
1,21STCENMGM,21st Century Management Services Limited,False
2,360ONE,360 ONE WAM LIMITED,False


No Stock Data for 3IINFOLTD
No Stock Data for 4THDIM
No Stock Data for AAATECH
No Stock Data for AARTIPHARM
