<a href="https://colab.research.google.com/github/rushikeshnakhate/A2DataTech/blob/master/StocksV1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [76]:
pip install jugaad_data



In [77]:
from pathlib import Path
from datetime import date
from jugaad_data.nse import NSELive
from jugaad_data.nse import bhavcopy_save
import pandas as pd
output_dir = Path("/content/sample_data")
import os

In [78]:
def validate_required_args(kwargs, *required_args):
    for arg in required_args:
        if arg not in kwargs:
            raise ValueError(f"Error: '{arg}' argument not provided, actual received: {kwargs}")

In [79]:
import pickle

def load_or_download_and_cache_data(download_function:callable,**kwargs):
  validate_required_args(kwargs, 'cache_file')
  cache_file : str = kwargs.get("cache_file")

  if os.path.exists(cache_file):
    with open(cache_file,'rb') as file:
      return pickle.load(file)

  data_df = download_function(**kwargs)
  # if not data_df.empty:
  with open(cache_file,'wb') as file:
    pickle.dump(data_df,file)
  return data_df

In [80]:
import os
def download_bhavcopy(**kwargs):
  """
  # bhavcopy_save: Generates a CSV file with a random name.
  # The expected name for the downloaded Bhavcopy file is "bhavcopy_${start_date}.csv".
  # The cache file is named as "bhavcopy_${start_date}.pkl". A renaming process is required.
  """
  validate_required_args(kwargs, 'cache_file','start_date')
  cache_file: str = kwargs.get("cache_file")
  start_date : date= kwargs.get("start_date")

  generated_csv_filename = bhavcopy_save(start_date, output_dir)
  cache_file_to_csv_name = cache_file.with_suffix(".csv")
  os.rename(generated_csv_filename,cache_file_to_csv_name)
  return pd.read_csv(cache_file_to_csv_name)


In [81]:
def fetch_or_genenate_stocks(start_date: date):
  output_file  = Path(output_dir)/("NseData_" + str(start_date) +".pkl")
  return load_or_download_and_cache_data(download_bhavcopy,start_date= start_date,cache_file=output_file)

In [82]:
import yfinance as yf
import concurrent.futures
import threading

def populate_sector(tick: str):
  tick_ = tick + ".NS"
  try:
    ticker = yf.Ticker(tick_)
    # print("populate_sector sector for sym={}".format(tick))
    return ticker.info['sector']
  except Exception as e:
    return "NOT_FOUND_ON_YAHOO"


def download_sectors(**kwargs):
  validate_required_args(kwargs, 'stocks')
  stocks_df : pd.Dataframe = kwargs.get('stocks')
  new_data = []
  for symbol in stocks_df['SYMBOL']:
      sector = populate_sector(symbol)
      new_data.append((symbol, sector))
  return pd.DataFrame(new_data, columns=['SYMBOL', 'SECTOR'])


# lock = threading.Lock()  # Create a lock for thread safety
# results = {}  # Use a dictionary to store results with symbol as key
# def process_stock(symbol):
#   # Acquire the lock to prevent race conditions
#   with lock:
#     sector = populate_sector(symbol)
#     results[symbol] = sector

# def download_sectors_concurrently(**kwargs):
#   validate_required_args(kwargs, 'stocks')
#   stocks_df : pd.Dataframe = kwargs.get('stocks')
#   with concurrent.futures.ThreadPoolExecutor() as executor:
#       executor.map(process_stock, stocks_df['SYMBOL'].tolist())
#   dd1 = pd.DataFrame(results)
#   print("00000000000000000")
#   display(dd1)
#   print("00000000000000000")
#   return dd1

In [83]:
# def fetch_or_generate_sector_to_stocks(stocks : pd.DataFrame):
#   output_file  = Path(output_dir)/("Sectors.pkl")
#   stocks_to_sector_map_df = load_or_download_and_cache_data(download_sectors,cache_file=output_file,stocks=stocks)
#   dd =  pd.merge(stocks, stocks_to_sector_map_df, on='SYMBOL', how='inner',suffixes=('', ''))
#   if dd.empty:
#     raise ValueError("Error: The DataFrame is empty.")
#   return dd


def fetch_or_generate_sector_to_stocks(stocks : pd.DataFrame):
  output_file  = Path(output_dir)/("Sectors.pkl")
  stocks_to_sector_map_df = load_or_download_and_cache_data(download_sectors,cache_file=output_file,stocks=stocks)
  # display(stocks_to_sector_map_df)
  try:
   return pd.merge(stocks, stocks_to_sector_map_df, on='SYMBOL', how='inner',suffixes=('', ''))
  except pd.errors.MergeError as e:
    print(f"Merge failed: {e}")

In [84]:
def calculate_performace(stocks_df : pd.DataFrame, start_date: date):
    # Calculate daily returns
  print("calculate_performace for date={}".format(start_date))
  stocks_df['DAILY_RETURN'] =(stocks_df['CLOSE'] - stocks_df['OPEN'])/ stocks_df['OPEN']

  # Group by 'SECTOR' and calculate average daily return and volatility
  sector_performance_df = stocks_df.groupby('SECTOR').agg({
      # stocks_list=('STOCK', lambda x: list(x))
      'DAILY_RETURN': ['std','mean','count']
  }).reset_index()

  sector_performance_df["TRADEDATE"] = start_date

  # Flatten the column names, but only for columns where the second part is not an empty string
  sector_performance_df.columns = [
      f"{col[0]}_{col[1]}" if col[1] else col[0] for col in sector_performance_df.columns
  ]
  return sector_performance_df


def prepare_stocks_data_and_calculate_performance(**kwargs):
  try:
    validate_required_args(kwargs, 'start_date')
    start_date : date = kwargs.get('start_date')
    stocks_df = fetch_or_genenate_stocks(start_date)
    stocks_df_with_sector = fetch_or_generate_sector_to_stocks(stocks_df)
    return calculate_performace(stocks_df_with_sector,start_date)
  except ValueError as e:
    print(e)

def fetch_or_generate_performance_for_stocks(start_date:date):
  sector_performance_output_file = Path(output_dir)/("SectorPeformance_" + str(start_date) +".pkl")
  return load_or_download_and_cache_data(prepare_stocks_data_and_calculate_performance,start_date=start_date,cache_file=sector_performance_output_file)

In [85]:
from google.colab import data_table
from datetime import date, timedelta#

data_table.enable_dataframe_formatter()

if __name__ == "__main__":
  start_date = date (2024,1,18)
  num_days = 10
  date_range = [start_date - timedelta(days=i) for i in range(num_days)]
  for d in date_range:
    df = fetch_or_generate_performance_for_stocks(d)
    display(df)






Unnamed: 0,SECTOR,DAILY_RETURN_std,DAILY_RETURN_mean,DAILY_RETURN_count,TRADEDATE
0,Basic Materials,0.022636,0.00014,330,2024-01-18
1,Communication Services,0.030817,-0.000281,73,2024-01-18
2,Consumer Cyclical,0.025206,0.001927,373,2024-01-18
3,Consumer Defensive,0.020967,-0.00244,142,2024-01-18
4,Energy,0.019827,0.003527,35,2024-01-18
5,Financial Services,0.014939,-0.001455,1024,2024-01-18
6,Healthcare,0.019838,0.002678,129,2024-01-18
7,Industrials,0.02368,0.001736,388,2024-01-18
8,NOT_FOUND_ON_YAHOO,0.024667,0.00165,821,2024-01-18
9,Real Estate,0.031887,0.009497,64,2024-01-18


Unnamed: 0,SECTOR,DAILY_RETURN_std,DAILY_RETURN_mean,DAILY_RETURN_count,TRADEDATE
0,Basic Materials,0.022535,-0.005132,334,2024-01-17
1,Communication Services,0.038691,0.005711,73,2024-01-17
2,Consumer Cyclical,0.022766,-0.004004,375,2024-01-17
3,Consumer Defensive,0.026868,-0.000433,142,2024-01-17
4,Energy,0.014955,0.000888,35,2024-01-17
5,Financial Services,0.01623,-0.000103,1082,2024-01-17
6,Healthcare,0.016931,-0.004468,129,2024-01-17
7,Industrials,0.023632,0.001193,388,2024-01-17
8,NOT_FOUND_ON_YAHOO,0.021942,-0.001196,867,2024-01-17
9,Real Estate,0.030142,0.003197,64,2024-01-17


Unnamed: 0,SECTOR,DAILY_RETURN_std,DAILY_RETURN_mean,DAILY_RETURN_count,TRADEDATE
0,Basic Materials,0.022732,-0.010494,330,2024-01-16
1,Communication Services,0.023743,-0.015012,73,2024-01-16
2,Consumer Cyclical,0.024568,-0.014766,373,2024-01-16
3,Consumer Defensive,0.024344,-0.00965,142,2024-01-16
4,Energy,0.021318,-0.00644,36,2024-01-16
5,Financial Services,0.014802,-0.003482,978,2024-01-16
6,Healthcare,0.02378,-0.009749,129,2024-01-16
7,Industrials,0.024232,-0.012569,388,2024-01-16
8,NOT_FOUND_ON_YAHOO,0.020521,-0.006008,829,2024-01-16
9,Real Estate,0.026426,-0.019248,64,2024-01-16


Unnamed: 0,SECTOR,DAILY_RETURN_std,DAILY_RETURN_mean,DAILY_RETURN_count,TRADEDATE
0,Basic Materials,0.029695,-0.001037,332,2024-01-15
1,Communication Services,0.029421,-0.009211,73,2024-01-15
2,Consumer Cyclical,0.027972,-0.00613,374,2024-01-15
3,Consumer Defensive,0.027551,-0.013983,142,2024-01-15
4,Energy,0.020202,0.000117,35,2024-01-15
5,Financial Services,0.041982,0.003275,1113,2024-01-15
6,Healthcare,0.021771,-0.006409,129,2024-01-15
7,Industrials,0.02988,-0.004145,387,2024-01-15
8,NOT_FOUND_ON_YAHOO,0.021959,-0.001395,830,2024-01-15
9,Real Estate,0.032541,0.002002,64,2024-01-15


BadZipFile: File is not a zip file

In [None]:
# ! rm -rf /content/sample_data/*pkl
# ! rm -rf /content/sample_data/*csv