<a href="https://colab.research.google.com/github/rushikeshnakhate/A2DataTech/blob/master/Stocks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [403]:
# pip install jugaad_data

In [404]:
from pathlib import Path
from datetime import date
from jugaad_data.nse import NSELive
from jugaad_data.nse import bhavcopy_save
import pandas as pd
output_dir = Path("/content/sample_data")
import os

In [405]:
import pickle
def load_from_pickle(file_path):
    """Return the object pickled alongside ``file_path``, or ``None``.

    The pickle is looked up at ``file_path`` with its suffix replaced by
    ``.pkl`` (for example ``data.csv`` -> ``data.pkl``).

    NOTE: unpickling can execute arbitrary code; only point this at files
    written by this program.

    Args:
        file_path: Path-like object providing ``with_suffix``.

    Returns:
        The unpickled object, or ``None`` when the ``.pkl`` file is missing.
    """
    pickle_path = file_path.with_suffix(".pkl")
    cached = None
    try:
        with open(pickle_path, "rb") as handle:
            cached = pickle.load(handle)
    except FileNotFoundError:
        # A missing cache file is the expected "not cached yet" signal.
        pass
    return cached

def save_to_pickle(obj_to_be_pkl):
    """Pickle a path object into a sibling ``.pkl`` file.

    The argument plays two roles: it is the object that gets serialised,
    and its name (with the suffix swapped for ``.pkl``) is where the pickle
    is written. The contents of the file it points to are not read.

    Args:
        obj_to_be_pkl: Path-like object providing ``with_suffix``.
    """
    destination = obj_to_be_pkl.with_suffix(".pkl")
    with open(destination, "wb") as sink:
        pickle.dump(obj_to_be_pkl, sink)

In [406]:
def download_data(start_date, output_file, download_function):
    """Return the cached entry for ``output_file``, downloading it if needed.

    When ``load_from_pickle(output_file)`` finds nothing, the file produced
    by ``download_function(start_date, output_file)`` is renamed to
    ``output_file`` and ``save_to_pickle(output_file)`` records it.

    Args:
        start_date: Passed through to ``download_function``.
        output_file: Final path of the downloaded file; also the cache key.
        download_function: Callable returning the path of the file it wrote.

    Returns:
        Whatever ``load_from_pickle(output_file)`` yields after the cache
        has been populated.
    """
    cache_missing = load_from_pickle(output_file) is None
    if cache_missing:
        raw_path = download_function(start_date, output_file)
        os.rename(raw_path, output_file)
        save_to_pickle(output_file)
    return load_from_pickle(output_file)

In [407]:
def download_bhavcopy(start_date, output_file):
    """Save the bhavcopy for ``start_date`` into the module-level ``output_dir``.

    ``output_file`` is accepted so the function matches the callback
    signature used by ``download_data``; it is not used here.

    Returns:
        The value returned by ``bhavcopy_save`` (used by the caller as the
        path of the generated file).
    """
    return bhavcopy_save(start_date, output_dir)

In [408]:
def fetch_or_genenate_stocks(start_date):
    """Return the cached NSE stock data entry for ``start_date``.

    The data lives in ``NseData_<start_date>.csv`` under ``output_dir`` and
    is fetched through ``download_data`` with ``download_bhavcopy`` as the
    downloader.
    """
    csv_name = f"NseData_{start_date!s}.csv"
    return download_data(start_date, Path(output_dir) / csv_name, download_bhavcopy)

In [409]:
def print_df(df):
    """Show ``df`` with Colab's DataFrame formatter enabled.

    Relies on ``google.colab`` and the notebook-provided ``display``.
    """
    from google.colab.data_table import enable_dataframe_formatter

    enable_dataframe_formatter()
    display(df)

In [410]:
import yfinance as yf

def get_sector(tick: str = ""):
    """Look up the sector Yahoo Finance reports for a symbol.

    The symbol is queried as ``<tick>.NS`` and the ``'sector'`` entry of the
    ticker's ``info`` mapping is returned.

    Args:
        tick: Symbol without the ``.NS`` suffix.

    Returns:
        The sector string, or ``"NOT_FOUND_ON_YAHOO"`` when the symbol is
        blank, is not a string, or the lookup fails for any reason.
    """
    not_found = "NOT_FOUND_ON_YAHOO"

    # Blank or non-string symbols (None, or NaN from a DataFrame column)
    # cannot form a ticker. Returning early keeps one bad cell from raising
    # TypeError out of a worker thread and aborting the whole batch.
    if not isinstance(tick, str) or not tick.strip():
        return not_found

    try:
        return yf.Ticker(tick + ".NS").info['sector']
    except Exception:
        # Deliberate best effort: network errors, unknown symbols and a
        # missing 'sector' key all map to the same sentinel.
        return not_found

In [411]:
import concurrent.futures

class DataWithSector:
    """Attach a ``SECTOR`` column to a day's NSE stock data and cache it.

    The enriched table is written to ``NseDataWithSector.csv`` under
    ``output_dir``; a sibling pickle written by ``save_to_pickle`` marks
    the cache as populated.
    """

    def __init__(self, start_date):
        self.start_date = start_date
        # NOTE(review): the file name does not include start_date, so every
        # date shares one cached CSV/pickle and later dates reuse the first
        # date's data. A per-date name (NseDataWithSector_<date>.csv) would
        # avoid that -- confirm whether the sharing is intentional.
        self.stock_with_sector_output_file = Path(output_dir) / "NseDataWithSector.csv"

    def add_sector_column_to_stock_data_concurently(self, df):
        """Fill ``df['SECTOR']`` by calling ``get_sector`` per ``SYMBOL``.

        Lookups run on a thread pool; ``executor.map`` yields results in
        input order, so sectors line up with their rows. ``df`` is modified
        in place and also returned.
        """
        with concurrent.futures.ThreadPoolExecutor() as executor:
            df['SECTOR'] = list(executor.map(get_sector, df['SYMBOL']))
        return df

    def fetch_or_genenate_stocks_with_sector(self):
        """Return the cached entry for the sector CSV, building it if needed.

        On a cache miss the day's stock CSV is loaded, the sector column is
        added, and the result is written to the output CSV (index included)
        before the cache marker is saved.

        Returns:
            Whatever ``load_from_pickle`` yields for the output file.
        """
        data = load_from_pickle(self.stock_with_sector_output_file)
        if data is None:
            stock_data = fetch_or_genenate_stocks(self.start_date)
            stock_data_df = pd.read_csv(stock_data)
            data = self.add_sector_column_to_stock_data_concurently(stock_data_df)
            data.to_csv(self.stock_with_sector_output_file)
            save_to_pickle(self.stock_with_sector_output_file)

        return load_from_pickle(self.stock_with_sector_output_file)

    def get_stock_with_sector_df(self):
        """Read the sector CSV and drop the ``'Unnamed: 13'`` column if present.

        Returns:
            A DataFrame of the CSV contents without ``'Unnamed: 13'``.
        """
        df = pd.read_csv(self.stock_with_sector_output_file)
        # errors="ignore": a CSV without that column must not raise KeyError.
        return df.drop(columns='Unnamed: 13', errors='ignore')


In [412]:
from abc import ABC, abstractmethod

class Performace(ABC):
    """Abstract base for performance calculators tied to one date."""

    def __init__(self, start_date):
        self.start_date = start_date

    @abstractmethod
    def calculate_performace(self, start_date):
        """Compute the performance figures; subclasses must override.

        NOTE(review): the parameter is named ``start_date`` here, while the
        concrete subclass in this file accepts a DataFrame of stocks.
        """

class PerformaceByMeanAndDaviation(Performace):
    """Per-sector std, mean and count of daily returns for one trade date.

    Results are cached in ``SectorPeformance_<start_date>.csv`` under
    ``output_dir`` together with a sibling pickle marker.
    """

    def __init__(self, start_date):
        super().__init__(start_date)
        self.sector_performance_df = pd.DataFrame()
        csv_name = f"SectorPeformance_{self.start_date!s}.csv"
        self.sector_performance_output_file = Path(output_dir) / csv_name

    def calculate_performace(self, stocks_with_sector_df):
        """Aggregate daily returns by sector.

        Adds a ``DAILY_RETURN`` column ((CLOSE - OPEN) / OPEN) to the given
        DataFrame in place, then groups by ``SECTOR``.

        Returns:
            DataFrame with columns ``SECTOR``, ``DAILY_RETURN_std``,
            ``DAILY_RETURN_mean``, ``DAILY_RETURN_count`` and ``TRADEDATE``;
            also stored on ``self.sector_performance_df``.
        """
        close_price = stocks_with_sector_df['CLOSE']
        open_price = stocks_with_sector_df['OPEN']
        stocks_with_sector_df['DAILY_RETURN'] = (close_price - open_price) / open_price

        self.sector_performance_df = stocks_with_sector_df.groupby('SECTOR').agg(
            {'DAILY_RETURN': ['std', 'mean', 'count']}
        ).reset_index()
        summary = self.sector_performance_df
        summary["TRADEDATE"] = self.start_date

        # Collapse the two-level column labels: ('DAILY_RETURN', 'std')
        # becomes 'DAILY_RETURN_std', while ('SECTOR', '') stays 'SECTOR'.
        flat_names = []
        for top, sub in summary.columns:
            flat_names.append(f"{top}_{sub}" if sub else top)
        summary.columns = flat_names
        return summary

    def fetch_or_populate_performace(self):
        """Load the cached sector performance, computing it on a cache miss.

        In both cases ``self.sector_performance_df`` ends up holding the
        table as re-read from the output CSV.

        Returns:
            Whatever ``load_from_pickle`` yields for the output file.
        """
        target = self.sector_performance_output_file
        cached = load_from_pickle(target)
        if cached is None:
            # Fetch the stocks, attach sectors, then aggregate and persist.
            sector_source = DataWithSector(self.start_date)
            sector_csv = sector_source.fetch_or_genenate_stocks_with_sector()
            enriched = pd.read_csv(sector_csv)

            self.sector_performance_df = self.calculate_performace(enriched)
            self.sector_performance_df.to_csv(target, index=False)
            save_to_pickle(target)
            cached = load_from_pickle(target)

        self.sector_performance_df = pd.read_csv(target)
        return cached

    def get_df(self):
        """Return the most recently loaded or computed performance table."""
        return self.sector_performance_df



In [413]:
import matplotlib.pyplot as plt
import pandas as pd

# Assuming your data is stored in a DataFrame named 'df'
# If not, you need to create a DataFrame with the provided data.

def plot(df):
    """Draw two stacked bar charts of per-sector daily-return statistics.

    The top panel plots ``DAILY_RETURN_mean`` and the bottom panel
    ``DAILY_RETURN_std``, both against ``SECTOR``.
    """
    _, (top_axis, bottom_axis) = plt.subplots(2, 1, figsize=(10, 8))

    # (axis, column, bar colour, y-axis label, title)
    panels = (
        (top_axis, 'DAILY_RETURN_mean', 'green',
         'Mean Daily Return',
         'Mean Daily Return for Different Sectors'),
        (bottom_axis, 'DAILY_RETURN_std', 'blue',
         'Standard Deviation of Daily Return',
         'Standard Deviation of Daily Return for Different Sectors'),
    )
    for axis, column, colour, y_label, title in panels:
        df.plot(kind='bar', x='SECTOR', y=column, ax=axis, legend=False, color=colour)
        axis.set_ylabel(y_label)
        axis.set_title(title)

    plt.tight_layout()
    plt.show()

In [414]:
# grouped_df = df.groupby('SECTOR')
# for group_name, group_data in grouped_df:
#   print_df(group_name)
#   print_df(group_data)

In [415]:
# pip install forex-python
# from forex_python.converter import CurrencyRates
# symbol = 'EUR'
# c = CurrencyRates()
# rates = c.get_rates(symbol)
# print(rates)



import yfinance as yf
def get_fx_rate(start_date):
    """Fetch USD/INR history for the year leading up to ``start_date``.

    Queries the ``USDINR=X`` ticker with ``start`` set one year before
    ``start_date`` and ``end`` set to ``start_date``.

    Returns:
        The history DataFrame without the ``Volume``, ``Dividends`` and
        ``Stock Splits`` columns.
    """
    window_start = start_date - pd.DateOffset(years=1)
    usd_inr = yf.Ticker("USDINR=X")
    history = usd_inr.history(start=window_start, end=start_date)
    return history.drop(columns=['Volume', 'Dividends', 'Stock Splits'])


def fetch_or_populate_fx_rate(start_date):
    """Return the FX-rate DataFrame for ``start_date``, cached on disk.

    The cache is ``fxRates_<start_date>.pkl`` under ``output_dir``. When it
    is absent the rates come from ``get_fx_rate`` and are pickled there.
    """
    cache_path = Path(output_dir) / f"fxRates_{start_date!s}.pkl"

    if not cache_path.is_file():
        # Cache miss: fetch, persist, and hand back the fresh DataFrame.
        rates = get_fx_rate(start_date)
        with open(cache_path, 'wb') as sink:
            pickle.dump(rates, sink)
        return rates

    with open(cache_path, 'rb') as source:
        return pickle.load(source)

In [416]:
  # print_df(performaceByMeanAndDaviation.get_df())
  # plot(performaceByMeanAndDaviation.get_df())
  # dataWithSector = DataWithSector(start_date)
  # df = dataWithSector.get_stock_with_sector_df()


In [None]:
from datetime import date, timedelta

def run_concurrently(start_date):
    """Compute (or load) sector performance for one date and display it."""
    print(f"generating NSE stock data for date={start_date}")
    calculator = PerformaceByMeanAndDaviation(start_date)
    calculator.fetch_or_populate_performace()
    print_df(calculator.get_df())

if __name__ == "__main__":
    # Most recent date to process; earlier dates are derived from it.
    start_date = date(2024, 1, 17)
    fx_rate_df = fetch_or_populate_fx_rate(start_date)

    # Number of calendar days (counting back from start_date) to process.
    num_days = 10
    date_range = [start_date - timedelta(days=i) for i in range(num_days)]

    # Process every date on a thread pool.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(run_concurrently, day) for day in date_range]

        # Future.exception() blocks until the task finishes. Checking each
        # future surfaces worker failures, which waiting alone would
        # silently discard. One failed date does not stop the others.
        for day, future in zip(date_range, futures):
            error = future.exception()
            if error is not None:
                print(f"failed to generate NSE stock data for date={day}: {error!r}")

generating NSE stock data for date=2024-01-17
generating NSE stock data for date=2024-01-16


Unnamed: 0,SECTOR,DAILY_RETURN_std,DAILY_RETURN_mean,DAILY_RETURN_count,TRADEDATE
0,Basic Materials,0.022619,-0.005184,330,2024-01-17
1,Communication Services,0.039215,0.005485,71,2024-01-17
2,Consumer Cyclical,0.022837,-0.004131,371,2024-01-17
3,Consumer Defensive,0.027047,-0.00037,140,2024-01-17
4,Energy,0.014955,0.000888,35,2024-01-17
5,Financial Services,0.021383,-0.002792,284,2024-01-17
6,Healthcare,0.016931,-0.004468,129,2024-01-17
7,Industrials,0.023632,0.001193,388,2024-01-17
8,NOT_FOUND_ON_YAHOO,0.024618,-0.001349,643,2024-01-17
9,Real Estate,0.030142,0.003197,64,2024-01-17


generating NSE stock data for date=2024-01-15
generating NSE stock data for date=2024-01-14
generating NSE stock data for date=2024-01-13
generating NSE stock data for date=2024-01-12
generating NSE stock data for date=2024-01-11
generating NSE stock data for date=2024-01-10
generating NSE stock data for date=2024-01-09


ERROR:yfinance:404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/182D020524.NS?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=182D020524.NS&crumb=WiQLUvEnQXR
ERROR:yfinance:404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/1018GS2026.NS?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=1018GS2026.NS&crumb=WiQLUvEnQXR
ERROR:yfinance:404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/182D040724.NS?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=182D040724.NS&crumb=WiQLUvEnQXR
ERROR:yfinance:404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/18