# Install and Restart

In [0]:
%pip install google-api-python-client google-auth gspread openpyxl pyxirr
dbutils.library.restartPython()

# Imports and Authorization

In [0]:
from google.oauth2 import service_account
import gspread
from googleapiclient.discovery import build

# OAuth scopes requested for the service account: Sheets and Drive.
SCOPES = [
    'https://www.googleapis.com/auth/spreadsheets',
    'https://www.googleapis.com/auth/drive'
]
# Path to the service-account JSON key file read below.
# NOTE(review): the key location is hard-coded; confirm the volume is access-restricted.
KEY_FILE_PATH = '/Volumes/finance/raw/keys/databricks-drive-f6879bbdbe1c.json' 
# Load the credentials once and share them between every client created here.
creds = service_account.Credentials.from_service_account_file(
    KEY_FILE_PATH, scopes=SCOPES)
# Drive v3 client built from the discovery document.
drive_service = build('drive', 'v3', credentials=creds)
# gspread client; the later cells open spreadsheets through `gc`.
gc = gspread.authorize(creds)
# Sheets v4 client built from the discovery document.
sheet_service = build('sheets', 'v4', credentials=creds)

import requests
from bs4 import BeautifulSoup

import time
from datetime import datetime
import pytz

import os
from openpyxl import load_workbook
from pyxirr import xirr
import pandas as pd

# Fetch Screener Data ~ 60 mins

In [0]:
class Screener():
  """Scraper that collects per-stock fundamentals from screener.in.

  Call ``get_industry_to_url()`` to discover the industry pages, then
  ``get_stock_to_data()`` to visit the company pages linked from them.
  Results accumulate in ``stock_to_data`` keyed by the company link text.

  Attributes:
    base_url: Site root that relative links are joined onto.
    industry_to_url: Industry link text -> absolute industry page URL.
    stock_to_data: Company link text -> dict of scraped fields (the keys are
      listed in ``get_stock_to_data``).
    epoch_time: ``time.time()`` captured when the scraper was created.
    stock: Key of the company currently being parsed; every ``get_*`` parsing
      method reads and writes ``stock_to_data[self.stock]``.
  """

  # Seconds to wait for a single HTTP response; without a timeout one stalled
  # connection would hang the whole crawl.
  REQUEST_TIMEOUT = 30

  def __init__(self):
    self.base_url = 'https://www.screener.in'
    self.industry_to_url = {}
    self.stock_to_data = {}
    self.epoch_time = time.time()
    self.stock = None

  @staticmethod
  def _to_float(text, default=None):
    """Parse a scraped number such as ``'1,234.5'`` or ``'12%'``.

    Returns ``default`` when the text is empty or not numeric.
    """
    cleaned = text.strip().replace(',', '').replace('%', '')
    try:
      return float(cleaned)
    except ValueError:
      return default

  @staticmethod
  def _parse_row(row):
    """Return ``(label, values)`` for one ``<tr>`` of a data table.

    The label comes from the expandable-row button's ``onclick`` argument when
    a button is present, otherwise from the first cell's text (``None`` when
    the row has no ``<td>``). ``values`` are the non-empty remaining cells as
    floats, with thousands separators and percent signs removed.

    Raises:
      ValueError: if a non-empty value cell is not numeric.
    """
    button = row.find('button')
    if button:
      txt = button['onclick'].strip()
      # Take the first quoted argument of the call, i.e. the text between
      # "('" and "',".
      label = txt[txt.find("('")+2:txt.find("',")]
    else:
      first_cell = row.find('td')
      label = first_cell.text.strip() if first_cell else None
    values = [float(col.text.strip().replace(',', '').replace('%', ''))
      for col in row.find_all('td')[1:] if col.text.strip()]
    return label, values

  def _get_soup(self, url):
    """Download ``url`` and return it parsed with ``html.parser``.

    Raises:
      requests.RequestException: on connection errors or timeouts.
    """
    response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
    return BeautifulSoup(response.text, 'html.parser')

  def get_industry_to_url(self):
    """Fill ``industry_to_url`` from the ``/market/...`` links on the market page."""
    soup = self._get_soup(self.base_url + '/market')
    for ele in soup.find_all('a', href=lambda href: href and href.startswith('/market/')):
      self.industry_to_url[ele.text] = self.base_url + ele['href']

  def get_price_and_ratios(self):
    """Read ``LTP``, ``52W H``, ``52W L`` and ``PE`` from the page's ratio list.

    A value that is missing or not numeric leaves the field as ``None``,
    except ``PE`` which falls back to ``0``.
    """
    record = self.stock_to_data[self.stock]
    for ele in record['Soup'].find_all('span', {'class': 'name'}):
      label = ele.text.strip()
      if label == 'Current Price':
        number = ele.find_next('span', {'class': 'number'})
        record['LTP'] = self._to_float(number.text) if number else None
      elif label == 'High / Low':
        # Only the next two numbers are needed; limit=2 avoids scanning the
        # rest of the document.
        numbers = ele.find_all_next('span', {'class': 'number'}, limit=2)
        if len(numbers) == 2:
          record['52W H'] = self._to_float(numbers[0].text)
          record['52W L'] = self._to_float(numbers[1].text)
      elif label == 'Stock P/E':
        number = ele.find_next('span', {'class': 'number'})
        record['PE'] = self._to_float(number.text, default=0) if number else 0

  def get_quarterly_results(self):
    """Read ``Reported_Upto`` (last quarter column header) and ``Upcoming_Date``.

    Both come from the ``quarters`` section; anything missing is skipped.
    """
    record = self.stock_to_data[self.stock]
    section = record['Soup'].find('section', {'id': 'quarters'})
    if not section:
      return
    table = section.find('table', {'class': 'data-table responsive-text-nowrap'})
    thead = table.find('thead') if table else None
    header_cells = thead.find_all('th') if thead else []
    if header_cells:
      record['Reported_Upto'] = header_cells[-1].text.strip()
    badge = section.find('span', {'class': 'badge'})
    strong = badge.find('strong') if badge else None
    if strong:
      record['Upcoming_Date'] = strong.text

  def get_profit_loss(self):
    """Read sales/profit figures and growth rates from the profit-loss section.

    Sets ``Sales_TTM`` / ``Profit_TTM`` to the last value of the matching row
    and the ``Sales_Growth_*`` / ``Profit_Growth_*`` fields from the ranges
    tables (stored as strings with the percent sign removed). Also fills
    ``Reported_Upto`` from the table header when it is still empty.
    """
    record = self.stock_to_data[self.stock]
    section = record['Soup'].find('section', {'id': 'profit-loss'})
    if not section:
      return
    table = section.find('table', {'class': 'data-table responsive-text-nowrap'})
    for row in (table.find_all('tr') if table else []):
      if not record['Reported_Upto']:
        # Body rows carry no <th>; only take the header when one exists.
        header_cells = row.find_all('th')
        if header_cells:
          record['Reported_Upto'] = header_cells[-1].text.strip()
      label, values = self._parse_row(row)
      if label in ('Sales', 'Revenue') and len(values) > 1:
        record['Sales_TTM'] = values[-1]
      elif label == 'Net Profit' and len(values) > 1:
        record['Profit_TTM'] = values[-1]
    prefixes = {
      'Compounded Sales Growth': 'Sales_Growth',
      'Compounded Profit Growth': 'Profit_Growth',
    }
    for tbl in section.find_all('table', {'class': 'ranges-table'}):
      heading = tbl.find('th')
      cells = tbl.find_all('td')
      prefix = prefixes.get(heading.text) if heading else None
      if prefix is None or len(cells) < 8:
        continue
      # Cells alternate period label / value, so the values sit at odd indexes.
      for suffix, idx in (('10Y', 1), ('5Y', 3), ('3Y', 5), ('TTM', 7)):
        record[f'{prefix}_{suffix}'] = cells[idx].text.strip().replace('%', '')

  def get_margin_data(self):
    """Set ``NPM_TTM`` to profit/sales as a whole-number percentage.

    Left as ``None`` when sales is zero or missing, or when profit is missing.
    """
    record = self.stock_to_data[self.stock]
    sales = record['Sales_TTM']
    profit = record['Profit_TTM']
    if sales in (0, None) or profit is None:
      return
    # The outer round removes float noise such as 28.999999999999996.
    record['NPM_TTM'] = round(round(profit / sales, 2) * 100, 2)

  def get_fii_data(self):
    """Set ``FII_TTM_PTTM`` from the shareholding table.

    The value is the mean of the last four ``foreign_institutions`` figures
    minus the mean of the four before them; at least eight are required.
    """
    record = self.stock_to_data[self.stock]
    section = record['Soup'].find('section', {'id': 'shareholding'})
    if not section:
      return
    tbl = section.find('table', {'class': 'data-table'})
    if tbl is None:
      return
    for row in tbl.find_all('tr'):
      label, values = self._parse_row(row)
      if label == 'foreign_institutions' and len(values) > 7:
        fii_ttm = round(sum(values[-4:]) / 4, 2)
        fii_pttm = round(sum(values[-8:-4]) / 4, 2)
        record['FII_TTM_PTTM'] = round(fii_ttm - fii_pttm, 2)

  def get_stock_to_data(self, max_industries=9):
    """Crawl company pages industry by industry and fill ``stock_to_data``.

    Args:
      max_industries: Number of industries (in ``industry_to_url`` order) to
        crawl; ``None`` crawls all of them. The default of 9 keeps the
        previously hard-coded cap.
        NOTE(review): that cap looks like a debugging leftover; confirm
        whether the full crawl is intended.

    Each record holds: ``url``, ``Industry``, ``symbol``, ``LTP``, ``52W L``,
    ``52W H``, ``PE``, ``Reported_Upto``, ``Upcoming_Date``, ``Sales_TTM``,
    ``Profit_TTM``, ``NPM_TTM``, ``Sales_Growth_{10Y,5Y,3Y,TTM}``,
    ``Profit_Growth_{10Y,5Y,3Y,TTM}`` and ``FII_TTM_PTTM``. Fields that could
    not be scraped stay ``None``. Pages that fail to download are reported and
    skipped so one network error does not abort the crawl.
    """
    total_industries = len(self.industry_to_url)
    for i, (industry, url) in enumerate(self.industry_to_url.items(), start=1):
      if max_industries is not None and i > max_industries:
        break
      time.sleep(1)
      try:
        listing = self._get_soup(url)
      except requests.RequestException as exc:
        print(f"\nSkipping industry {industry} ({url}): {exc}")
        continue
      stocks_url = {ele.text: self.base_url + ele['href']
        for ele in listing.find_all('a', href=lambda href: href and href.startswith('/company/'))}
      for s, (name, stock_url) in enumerate(stocks_url.items(), start=1):
        self.stock = name
        time.sleep(0.5)
        try:
          soup = self._get_soup(stock_url)
        except requests.RequestException as exc:
          print(f"\nSkipping {name} ({stock_url}): {exc}")
          continue
        self.stock_to_data[name] = {'url':stock_url,'Industry':industry,'Soup':soup, 'symbol':stock_url.split('/')[4],
                                    'LTP':None,'52W L':None,'52W H':None,'PE':None,
                                    'Reported_Upto':None,'Upcoming_Date':None,
                                    'Sales_TTM':None,'Profit_TTM':None,'NPM_TTM':None,
                                    'Sales_Growth_10Y':None,'Sales_Growth_5Y':None,'Sales_Growth_3Y':None,'Sales_Growth_TTM':None,
                                    'Profit_Growth_10Y':None,'Profit_Growth_5Y':None,'Profit_Growth_3Y':None,'Profit_Growth_TTM':None,
                                    'FII_TTM_PTTM':None}
        print(f"\r{industry}[{i}/{total_industries}] {name}[{s}/{len(stocks_url)}] ", end="", flush=True)
        self.get_price_and_ratios()
        self.get_quarterly_results()
        self.get_profit_loss()
        self.get_margin_data()
        self.get_fii_data()
        # Drop the parsed page so the results dict does not keep every page alive.
        self.stock_to_data[name]['Soup'] = None
        if not self.stock_to_data[name]['LTP']:
          # No price found: print the URL so the page can be inspected by hand.
          print(stock_url)

# Build the scraper, discover the industry pages, then crawl the company pages
# linked from them. The crawl is network-bound and sleeps between requests.
scraper = Screener()
scraper.get_industry_to_url()
scraper.get_stock_to_data()

# Update Screener Data

In [0]:
# Rewrite the 'Stocks' worksheet of the 'Screener Tracker' spreadsheet from
# scraper.stock_to_data: one header row, then one row per scraped company.
# Fields the scraper could not read are None and are written as empty cells.


def _normal_score(ltp, low, high):
    """Return where ``ltp`` sits in the 52-week range, as a 0-100 percentage.

    Returns 0 when any input is missing or the range is empty.
    """
    if not high or ltp is None or low is None or high == low:
        return 0
    # The outer round removes float noise such as 28.999999999999996.
    return round(round((ltp - low) / (high - low), 2) * 100, 2)


def _int_or_none(value):
    """Truncate ``value`` to int, passing ``None`` through unchanged."""
    return int(value) if value is not None else None


doc = gc.open('Screener Tracker')
stocks = doc.worksheet('Stocks')
# Start from a clean sheet: values, notes in the ranges 'A', 'H', 'S', and any filter.
stocks.clear()
stocks.clear_notes(['A','H','S'])
stocks.clear_basic_filter()
stocks_cells = []
stocks_notes = {}
stocks_cells.append(gspread.Cell(row=1,col=1,value='Stock'))
# Record when the scrape started, rendered in Asia/Kolkata time.
timestamp = datetime.fromtimestamp(scraper.epoch_time,tz=pytz.timezone('Asia/Kolkata')).strftime('%Y-%m-%d %H:%M:%S')
stocks_notes['A1'] = f'Timestamp : {timestamp}'
# Note on the 'Normal Score' header cell showing the formula.
stocks_notes['E1'] = '(LTP-52WL) X 100\n--------------------------\n(52WH-52WL)'
headers = [
    'Last\nTraded\nPrice',
    '52\nWeek\nLow',
    '52\nWeek\nHigh',
    'Normal\nScore',
    'P/E',
    'Sales\nTTM\n(Cr)',
    'Profit\nTTM\n(Cr)',
    'NPM\nTTM\n%',
    'Sales\nCAGR\n10Y%',
    'Sales\nCAGR\n5Y%',
    'Sales\nCAGR\n3Y%',
    'Sales\nCAGR\nTTM%',
    'Profit\nCAGR\n10Y%',
    'Profit\nCAGR\n5Y%',
    'Profit\nCAGR\n3Y%',
    'Profit\nCAGR\nTTM%',
    'FIIs\nChange\nTTM',
    'Reported Upto/\nUpcoming Date',
    'Industry'
]
# Column 1 is 'Stock', so the remaining headers start at column 2.
for idx, h in enumerate(headers, 2):
    stocks_cells.append(gspread.Cell(row=1, col=idx, value=h))

row_num = 1
for stock, data in scraper.stock_to_data.items():
    row_num += 1
    # Column A: the symbol as a link to the company page; the full name goes in a note.
    stocks_cells.append(gspread.Cell(row=row_num,col=1,value='=HYPERLINK("' + str(data['url']) + '","' + str(data['symbol']) + '")'))
    stocks_notes[f'A{row_num}'] = stock
    vals = [
        # Avoid writing the literal text "None" when no price was scraped.
        str(data['LTP']) if data['LTP'] is not None else None,
        _int_or_none(data['52W L']),
        _int_or_none(data['52W H']),
        _normal_score(data['LTP'], data['52W L'], data['52W H']),
        data['PE'],
        data['Sales_TTM'],
        data['Profit_TTM'],
        data['NPM_TTM'],
        data['Sales_Growth_10Y'],
        data['Sales_Growth_5Y'],
        data['Sales_Growth_3Y'],
        data['Sales_Growth_TTM'],
        data['Profit_Growth_10Y'],
        data['Profit_Growth_5Y'],
        data['Profit_Growth_3Y'],
        data['Profit_Growth_TTM'],
        data['FII_TTM_PTTM'],
        # Prefer the upcoming result date when one was found.
        data['Upcoming_Date'] if data['Upcoming_Date'] else data['Reported_Upto'],
        data['Industry']
    ]

    for i, val in enumerate(vals, 2):
        stocks_cells.append(gspread.Cell(row=row_num, col=i, value=val))

# USER_ENTERED makes Sheets evaluate the HYPERLINK formulas and parse numbers.
stocks.update_cells(stocks_cells, value_input_option='USER_ENTERED')
stocks.update_notes(stocks_notes)

# Update Zerodha Data

In [0]:
from helpers import *
# Rebuild the 'Personal Finance' spreadsheet from the Zerodha exports on Drive:
# dividend statements, contract notes, holdings.csv, and per-symbol capital
# gains derived from them.
import csv  # cell-local import: parses holdings.csv with correct quote handling

# 1. Resolve Folder IDs
try:
    ids = get_root_finance_structure()
    print(f"Found Folder IDs: {ids}")
except Exception as e:
    # Best-effort notebook cell: report and skip the rest instead of raising.
    print(f"Stopped: {e}")
    ids = None

if ids:
    # --- Dividend Statement ---
    dividend_statement = [['Symbol','Date','Net Amount','Year-Month']]
    # NOTE(review): `account` is derived from the first file name containing
    # '-' but is not used anywhere in this cell; confirm whether it is needed.
    account = None

    # List files in Dividend folder
    files = list_files_in_folder(ids['dividend'])

    for file in files:
        if account is None and '-' in file['name']:
            account = file['name'].split('-')[1] # Assuming format like 'Name-Account-...'

        print(f"Processing Dividend: {file['name']}")

        # Download file to memory
        file_content = download_file_content(file['id'])

        # Open with openpyxl
        workbook = load_workbook(file_content)
        worksheet = workbook['Equity Dividends']

        found_header = False
        for row in worksheet.iter_rows(values_only=True):
            # Skip everything up to and including the row that holds 'Symbol'.
            if not found_header:
                if row and 'Symbol' in row:
                    found_header = True
                continue
            if row[1] == 'Total Dividend Amount':
                break

            date_val = row[3]
            if date_val is None:
                # A row without a date cannot be sorted below; skip it.
                continue
            if isinstance(date_val, datetime):
                date_val = date_val.strftime('%Y-%m-%d')

            dividend_statement.append([
                # NOTE(review): stripping every '6' also corrupts symbols that
                # contain a 6; this looks like a mangled footnote marker. Confirm.
                str(row[1]).replace('#','').replace('6',''), # Symbol cleanup
                date_val,
                row[6],
                str(date_val)[:7]
            ])

    # Newest first.
    dividend_statement[1:] = sorted(dividend_statement[1:], key=lambda row: datetime.strptime(row[1], "%Y-%m-%d"), reverse=True)

    # --- Contract Notes ---
    contract_note = [['Symbol','Date','Net Amount','Year-Month']]
    files = list_files_in_folder(ids['contract'])

    for file in files:
        file_content = download_file_content(file['id'])
        workbook = load_workbook(file_content)

        for worksheet in workbook.worksheets:
            header_found = False
            skip_next_row = False
            try:
                sheet_date = datetime.strptime(worksheet.title, "%d-%m-%Y").strftime("%Y-%m-%d")
            except ValueError:
                continue # Skip sheets whose title is not a dd-mm-YYYY date

            for row in worksheet.iter_rows(values_only=True):
                if not header_found:
                    if row and row[0] == 'Order No.':
                        header_found = True
                        # The row right after the header is skipped as well.
                        skip_next_row = True
                    continue
                if skip_next_row:
                    skip_next_row = False
                    continue
                if not row[0] or row[0] == 'PAY IN / PAY OUT OBLIGATION':
                    break

                symbol_raw = row[4].split(' - ')[0]
                contract_note.append([
                    symbol_raw,
                    sheet_date,
                    row[11] if row[5]=='buy' else row[12],
                    sheet_date[:7]
                ])

    # Newest first; the capital-gains pass below relies on this order.
    contract_note[1:] = sorted(contract_note[1:], key=lambda row: datetime.strptime(row[1], "%Y-%m-%d"), reverse=True)

    # --- Holdings CSV ---
    holdings = [['Symbol','Quantity','Invested','Current']]
    try:
        holdings_file_id = get_file_id(ids['zerodha'], 'holdings.csv')
        csv_content = download_file_content(holdings_file_id).read().decode('utf-8')
        # csv.reader handles quoted fields (including embedded commas), which
        # a plain split(',') does not.
        reader = csv.reader(csv_content.splitlines())
        next(reader, None)  # skip the header line
        for parts in reader:
            if len(parts) > 5:
                holdings.append([
                    parts[0].strip(),
                    parts[1].strip(),
                    # Drop thousands separators so float() below can parse them.
                    parts[4].strip().replace(',', ''),
                    parts[5].strip().replace(',', ''),
                ])
    except Exception as e:
        print(f"Warning: holdings.csv not found or error reading: {e}")

    # --- Capital Gains ---
    capital_gains = [['Symbol','Date','Invested','Unrealised','Realised','XIRR']]
    symbol_to_contracts = {}
    symbol_to_holdings = {}

    for contract in contract_note[1:]:
        symbol_to_contracts.setdefault(contract[0], []).append(contract)

    for holding in holdings[1:]:
        symbol_to_holdings[holding[0]] = holding

    today = datetime.today().date()

    for symbol, contracts in symbol_to_contracts.items():
        holdings_invested = 0.0
        holdings_current = 0.0
        contracts_invested_held = 0.0   # sum of buy amounts matched to current holdings (negative)
        contracts_invested_sold = 0.0   # sum of buy amounts for shares since sold (negative)
        contracts_sold = 0.0            # sum of sell proceeds (positive)
        dates = []
        flows = []

        if symbol in symbol_to_holdings:
            holdings_invested = float(symbol_to_holdings[symbol][2])
            holdings_current = float(symbol_to_holdings[symbol][3])

        # Contracts are newest first, so the most recent buys are matched to
        # the current holding until its invested amount is covered; older buys
        # belong to shares that were sold.
        for c in contracts:
            amt = float(c[2])
            d_obj = datetime.strptime(c[1], "%Y-%m-%d").date()

            if amt < 0: # Buy
                # Buy amounts are negative, so compare magnitudes. Comparing
                # the signed sum made this branch always true for held symbols.
                if holdings_invested > abs(contracts_invested_held):
                    contracts_invested_held += amt
                    unrealised = (abs(amt)/holdings_invested)*holdings_current if holdings_invested else 0

                    try:
                        xirr_per = round(xirr([d_obj, today], [amt, unrealised])*100, 2)
                    except Exception:
                        # xirr raised or returned None; report 0.
                        xirr_per = 0

                    capital_gains.append([symbol, c[1], amt, unrealised+amt, 0, xirr_per])
                else:
                    dates.append(d_obj)
                    flows.append(amt)
                    contracts_invested_sold += amt
            else: # Sell
                dates.append(d_obj)
                flows.append(amt)
                contracts_sold += amt

        if contracts_sold > 0:
            realised = contracts_sold + contracts_invested_sold
            try:
                xirr_per = round(xirr(dates, flows)*100, 2)
            except Exception:
                xirr_per = 0
            capital_gains.append([symbol, None, contracts_invested_sold, 0, realised, xirr_per])

    # Final Write to Sheets
    try:
        doc = gc.open('Personal Finance')
        sheet_to_data = {
            'Dividend Statement': dividend_statement,
            'Contract Note': contract_note,
            'Holdings': holdings,
            'Capital Gains': capital_gains
        }
        for sheet, data in sheet_to_data.items():
            worksheet = doc.worksheet(sheet)
            worksheet.clear()
            # Keyword arguments work regardless of gspread's positional order,
            # which was swapped in version 6.
            worksheet.update(range_name='A1', values=data)
        print("Personal Finance Sheet Updated.")
    except Exception as e:
        print(f"Error updating Personal Finance sheet: {e}")