In [1]:
import os

import pandas as pd
import plotly.graph_objects as go
import requests
import yfinance as yf
from bs4 import BeautifulSoup
from bsedata.bse import BSE
from plotly.subplots import make_subplots

In [2]:
# Instantiate the bsedata client; `bse` is not referenced by any other cell visible in this notebook.
# NOTE(review): update_codes=True presumably refreshes the library's scrip-code list on
# construction -- confirm against the bsedata documentation.
bse = BSE(update_codes = True)

In [6]:
def _parse_company_block(block):
    """Turn one search-result block into a record dict, or None if its markup is incomplete."""
    name_span = block.find('span', class_='hover-link ink-900')
    link_holder = block.find('div', class_='font-size-17 font-weight-500')
    comment_div = block.find('div', class_='ink-700 font-size-16')
    if name_span is None or link_holder is None or comment_div is None:
        return None

    company_anchor = name_span.find_parent('a')
    pdf_anchor = link_holder.find('a')
    if company_anchor is None or pdf_anchor is None:
        return None

    # The company code is taken from the third '/'-separated segment of the link target.
    href_parts = (company_anchor.get('href') or '').split('/')
    pdf_link = pdf_anchor.get('href')
    if len(href_parts) < 3 or not pdf_link:
        return None

    return {
        'Company Name': name_span.get_text(strip=True),
        'Company Code': href_parts[2],
        'Comment': comment_div.get_text(separator=" ", strip=True),
        'PDF Link': pdf_link,
    }


def scrape_companies(keyword, pages=10, session_id=None, csrf_token=None, timeout=30):
    """Scrape screener.in full-text search results for ``keyword``.

    Parameters
    ----------
    keyword : str
        Search phrase. It is passed as a query parameter, so characters such
        as spaces, ``&`` or ``#`` are URL-encoded by ``requests``.
    pages : int, default 10
        Number of result pages to request, starting at page 1.
    session_id, csrf_token : str, optional
        Credentials for the request. When omitted they are read from the
        ``SCREENER_SESSION_ID`` / ``SCREENER_CSRF_TOKEN`` environment
        variables so that no secret has to live in the notebook.
    timeout : float, default 30
        Per-request timeout in seconds.

    Returns
    -------
    pandas.DataFrame
        One row per parsed result with the columns ``Company Name``,
        ``Company Code``, ``Comment`` and ``PDF Link``. The columns are
        present even when nothing was found.

    Notes
    -----
    Pages that fail to download and result blocks with missing markup are
    reported/skipped instead of aborting the whole scrape.
    """
    base_url = "https://www.screener.in/full-text-search/"
    columns = ['Company Name', 'Company Code', 'Comment', 'PDF Link']

    if session_id is None:
        session_id = os.environ.get('SCREENER_SESSION_ID')
    if csrf_token is None:
        csrf_token = os.environ.get('SCREENER_CSRF_TOKEN')

    headers = {}
    if session_id:
        headers['Cookie'] = f'sessionid={session_id}'
    else:
        # NOTE(review): earlier revisions always sent a session cookie, so the
        # search presumably needs a logged-in session -- confirm.
        print("No session id supplied (argument or SCREENER_SESSION_ID); requesting without a session cookie")
    if csrf_token:
        headers['X-CSRFToken'] = csrf_token

    companies = []

    for page in range(1, pages + 1):
        try:
            response = requests.get(base_url, params={'q': keyword, 'page': page},
                                    headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Failed to retrieve page {page}: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to retrieve page {page}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')

        # Each search hit lives in its own block with this class combination.
        for block in soup.find_all('div', class_='margin-top-20 margin-bottom-36'):
            record = _parse_company_block(block)
            if record is not None:
                companies.append(record)

    # Passing the columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(companies, columns=columns)

# Example usage: run the full-text search for one phrase over the first 10 result pages.
keyword = "Artificial Intelligence"
df = scrape_companies(keyword=keyword, pages=10)

In [5]:
# Show the scraped results table as this cell's output.
df

## PLOT KEYWORD-MATCHED STOCKS

In [5]:
# Define functions to download stock data, calculate indicators, plot data, and process stocks
def download_stock_data(symbol):
    """Download about one year of price history for ``symbol`` via yfinance.

    The ticker requested is ``symbol + ".BO"`` and the window runs from 365
    days before now to one day after now.

    Returns
    -------
    pandas.DataFrame
        Whatever ``yf.download`` returns, except that two-level columns are
        flattened to plain field names (``Open``, ``Close``, ...) so callers
        can index ``data['Close']`` and get a Series.
    """
    today = pd.to_datetime('today')
    data = yf.download(symbol + ".BO", start=today - pd.DateOffset(365),
                       end=today + pd.DateOffset(1), progress=False)

    # Newer yfinance releases return MultiIndex columns (field, ticker) even
    # for a single ticker; keep only the level that carries the field names.
    if isinstance(data, pd.DataFrame) and isinstance(data.columns, pd.MultiIndex):
        for level in range(data.columns.nlevels):
            labels = data.columns.get_level_values(level)
            if 'Close' in labels:
                data = data.copy()
                data.columns = labels
                break
    return data

def calculate_technical_indicators(data):
    """Add moving-average and candle-colour columns to ``data`` in place.

    Expects ``Close`` and ``Open`` columns. Adds, in this order:
    ``5DMA``, ``20DMA``, ``50DMA``, ``100DMA`` (rolling means of ``Close``),
    ``diff`` (``Close - Open``) and ``color`` (``'green'`` when ``diff >= 0``,
    otherwise ``'red'``). The same frame object is returned.
    """
    def _bar_colour(change):
        # NaN fails the comparison and therefore falls through to 'red'.
        if change >= 0:
            return 'green'
        return 'red'

    for span in (5, 20, 50, 100):
        data[f'{span}DMA'] = data['Close'].rolling(window=span).mean()

    data['diff'] = data['Close'] - data['Open']
    data['color'] = data['diff'].apply(_bar_colour)
    return data

def plot_stock_data(symbol, data, comment, pdf_link):
    """Build a candlestick figure for the last 180 days of ``data``.

    ``data`` must carry the OHLC/Volume columns plus the ``5DMA``, ``20DMA``,
    ``50DMA``, ``100DMA`` and ``color`` columns. The figure overlays the four
    moving averages, draws volume bars on a hidden secondary axis, and places
    ``comment`` and ``pdf_link`` in an annotation beneath the chart.

    Returns the plotly figure; nothing is displayed or written here.
    """
    # Keep only rows newer than 180 days ago.
    cutoff = pd.to_datetime('today') - pd.DateOffset(days=180)
    recent = data[data.index > cutoff]
    dates = recent.index
    closes = recent['Close']
    volumes = recent['Volume']

    # One plot area with a secondary y-axis for the volume bars.
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    candles = go.Candlestick(
        x=dates,
        open=recent['Open'],
        high=recent['High'],
        low=recent['Low'],
        close=closes,
        name='Price',
    )
    fig.add_trace(candles)

    # Price axis range derived from the closing prices; weekends are cut out of the x-axis.
    fig.update_yaxes(range=[closes.min()*0.9, closes.max()*1.05])
    fig.update_xaxes(rangebreaks=[dict(bounds=['sat', 'mon'])], row=1, col=1)
    fig.update_layout(title={'text': symbol, 'x': 0.5}, xaxis_rangeslider_visible=False)

    # Moving-average overlays, one colour per window.
    ma_colours = {'5DMA': 'yellow', '20DMA': 'blue', '50DMA': 'orange', '100DMA': 'green'}
    for column, colour in ma_colours.items():
        fig.add_trace(go.Scatter(x=dates, y=recent[column], marker_color=colour, name=f'{column}'))

    volume_bars = go.Bar(x=dates, y=volumes, name='Volume', marker={'color': recent['color']})
    fig.add_trace(volume_bars, secondary_y=True)

    # Scaling the hidden volume axis to 5x the peak keeps the bars in the lower fifth of the plot.
    fig.update_yaxes(range=[0, volumes.max()*5], visible=False, secondary_y=True)

    # Comment and PDF link go below the plot; the larger bottom margin makes room for them.
    footnote = dict(
        text=f"<b>Comment:</b> {comment}<br><b>PDF Link:</b> <a href='{pdf_link}' target='_blank'>{pdf_link}</a>",
        xref="paper", yref="paper", x=0, y=-0.2, showarrow=False, align='left',
        xanchor='left', yanchor='top'
    )
    fig.update_layout(annotations=[footnote], margin={'b': 200})

    return fig

def process_stocks(all_symbols, comments, pdf_links, batch_size=100, output_prefix='keyword_stocks'):
    """Chart every symbol and write the charts to HTML files in batches.

    Parameters
    ----------
    all_symbols, comments, pdf_links : sequence
        Parallel sequences; position ``i`` of each describes one stock.
    batch_size : int, default 100
        Number of symbols per output file.
    output_prefix : str, default 'keyword_stocks'
        Files are named ``{output_prefix}_{n}.html`` with ``n`` starting at 1.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.

    Notes
    -----
    A failure for one symbol is printed and the loop moves on. Symbols whose
    download comes back empty are skipped rather than plotted as blank charts.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    total = len(all_symbols)
    for file_count, batch_start in enumerate(range(0, total, batch_size), start=1):
        batch_end = min(batch_start + batch_size, total)
        # UTF-8 is set explicitly so the output does not depend on the platform default encoding.
        with open(f'{output_prefix}_{file_count}.html', 'w', encoding='utf-8') as figure_html:
            # The file is a sequence of HTML fragments, so declare its encoding up front.
            figure_html.write('<meta charset="utf-8">\n')
            # Embed plotly.js once per file instead of once per chart.
            plotlyjs_pending = True
            for position in range(batch_start, batch_end):
                symbol = all_symbols[position]
                try:
                    data = download_stock_data(symbol)
                    if data is None or data.empty:
                        print(f"Error processing symbol {symbol}: no price data returned")
                        continue
                    data = calculate_technical_indicators(data)
                    figure = plot_stock_data(symbol, data, comments[position], pdf_links[position])
                    fragment = figure.to_html(full_html=False, include_plotlyjs=plotlyjs_pending)
                    figure_html.write(fragment)
                    plotlyjs_pending = False
                except Exception as e:
                    # Best effort: report the failure and keep going with the next symbol.
                    print(f"Error processing symbol {symbol}: {e}")

if __name__ == "__main__":
    # Pull the three parallel columns out of the scraped table, then chart every company.
    symbols, comments, pdf_links = (
        df[column].values for column in ('Company Code', 'Comment', 'PDF Link')
    )
    process_stocks(symbols, comments, pdf_links)
    print('Execution Completed!!!!')