In [7]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import re # For regular expressions to extract dates and details
from datetime import datetime # For getting today's date

# Column order of the "News" sheet; a record is complete once it has all four.
_NEWS_COLUMNS = ["Trading Code", "News Title", "News", "Post Date"]

# Column order shared by the LR 16(1) and LR 19(1) sheets.
_BOARD_MEETING_COLUMNS = ["SL", "Company", "LR Reference", "Meeting Schedule", "Sector", "Details"]

# strptime formats tried, in order, on the date text captured by
# _MEETING_DATE_RE (one format per alternative of that pattern).
_MEETING_DATE_FORMATS = ('%B %d, %Y', '%d %B %Y', '%d/%m/%Y', '%Y-%m-%d')

# Matches "LR 16(1)" / "LR 19(1)" (any case, optional whitespace) in a title.
_LR_REFERENCE_RE = re.compile(r'LR\s*(?:16\(1\)|19\(1\))', re.IGNORECASE)

# Matches "Month DD, YYYY", "DD Month YYYY", "DD/MM/YYYY" or "YYYY-MM-DD".
_MEETING_DATE_RE = re.compile(
    r'(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},\s+\d{4}|'
    r'\d{1,2}\s+(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{4}|'
    r'\d{1,2}/\d{1,2}/\d{4}|'
    r'\d{4}-\d{2}-\d{2}'
)

# Captures the text after "to consider, among others," up to the first
# period, newline or end of text.
_DETAILS_RE = re.compile(r'to consider,\s*among others,\s*(.*?)(?:\.|\n|$)', re.IGNORECASE)

# --- Predefined Sector Mapping (trading code -> sector label) ---
# NOTE(review): some labels differ only in spelling/case ("Service & RE" vs
# "Services & Real Estate", "BOND" vs "Bond") and several codes map to "";
# left untouched because the intended canonical labels are not known here.
_SECTOR_MAPPING = {
    "1JANATAMF": "M. Fund", "1STBSRS": "", "1STICB": "", "1STPRIMFMF": "M. Fund", "2NDICB": "",
    "3RDICB": "", "4THICB": "", "5THICB": "", "6THICB": "", "7THICB": "", "8THICB": "",
    "AAMRANET": "IT", "AAMRATECH": "IT", "ABB1STMF": "M. Fund", "ABBANK": "Bank", "ACFL": "Textile",
    "ACI": "Pharma", "ACIFORMULA": "Pharma", "ACIZCBOND": "", "ACMELAB": "Pharma", "ACMEPL": "Pharma",
    "ACTIVEFINE": "Pharma", "ADNTEL": "IT", "ADVENT": "Pharma", "AFCAGRO": "Pharma", "AFTABAUTO": "Engineering",
    "AGNISYSL": "IT", "AGRANINS": "General Insurance", "AIBL1STIMF": "M. Fund", "AIL": "Textile",
    "AIMS1STMF": "", "ALARABANK": "Bank", "AL-HAJTEX": "Textile", "ALIF": "Textile", "ALLTEX": "Textile",
    "AMANFEED": "Miscellaneous", "AMBEEPHA": "Pharma", "AMCL(PRAN)": "Food & Allied", "ANLIMAYARN": "Textile",
    "ANWARGALV": "Engineering", "AOL": "Fuel & Power", "APEXFOODS": "Food & Allied", "APEXFOOT": "Tannery",
    "APEXSPINN": "Textile", "APEXTANRY": "Tannery", "APOLOISPAT": "Engineering", "APSCLBOND": "BOND",
    "ARAMIT": "Miscellaneous", "ARAMITCEM": "Cement", "ARGONDENIM": "Textile", "ASIAINS": "General Insurance",
    "ASIAPACINS": "General Insurance", "ASIATICLAB": "Pharma", "ATCSLGF": "M. Fund", "ATLASBANG": "Engineering",
    "AZIZPIPES": "Engineering", "BANGAS": "Food & Allied", "BANKASIA": "Bank", "BARKAPOWER": "Fuel & Power",
    "BATASHOE": "Tannery", "BATBC": "Food & Allied", "BAYLEASING": "NBFI", "BBS": "Engineering",
    "BBSCABLES": "Engineering", "BDAUTOCA": "Engineering", "BDCOM": "IT", "BDFINANCE": "NBFI",
    "BDLAMPS": "Engineering", "BDSERVICE": "Travel & Leisure", "BDTHAI": "Engineering", "BDTHAIFOOD": "Food & Allied",
    "BDWELDING": "Fuel & Power", "BEACHHATCH": "Food & Allied", "BEACONPHAR": "Pharma", "BENGALWTL": "Engineering",
    "BERGERPBL": "Miscellaneous", "BESTHLDNG": "Travel & Leisure", "BEXIMCO": "Miscellaneous", "BGIC": "General Insurance",
    "BIFC": "NBFI", "BNICL": "General Insurance", "BPML": "Paper & Printing", "BPPL": "Fuel & Power",
    "BRACBANK": "Bank", "BRACSCBOND": "", "BSC": "Miscellaneous", "BSCPLC": "Telecom", "BSRMLTD": "Engineering",
    "BSRMSTEEL": "Engineering", "BXPHARMA": "Pharma", "BXSYNTH": "", "CAPITECGBF": "M. Fund",
    "CAPMBDBLMF": "M. Fund", "CAPMIBBLMF": "M. Fund", "CENTRALINS": "General Insurance", "CENTRALPHL": "Pharma",
    "CITYBANK": "Bank", "CITYGENINS": "General Insurance", "CLICL": "Life Insurance", "CNATEX": "Textile",
    "CONFIDCEM": "Cement", "CONTININS": "General Insurance", "COPPERTECH": "Engineering", "CROWNCEMNT": "Cement",
    "CRYSTALINS": "General Insurance", "CVOPRL": "Fuel & Power", "DACCADYE": "Textile", "DAFODILCOM": "IT",
    "DBH": "NBFI", "DBH1STMF": "M. Fund", "DELTALIFE": "Life Insurance", "DELTASPINN": "Textile",
    "DESCO": "Fuel & Power", "DESHBANDHU": "Engineering", "DGIC": "General Insurance", "DHAKABANK": "Bank",
    "DHAKAINS": "General Insurance", "DOMINAGE": "Engineering", "DOREENPWR": "Fuel & Power", "DSHGARME": "Textile",
    "DSSL": "Textile", "DULAMIACOT": "Textile", "DUTCHBANGL": "Bank", "EASTERNINS": "General Insurance",
    "EASTLAND": "General Insurance", "EASTRNLUB": "Fuel & Power", "EBL": "Bank", "EBL1STMF": "M. Fund",
    "EBLNRBMF": "M. Fund", "ECABLES": "Engineering", "EGEN": "IT", "EHL": "Service & RE", "EIL": "General Insurance",
    "EMERALDOIL": "Food & Allied", "ENVOYTEX": "Textile", "EPGL": "Fuel & Power", "ESQUIRENIT": "Textile",
    "ETL": "Textile", "EXIM1STMF": "M. Fund", "EXIMBANK": "Bank", "FAMILYTEX": "Textile", "FARCHEM": "Pharma",
    "FAREASTFIN": "NBFI", "FAREASTLIF": "Life Insurance", "FASFIN": "NBFI", "FBFIF": "M. Fund",
    "FEDERALINS": "General Insurance", "FEKDIL": "Textile", "FINEFOODS": "Food & Allied", "FIRSTFIN": "NBFI",
    "FIRSTSBANK": "Bank", "FORTUNE": "Tannery", "FUWANGCER": "Ceramics", "FUWANGFOOD": "Food & Allied",
    "GBBPOWER": "Fuel & Power", "GEMINISEA": "Food & Allied", "GENEXIL": "IT", "GENNEXT": "Textile",
    "GHAIL": "Food & Allied", "GHCL": "Pharma", "GIB": "Bank", "GLDNJMF": "M. Fund", "GLOBALINS": "General Insurance",
    "GOLDENSON": "Engineering", "GP": "Telecom", "GPHISPAT": "Engineering", "GQBALLPEN": "Miscellaneous",
    "GRAMEEN1": "", "GRAMEENS2": "M. Fund", "GREENDELMF": "M. Fund", "GREENDELT": "General Insurance",
    "GSPFINANCE": "NBFI", "HAKKANIPUL": "Paper & Printing", "HAMI": "Pharma", "HEIDELBCEM": "Cement",
    "HFL": "Textile", "HRTEX": "Textile", "HWAWELLTEX": "Textile", "IBBLPBOND": "Bond", "IBNSINA": "Pharma",
    "IBP": "Pharma", "ICB": "NBFI", "ICB1STNRB": "", "ICB2NDNRB": "", "ICB3RDNRB": "M. Fund",
    "ICBAGRANI1": "M. Fund", "ICBAMCL1ST": "", "ICBAMCL2ND": "M. Fund", "ICBEPMF1S1": "M. Fund", "ICBIBANK": "Bank",
    "ICBISLAMIC": "", "ICBSONALI1": "M. Fund", "ICICL": "General Insurance", "IDLC": "NBFI", "IFADAUTOS": "Engineering",
    "IFIC": "Bank", "IFIC1STMF": "M. Fund", "IFILISLMF1": "M. Fund", "ILFSL": "NBFI", "INDEXAGRO": "Miscellaneous",
    "INTECH": "IT", "INTRACO": "Fuel & Power", "IPDC": "NBFI", "ISLAMIBANK": "Bank", "ISLAMICFIN": "NBFI",
    "ISLAMIINS": "General Insurance", "ISNLTD": "IT", "ITC": "IT", "JAMUNABANK": "Bank", "JAMUNAOIL": "Fuel & Power",
    "JANATAINS": "General Insurance", "JHRML": "Miscellaneous", "JMISMDL": "Pharma", "JUTESPINN": "Jute",
    "KARNAPHULI": "General Insurance", "KAY&QUE": "Engineering", "KBPPWBIL": "Miscellaneous", "KDSALTD": "Engineering",
    "KEYACOSMET": "Pharma", "KOHINOOR": "Pharma", "KPCL": "Fuel & Power", "KPPL": "Paper & Printing",
    "KTL": "Textile", "LANKABAFIN": "NBFI", "LEGACYFOOT": "Tannery", "LHB": "Cement", "LIBRAINFU": "Pharma",
    "LINDEBD": "Fuel & Power", "LOVELLO": "Food & Allied", "LRBDL": "Fuel & Power", "LRGLOBMF1": "M. Fund",
    "MAGURAPLEX": "Paper & Printing", "MAKSONSPIN": "Textile", "MALEKSPIN": "Textile", "MARICO": "Pharma",
    "MATINSPINN": "Textile", "MBL1STMF": "M. Fund", "MEGCONMILK": "Food & Allied", "MEGHNACEM": "Cement",
    "MEGHNAINS": "General Insurance", "MEGHNALIFE": "Life Insurance", "MEGHNAPET": "Food & Allied", "MERCANBANK": "Bank",
    "MERCINS": "General Insurance", "METROSPIN": "Textile", "MHSML": "Textile", "MIDASFIN": "NBFI",
    "MIDLANDBNK": "Bank", "MIRACLEIND": "Miscellaneous", "MIRAKHTER": "Engineering", "MITHUNKNIT": "Textile",
    "MJLBD": "Fuel & Power", "MLDYEING": "Textile", "MODERNDYE": "Textile", "MONNOAGML": "Engineering",
    "MONNOCERA": "Ceramics", "MONNOFABR": "Textile", "MONOSPOOL": "Paper & Printing", "MPETROLEUM": "Fuel & Power",
    "MTB": "Bank", "NAHEEACP": "Engineering", "NATLIFEINS": "Life Insurance", "NAVANACNG": "Engineering",
    "NAVANAPHAR": "Pharma", "NBL": "Bank", "NCCBANK": "Bank", "NCCBLMF1": "M. Fund", "NEWLINE": "Textile",
    "NFML": "Miscellaneous", "NHFIL": "NBFI", "NITOLINS": "General Insurance", "NLI1STMF": "M. Fund",
    "NORTHERN": "Jute", "NORTHRNINS": "General Insurance", "NPOLYMER": "Engineering", "NRBBANK": "Bank",
    "NRBCBANK": "Bank", "NTC": "Food & Allied", "NTLTUBES": "Engineering", "NURANI": "Textile", "OAL": "Engineering",
    "OIMEX": "Engineering", "OLYMPIC": "Food & Allied", "ONEBANKPLC": "Bank", "ORIONINFU": "Pharma",
    "ORIONPHARM": "Pharma", "PADMALIFE": "Life Insurance", "PADMAOIL": "Fuel & Power", "PARAMOUNT": "General Insurance",
    "PDL": "Textile", "PENINSULA": "Travel & Leisure", "PEOPLESINS": "General Insurance", "PF1STMF": "M. Fund",
    "PHARMAID": "Pharma", "PHENIXINS": "General Insurance", "PHOENIXFIN": "NBFI", "PHPMF1": "M. Fund",
    "PIONEERINS": "General Insurance", "PLFSL": "NBFI", "POPULAR1MF": "M. Fund", "POPULARLIF": "Life Insurance",
    "POWERGRID": "Fuel & Power", "PRAGATIINS": "General Insurance", "PRAGATILIF": "Life Insurance",
    "PREMIERBAN": "Bank", "PREMIERCEM": "Cement", "PREMIERLEA": "NBFI", "PRIME1ICBA": "M. Fund", "PRIMEBANK": "Bank",
    "PRIMEFIN": "NBFI", "PRIMEINSUR": "General Insurance", "PRIMELIFE": "Life Insurance", "PRIMETEX": "Textile",
    "PROGRESLIF": "Life Insurance", "PROVATIINS": "General Insurance", "PTL": "Textile", "PUBALIBANK": "Bank",
    "PURABIGEN": "General Insurance", "QUASEMIND": "Engineering", "QUEENSOUTH": "Textile", "RAHIMAFOOD": "Food & Allied",
    "RAHIMTEXT": "Textile", "RAKCERAMIC": "Ceramics", "RANFOUNDRY": "Engineering", "RDFOOD": "Food & Allied",
    "RECKITTBEN": "Pharma", "REGENTTEX": "Textile", "RELIANCE1": "M. Fund", "RELIANCINS": "General Insurance",
    "RENATA": "Pharma", "RENWICKJA": "Engineering", "REPUBLIC": "General Insurance", "RINGSHINE": "Textile",
    "ROBI": "Telecom", "RSRMSTEEL": "Engineering", "RUNNERAUTO": "Engineering", "RUPALIBANK": "Bank",
    "RUPALIINS": "General Insurance", "RUPALILIFE": "Life Insurance", "SAFKOSPINN": "Textile", "SAIFPOWER": "Services & Real Estate",
    "SAIHAMCOT": "Textile", "SAIHAMTEX": "Textile", "SALAMCRST": "Engineering", "SALVOCHEM": "Pharma",
    "SAMATALETH": "Tannery", "SAMORITA": "Service & RE", "SANDHANINS": "Life Insurance", "SAPORTL": "Service & RE",
    "SAVAREFR": "Miscellaneous", "SBACBANK": "Bank", "SEAPEARL": "Travel & Leisure", "SEBL1STMF": "M. Fund",
    "SEMLFBSLGF": "M. Fund", "SEMLIBBLSF": "M. Fund", "SEMLLECMF": "M. Fund", "SHAHJABANK": "Bank",
    "SHARPIND": "Textile", "SHASHADNIM": "Textile", "SHEPHERD": "Textile", "SHURWID": "Engineering",
    "SHYAMPSUG": "Food & Allied", "SIBL": "Bank", "SILCOPHL": "Pharma", "SILVAPHL": "Pharma", "SIMTEX": "Textile",
    "SINGERBD": "Engineering", "SINOBANGLA": "Miscellaneous", "SIPLC": "General Insurance", "SKTRIMS": "Miscellaneous",
    "SONALIANSH": "Jute", "SONALILIFE": "Life Insurance", "SONALIPAPR": "Paper & Printing", "SONARBAINS": "General Insurance",
    "SONARGAON": "Textile", "SOUTHEASTB": "Bank", "SPCERAMICS": "Ceramics", "SPCL": "Fuel & Power",
    "SQUARETEXT": "Textile", "SQURPHARMA": "Pharma", "SSSTEEL": "Engineering", "STANCERAM": "Ceramics",
    "STANDARINS": "General Insurance", "STANDBANKL": "Bank", "STYLECRAFT": "Textile", "SUMITPOWER": "Fuel & Power",
    "SUNLIFEINS": "Life Insurance", "TAKAFULINS": "General Insurance", "TALLUSPIN": "Textile", "TAMIJTEX": "Textile",
    "TECHNODRUG": "Pharma", "TILIL": "Life Insurance", "TITASGAS": "Fuel & Power", "TOSRIFA": "Textile",
    "TRUSTB1MF": "M. Fund", "TRUSTBANK": "Bank", "TUNGHAI": "Textile", "UCB": "Bank", "UNILEVERCL": "Food & Allied",
    "UNIONBANK": "Bank", "UNIONCAP": "NBFI", "UNIONINS": "General Insurance", "UNIQUEHRL": "Travel & Leisure",
    "UNITEDAIR": "", "UNITEDFIN": "NBFI", "UNITEDINS": "General Insurance", "UPGDCL": "Fuel & Power",
    "USMANIAGL": "Miscellaneous", "UTTARABANK": "Bank", "UTTARAFIN": "NBFI", "VAMLBDMF1": "M. Fund",
    "VAMLRBBF": "M. Fund", "VFSTDL": "Textile", "WALTONHIL": "Engineering", "WATACHEM": "Pharma",
    "WMSHIPYARD": "Engineering", "YPL": "Engineering", "ZAHEENSPIN": "Textile", "ZAHINTEX": "Textile",
    "ZEALBANGLA": "Food & Allied"
}


def _parse_news_rows(rows):
    """Group <tr> rows (each with a <th> label and a <td> value) into news records."""
    items = []
    current_item = {}
    for row in rows:
        th = row.find('th')
        td = row.find('td')
        if th is None or td is None:
            continue

        header_text = th.get_text(strip=True)
        cell_text = td.get_text(strip=True)

        if "Trading Code" in header_text:
            current_item["Trading Code"] = cell_text
        elif "News Title" in header_text:
            # Must be tested before the plain "News" label, which it contains.
            current_item["News Title"] = cell_text
        elif "News" in header_text:
            current_item["News"] = cell_text
        elif "Post Date" in header_text:
            # "Post Date" is the last row of a record.
            current_item["Post Date"] = cell_text
            if len(current_item) == len(_NEWS_COLUMNS):
                items.append(current_item)
            # Always start afresh, even after an incomplete record, so its
            # fields cannot leak into the next record.
            current_item = {}
    return items


def _extract_board_meeting(news_item):
    """Return a board-meeting record for an LR 16(1)/19(1) news item, else None."""
    lr_match = _LR_REFERENCE_RE.search(news_item["News Title"])
    if lr_match is None:
        return None

    trading_code = news_item["Trading Code"]
    full_news_text = news_item["News"]
    date_match = _MEETING_DATE_RE.search(full_news_text)
    details_match = _DETAILS_RE.search(full_news_text)

    return {
        "Company": trading_code,
        "LR Reference": lr_match.group(0).strip(),
        "Meeting Schedule": date_match.group(0) if date_match else "Date Not Found",
        "Sector": _SECTOR_MAPPING.get(trading_code, "Sector Not Found"),
        "Details": details_match.group(1).strip() if details_match else "Details Not Found",
    }


def _build_board_meeting_frame(records):
    """Build one LR sheet: sort by meeting date, format dates/details, renumber SL."""
    frame = pd.DataFrame(records, columns=_BOARD_MEETING_COLUMNS)
    if frame.empty:
        return frame

    # Parse with each known format in turn, only retrying still-unparsed rows.
    parsed = pd.to_datetime(frame['Meeting Schedule'], format=_MEETING_DATE_FORMATS[0], errors='coerce')
    for date_format in _MEETING_DATE_FORMATS[1:]:
        unparsed = parsed.isna()
        if not unparsed.any():
            break
        parsed.loc[unparsed] = pd.to_datetime(
            frame.loc[unparsed, 'Meeting Schedule'], format=date_format, errors='coerce'
        )

    frame['Meeting Schedule Datetime'] = parsed
    # Stable sort keeps scrape order for meetings on the same date; rows
    # without a parseable date sort last.
    frame = frame.sort_values(by='Meeting Schedule Datetime', ascending=True, kind='stable')
    frame = frame.reset_index(drop=True)

    frame['Details'] = frame['Details'].str.capitalize()

    # Reformat only rows whose date parsed; the rest keep their original text
    # (e.g. "Date Not Found") instead of turning into a blank cell.
    has_date = frame['Meeting Schedule Datetime'].notna()
    frame.loc[has_date, 'Meeting Schedule'] = (
        frame.loc[has_date, 'Meeting Schedule Datetime'].dt.strftime('%d-%b-%y')
    )

    frame = frame.drop(columns=['Meeting Schedule Datetime'])
    frame['SL'] = range(1, len(frame) + 1)
    return frame


def scrape_dse_news_multiple_pages_corrected(base_url, session_cookie=None, max_pages=1, delay=1, timeout=30):
    """
    Scrape news items and export them, with board-meeting schedules, to Excel.

    Each page is fetched with an empty-body POST to ``base_url``. News items
    whose title contains "LR 16(1)" or "LR 19(1)" are treated as board
    meeting notices: the meeting date and the agenda text following
    "to consider, among others," are pulled from the news body, and the
    sector is looked up from the predefined trading-code mapping.

    The result is written to ``LR_News_<YYYY-MM-DD>.xlsx`` (today's date) in
    the current working directory, with a 'News' sheet plus 'LR 16(1)' and
    'LR 19(1)' sheets when they have rows. Board-meeting rows are sorted by
    meeting date, dates are shown as ``DD-Mon-YY`` and details are
    capitalized. Nothing is written when no news item was scraped.

    Args:
        base_url: URL the POST requests are sent to.
        session_cookie: Optional value sent as the ``PHPSESSID`` cookie.
        max_pages: Maximum number of requests to make.
        delay: Seconds to sleep after each successful page; no sleep if <= 0.
        timeout: Seconds to wait for the server before giving up on a request.

    Returns:
        None. Progress and errors are reported on stdout; request and save
        errors are caught and printed rather than raised.
    """
    all_news_data = []
    board_meeting_data = []

    # NOTE(review): Host/Origin/Referer are hard-coded to www.dse.com.bd and
    # are not derived from base_url, and Accept-Encoding advertises br/zstd,
    # which `requests` can only decode when the optional codecs are
    # installed - confirm both against the target server.
    headers = {
        "Accept": "text/html, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Host": "www.dse.com.bd",
        "Origin": "https://www.dse.com.bd",
        "Referer": "https://www.dse.com.bd/display_news.php",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36 Edg/141.0.0.0",
        "X-KL-kes-Ajax-Request": "Ajax_Request",
        "X-Requested-With": "XMLHttpRequest",
        "sec-ch-ua": '"Microsoft Edge";v="141", "Not?A_Brand";v="8", "Chromium";v="141"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
    }

    cookies = {}
    if session_cookie:
        cookies['PHPSESSID'] = session_cookie

    # --- Step 1: Scrape all News Data ---
    print("Step 1: Scraping all news data...")
    for page_num in range(1, max_pages + 1):
        print(f"  Scraping page {page_num} (using empty POST data)...")

        # NOTE(review): the same empty body is sent for every page, so
        # max_pages > 1 repeats an identical request - confirm whether the
        # endpoint supports real pagination.
        post_data = {}

        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.post(
                base_url, headers=headers, cookies=cookies, data=post_data, timeout=timeout
            )
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')
            rows = soup.find_all('tr')

            if not rows:
                print(f"    No <tr> tags found on page {page_num}. Stopping.")
                break

            page_items = _parse_news_rows(rows)
            for news_item in page_items:
                all_news_data.append(news_item)
                meeting = _extract_board_meeting(news_item)
                if meeting is not None:
                    board_meeting_data.append(meeting)
                    print(f"    -> Identified Board Meeting for {meeting['Company']} under {meeting['LR Reference']}")

            print(f"  -> Parsed {len(page_items)} items from page {page_num}.")

            if not page_items:
                print(f"  Warning: No complete news items found on page {page_num}.")
                break

        except requests.exceptions.RequestException as e:
            print(f"  An error occurred while fetching news page {page_num}: {e}")
            break
        except Exception as e:
            # Best-effort boundary: report parsing problems and keep what
            # was collected so far.
            print(f"  An unexpected error occurred on news page {page_num}: {e}")
            break

        if delay > 0:
            time.sleep(delay)

    # --- Step 2: Process Board Meeting Data (Separate, Date Conversion, Sorting, Formatting) ---
    print("\nStep 2: Processing Board Meeting data (separating by LR, sorting, formatting)...")
    lr16_data = []
    lr19_data = []
    for item in board_meeting_data:
        if "16(1)" in item["LR Reference"]:
            lr16_data.append(item)
        elif "19(1)" in item["LR Reference"]:
            lr19_data.append(item)

    lr_sheets = [
        ("LR 16(1)", _build_board_meeting_frame(lr16_data)),
        ("LR 19(1)", _build_board_meeting_frame(lr19_data)),
    ]

    if board_meeting_data:
        for sheet_name, frame in lr_sheets:
            if frame.empty:
                print(f"  -> No {sheet_name} data found.")
            else:
                print(f"  -> Processed {len(frame)} {sheet_name} board meeting entries.")
    else:
        print("  -> No board meeting data found.")

    # --- Step 3: Save to Excel with Today's Date in Filename ---
    print("\nStep 3: Saving data to Excel...")
    if not all_news_data:
        # A workbook needs at least one sheet; with nothing scraped there is
        # nothing to write, so skip the export instead of failing on save.
        print("  -> No news data scraped; skipping Excel export.")
        return

    today_str = datetime.today().strftime('%Y-%m-%d')
    output_filename = f"LR_News_{today_str}.xlsx"

    try:
        with pd.ExcelWriter(output_filename, engine='openpyxl') as writer:
            news_df = pd.DataFrame(all_news_data, columns=_NEWS_COLUMNS)
            news_df.to_excel(writer, sheet_name='News', index=False)
            print(f"  -> Saved {len(all_news_data)} news items to 'News' sheet.")

            for sheet_name, frame in lr_sheets:
                if not frame.empty:
                    frame.to_excel(writer, sheet_name=sheet_name, index=False)
                    print(f"  -> Saved {len(frame)} sorted & formatted {sheet_name} items to '{sheet_name}' sheet.")
                else:
                    print(f"  -> No processed {sheet_name} data to save to '{sheet_name}' sheet.")

        print(f"\nSuccessfully saved all data to {output_filename}")

    except Exception as e:
        print(f"An error occurred while saving the Excel file: {e}")

if __name__ == "__main__":
    # Seconds to pause after each page request.
    request_delay = 1

    # The news endpoint seems to return the same batch for an empty POST
    # body, so one page is enough to get the latest batch.
    max_pages_to_scrape = 1

    # Optional PHPSESSID cookie value, e.g. "aggc96j657rag8iva3du5lc4b3".
    session_id = None

    # Query string sets both startDate and endDate to 2025-11-10.
    news_url = "https://www.dsebd.org/old_news.php?startDate=2025-11-10&endDate=2025-11-10&criteria=4&archive=news"

    # Arguments follow the scraper's parameter order:
    # base_url, session_cookie, max_pages, delay.
    scrape_dse_news_multiple_pages_corrected(
        news_url,
        session_id,
        max_pages_to_scrape,
        request_delay,
    )

Step 1: Scraping all news data...
  Scraping page 1 (using empty POST data)...
    -> Identified Board Meeting for HWAWELLTEX under LR 16(1)
    -> Identified Board Meeting for RDFOOD under LR 16(1)
    -> Identified Board Meeting for ECABLES under LR 19(1)
    -> Identified Board Meeting for ARGONDENIM under LR 16(1)
    -> Identified Board Meeting for SAFKOSPINN under LR 16(1)
    -> Identified Board Meeting for ETL under LR 16(1)
    -> Identified Board Meeting for SONALIANSH under LR 19(1)
    -> Identified Board Meeting for COPPERTECH under LR 16(1)
    -> Identified Board Meeting for PREMIERCEM under LR 16(1)
    -> Identified Board Meeting for BSCPLC under LR 16(1)
    -> Identified Board Meeting for SONALIPAPR under LR 16(1)
    -> Identified Board Meeting for BPPL under LR 16(1)
    -> Identified Board Meeting for BDCOM under LR 16(1)
    -> Identified Board Meeting for BARKAPOWER under LR 16(1)
    -> Identified Board Meeting for SUMITPOWER under LR 16(1)
    -> Identified Bo