In [32]:
import asyncio
import pandas as pd
from playwright.async_api import async_playwright

# Column names used when the results table exposes no usable header cells.
_DEFAULT_HEADERS = (
    "Serial No",
    "Type",
    "District",
    "Area",
    "Dealer name",
    "Mobile No",
    "Address",
    "NaPanta Mobile App",
)

# Selectors tried, in order, to detect a visible popup/modal.
_POPUP_SELECTORS = (
    "#onloadModal",
    ".modal",
    "[role='dialog']",
    ".popup",
    ".modal-dialog",
)

# Selectors tried, in order, to find a button that dismisses the popup.
_CLOSE_SELECTORS = (
    "#onloadModal button.close",
    "#onloadModal .btn-close",
    "#onloadModal .close",
    ".modal button.close",
    ".modal .btn-close",
    ".modal .close",
    "[aria-label='Close']",
    "button[data-dismiss='modal']",
    ".modal-header .close",
    "button:has-text('×')",
    "button:has-text('Close')",
)

# Cell texts that are treated as "no value" in the final DataFrame.
_MISSING_TOKENS = frozenset({"", "-", "nan"})


async def _close_popup(page):
    """Best-effort dismissal of a popup on ``page``.

    Waits up to 2 s per entry of ``_POPUP_SELECTORS`` for a popup to show up.
    If one is found, tries in order: clicking the first *visible* close
    button, pressing Escape, and clicking near the top-left corner of the body.

    Returns:
        True if one of the dismissal strategies was carried out, False if no
        popup was detected or every strategy failed. Never raises for ordinary
        errors; they are printed instead.
    """
    try:
        popup_selector = None
        for selector in _POPUP_SELECTORS:
            try:
                await page.wait_for_selector(selector, timeout=2000)
            except Exception:
                # Not visible within the timeout; try the next candidate.
                # `Exception` (not a bare except) so task cancellation and
                # KeyboardInterrupt still propagate.
                continue
            popup_selector = selector
            print(f"Found popup with selector: {selector}")
            break

        if popup_selector is None:
            return False

        for close_selector in _CLOSE_SELECTORS:
            try:
                close_btn = await page.query_selector(close_selector)
                # Skip hidden matches: clicking one would block on
                # Playwright's actionability wait until the click times out.
                if close_btn and await close_btn.is_visible():
                    await close_btn.click()
                    print(f"Clicked close button: {close_selector}")
                    # Wait a bit for the popup to close
                    await page.wait_for_timeout(1000)
                    return True
            except Exception:
                continue

        # No usable close button: try the Escape key.
        try:
            await page.keyboard.press("Escape")
            print("Pressed Escape to close popup")
            await page.wait_for_timeout(1000)
            # Only report success if the popup actually went away; otherwise
            # fall through to the click-outside strategy below.
            if not await page.is_visible(popup_selector):
                return True
        except Exception:
            pass

        # Last resort: click outside the modal.
        try:
            await page.click("body", position={"x": 10, "y": 10})
            print("Clicked outside modal to close")
            await page.wait_for_timeout(1000)
            return True
        except Exception:
            pass

    except Exception as e:
        print(f"Error in close_popup: {e}")

    return False


async def _extract_table_data(page):
    """Extract data from the current table.

    Returns:
        ``(rows, headers)`` where ``rows`` is a list of lists of stripped cell
        texts (rows without any ``td`` are skipped) and ``headers`` is the list
        of stripped ``thead th`` texts (possibly empty). On any error the
        problem is printed and ``([], [])`` is returned.
    """
    try:
        # Wait for results table
        await page.wait_for_selector("table", timeout=15000)

        try:
            header_cells = await page.query_selector_all("table thead tr th")
            headers = [(await cell.inner_text()).strip() for cell in header_cells]
        except Exception:
            # The caller falls back to the default column names when empty.
            headers = []

        data = []
        for row in await page.query_selector_all("table tbody tr"):
            cells = await row.query_selector_all("td")
            row_data = [(await cell.inner_text()).strip() for cell in cells]
            if row_data:  # Only add non-empty rows
                data.append(row_data)

        return data, headers
    except Exception as e:
        print(f"Error extracting table data: {e}")
        return [], []


def _clean_cell(value):
    """Return ``value`` as stripped text, or None for missing/placeholder cells."""
    if value is None:
        return None
    text = str(value).strip()
    return None if text in _MISSING_TOKENS else text


def _build_dataframe(all_data, all_headers):
    """Build the consolidated DataFrame from non-empty ``all_data``.

    Headers fall back to ``_DEFAULT_HEADERS`` when none were scraped and are
    then padded (``Column_N``) or truncated to the width of the *widest* row,
    so a single short or long row cannot make the constructor fail. Short rows
    are padded with None. Cells are cleaned here, before the frame is created,
    rather than with ``Series.replace(list, None)``, which forward-fills
    instead of nulling on pandas < 1.4.
    """
    headers = list(all_headers) if all_headers else list(_DEFAULT_HEADERS)

    width = max(len(row) for row in all_data)
    if width != len(headers):
        print(f"Warning: Data has {width} columns but {len(headers)} headers")
        if width > len(headers):
            headers += [f"Column_{i + 1}" for i in range(len(headers), width)]
        else:
            headers = headers[:width]

    cleaned_rows = [
        [_clean_cell(cell) for cell in row] + [None] * (width - len(row))
        for row in all_data
    ]
    df = pd.DataFrame(cleaned_rows, columns=headers)

    print(f"DataFrame created with columns: {list(df.columns)}")
    return df


async def scrape_data(state_name: str, district_name: str) -> pd.DataFrame:
    """Scrape the seed-dealer tables for every area of one district.

    Opens https://www.napanta.com/seed-dealer in headless Chromium, selects
    ``state_name`` and ``district_name`` by their dropdown labels, then for
    each option of the area dropdown (the first option is skipped as a
    placeholder) selects it, clicks GO and collects the rows of the results
    table. Popups are dismissed on a best-effort basis along the way.

    Args:
        state_name: Label of the option to choose in ``#ddlState``.
        district_name: Label of the option to choose in ``#ddlDistrict``.

    Returns:
        A DataFrame with one row per scraped table row, or an empty DataFrame
        when nothing was extracted. Blank cells, "-" and "nan" become None.

    Raises:
        Exception: any error outside the per-area loop is re-raised after an
            attempt to save ``error_screenshot.png``. Errors while processing
            a single area are printed and that area is skipped.
    """
    all_data = []
    all_headers = []

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            # Created inside the try so the browser is closed even if this fails.
            page = await browser.new_page()
            try:
                # --- Open website ---
                print("Opening website...")
                await page.goto("https://www.napanta.com/seed-dealer", wait_until="domcontentloaded")

                # Wait a bit for any popups to load
                await page.wait_for_timeout(2000)

                # --- Close popup if appears ---
                print("Checking for initial popup...")
                await _close_popup(page)

                # --- Select state ---
                print(f"Selecting state: {state_name}")
                await page.wait_for_selector("#ddlState", timeout=10000)
                await page.select_option("#ddlState", label=state_name)

                # Wait for district dropdown to populate
                await page.wait_for_timeout(2000)

                # --- Select district ---
                print(f"Selecting district: {district_name}")
                await page.wait_for_selector("#ddlDistrict", timeout=10000)
                await page.select_option("#ddlDistrict", label=district_name)

                # Wait for area dropdown to populate
                await page.wait_for_timeout(2000)

                # --- Get all area options ---
                print("Getting all available areas...")
                await page.wait_for_selector("#ddlMarket", timeout=10000)
                area_options = await page.query_selector_all("#ddlMarket option")

                # Skip the first option, which is usually "Select Area" or similar,
                # and any option lacking a label or a value.
                areas = []
                for option in area_options[1:]:
                    area_label = (await option.inner_text() or "").strip()
                    area_value = await option.get_attribute("value")
                    if area_label and area_value:
                        areas.append(area_label)

                print(f"Found {len(areas)} areas: {areas}")

                # --- Iterate through each area ---
                for i, area_name in enumerate(areas):
                    print(f"\n--- Processing area {i+1}/{len(areas)}: {area_name} ---")

                    try:
                        # Select the current area
                        await page.select_option("#ddlMarket", label=area_name)
                        await page.wait_for_timeout(1000)

                        # --- Make sure popup is gone before clicking GO ---
                        print("Checking for popup before clicking GO...")
                        await _close_popup(page)

                        # --- Click GO button ---
                        print("Clicking GO button...")
                        go_button = await page.wait_for_selector("button.go-btn", timeout=10000)
                        await go_button.scroll_into_view_if_needed()

                        is_enabled = await go_button.is_enabled()
                        is_visible = await go_button.is_visible()
                        print(f"GO button - Enabled: {is_enabled}, Visible: {is_visible}")

                        if not is_enabled or not is_visible:
                            print("GO button not ready, waiting...")
                            await page.wait_for_timeout(2000)

                        await go_button.click()
                        print("GO button clicked successfully")

                        # --- Close popup again if it reappears after reload ---
                        await page.wait_for_timeout(2000)
                        print("Checking for popup after GO click...")
                        await _close_popup(page)

                        # --- Extract table data for current area ---
                        print(f"Extracting table data for area: {area_name}")
                        area_data, headers = await _extract_table_data(page)

                        if area_data:
                            all_data.extend(area_data)
                            if not all_headers and headers:
                                all_headers = headers
                            print(f"Extracted {len(area_data)} rows for {area_name}")
                        else:
                            print(f"No data found for area: {area_name}")

                        # Wait a bit before processing next area
                        await page.wait_for_timeout(1000)

                    except Exception as e:
                        # One failing area must not abort the whole run.
                        print(f"Error processing area '{area_name}': {e}")
                        continue

                print(f"\n--- Completed processing all areas ---")
                print(f"Total rows extracted: {len(all_data)}")

            except Exception as e:
                print(f"Error during scraping: {e}")
                # Take a screenshot for debugging, without letting a screenshot
                # failure hide the original error.
                try:
                    await page.screenshot(path="error_screenshot.png")
                except Exception as screenshot_error:
                    print(f"Could not save error screenshot: {screenshot_error}")
                raise  # bare raise keeps the original traceback
        finally:
            await browser.close()

    if not all_data:
        print("No data extracted from any area")
        return pd.DataFrame()

    return _build_dataframe(all_data, all_headers)

# -----------------------------
# Run the scraper
# -----------------------------
if __name__ == "__main__":
    # Location to scrape; both values are handed to scrape_data unchanged.
    state_name, district_name = "Uttar Pradesh", "Lucknow"

    # Everything stays inside one try so that a failure while reporting or
    # saving is reported the same way as a failure while scraping.
    try:
        df = asyncio.run(scrape_data(state_name, district_name))
        has_rows = not df.empty

        # Preview of the consolidated result.
        print("\nConsolidated data from all areas:")
        print(df.head(10))
        print(f"\nTotal rows: {len(df)}")
        print(f"Columns: {list(df.columns)}")

        # Per-area dealer counts, when the result has an 'Area' column.
        if has_rows and 'Area' in df.columns:
            print(f"\nArea breakdown:")
            for area, count in df['Area'].value_counts().items():
                print(f"  {area}: {count} dealers")

        # Persist non-empty results next to the notebook.
        if has_rows:
            filename = f"seed_dealers_{state_name}_{district_name}.csv"
            df.to_csv(filename, index=False)
            print(f"\nData saved to: {filename}")

    except Exception as e:
        print(f"Scraping failed: {e}")

Opening website...
Checking for initial popup...
Found popup with selector: #onloadModal
Clicked close button: #onloadModal .btn-close
Selecting state: Uttar Pradesh
Selecting district: Lucknow
Getting all available areas...
Found 8 areas: ['Bakshi-Ka-Talab', 'Chinhat', 'Gosaiganj', 'Kakori', 'Mal', 'Malihabad', 'Mohanlalganj', 'Sarojaninagar']

--- Processing area 1/8: Bakshi-Ka-Talab ---
Checking for popup before clicking GO...
Clicking GO button...
GO button - Enabled: True, Visible: True
GO button clicked successfully
Checking for popup after GO click...
Found popup with selector: #onloadModal
Clicked close button: #onloadModal .btn-close
Extracting table data for area: Bakshi-Ka-Talab
Extracted 29 rows for Bakshi-Ka-Talab

--- Processing area 2/8: Chinhat ---
Checking for popup before clicking GO...
Clicking GO button...
GO button - Enabled: True, Visible: True
GO button clicked successfully
Checking for popup after GO click...
Found popup with selector: #onloadModal
Clicked close

In [33]:
# Display the DataFrame assigned to `df` by the scraper cell as this cell's output.
df

Unnamed: 0,Serial No,Type,District,Area,Dealer name,Mobile No,Address,NaPanta Mobile App
0,1,Seed Dealers,Lucknow,Bakshi-Ka-Talab,Ms Ridhi Agro India,,Plot No 15 Jankivihar Colony Jankipuram,For Free Alerts
1,2,Seed Dealers,Lucknow,Bakshi-Ka-Talab,Ms Utkarsh Seeds,,660 14 A Foolbagh Colony Kursi Road,For Free Alerts
2,3,Seed Dealers,Lucknow,Bakshi-Ka-Talab,Ms Walnet Seeds Company,,Dr Rajendranagar Godam Shivbalak Maurya Ka God...,For Free Alerts
3,4,Seed Dealers,Lucknow,Bakshi-Ka-Talab,Ms. AAZAD KRISHI KENDRA,,MADHAVPUR,For Free Alerts
4,5,Seed Dealers,Lucknow,Bakshi-Ka-Talab,Ms. CHAUHAN BEEJ BHANDAR,,MAMPUR VANA,For Free Alerts
...,...,...,...,...,...,...,...,...
170,7,Seed Dealers,Lucknow,Sarojaninagar,Ms. MAA VAISHNO BEEJ BHANDAR,,HARONI,For Free Alerts
171,8,Seed Dealers,Lucknow,Sarojaninagar,Ms. P.C.F. KENDRA,,JAITI MORE,For Free Alerts
172,9,Seed Dealers,Lucknow,Sarojaninagar,Ms. P.C.F. KENDRA,,LONHA,For Free Alerts
173,10,Seed Dealers,Lucknow,Sarojaninagar,Ms. TIWARI KHAD BHANDAR,,BANTHRA BAZAR,For Free Alerts
