In [9]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import unicodedata

class WikipediaTableScraper:
    
    def __init__(self, url, table_index=0):
        self.url = url
        self.table_index = table_index
        self.soup = None
        self.table = None
        self.df = None

    def fetch_html(self):
        response = requests.get(self.url)
        if response.status_code != 200:
            raise Exception("Failed to fetch page")
        self.soup = BeautifulSoup(response.text, 'html.parser')

    def extract_table(self):
        tables = self.soup.find_all("table", {"class": ["wikitable", "sortable", "plainrowheaders", "collapsible"]})
        if len(tables) <= self.table_index:
            raise Exception("Table index out of range")
        self.table = tables[self.table_index]

    def clean_header(self, header_row):
        headers = []
        for th in header_row.find_all('th'):
            # Clean unwanted tags inside header
            for tag in ['br', 'a', 'sup']:
                if th.find(tag):
                    th.find(tag).extract()
            header = ' '.join(th.stripped_strings)
            headers.append(header)
        return headers

    def parse_row(self, row):
        cells = row.find_all(['th', 'td'])
        values = []
        for cell in cells:
            # Normalize unicode and remove leading/trailing spaces
            text = unicodedata.normalize("NFKD", cell.get_text(strip=True))
            values.append(text)
        return values

    def scrape(self):
        self.fetch_html()
        self.extract_table()

        header_row = self.table.find('tr')
        headers = self.clean_header(header_row)

        rows_data = []
        for row in self.table.find_all('tr')[1:]:
            parsed_row = self.parse_row(row)
            if parsed_row:  # ignore empty rows
                rows_data.append(parsed_row)

        self.df = pd.DataFrame(rows_data, columns=headers)
        return self.df

# ================== Example Usage =======================

url = "https://en.wikipedia.org/w/index.php?title=List_of_Falcon_9_and_Falcon_Heavy_launches&oldid=1027686922"
scraper = WikipediaTableScraper(url, table_index=2)
spacex_df = scraper.scrape()

print(spacex_df.head())
#this is the end version of wikipedia..any wikipedia page data i can get here

                                          Flight No.  \
0                                                 14   
1  Following second-stage separation, SpaceX atte...   
2                                                 15   
3  First launch under USAF'sOSP3 launch contract....   
4                                                 16   

            Date and time ( )                            Launch site  \
0   10 January 2015,09:47[67]  F9 v1.1B1012[8]  Cape Canaveral,LC-40   
1                        None             None                  None   
2  11 February 2015,23:03[72]  F9 v1.1B1013[8]  Cape Canaveral,LC-40   
3                        None             None                  None   
4  2 March 2015,03:50[21][76]  F9 v1.1B1014[8]  Cape Canaveral,LC-40   

                         Payload             Payload mass  \
0  SpaceX CRS-5[68](Dragon C107)  2,395 kg (5,280 lb)[69]   
1                           None                     None   
2                 DSCOVR[68][73]        570 kg 