In [16]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Step 1: Get the page content
url = "https://web.archive.org/web/20230908091635/https://en.wikipedia.org/wiki/List_of_largest_banks"
# A timeout keeps the cell from hanging indefinitely if the server never answers,
# and raise_for_status() stops an HTTP error page from being parsed as if it were data.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Step 2: Locate the target table (the first <table> with class "wikitable")
table = soup.find("table", class_="wikitable")
if table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError("No table with class 'wikitable' was found on the page")

# Step 3: Extract rows
# Only rows with exactly three <td> cells are kept; each becomes
# [rank, bank name, market cap] with surrounding whitespace stripped.
data = []
for row in table.find_all("tr")[1:]:  # skip the header
    cols = row.find_all("td")
    if len(cols) == 3:
        data.append([col.text.strip() for col in cols])

# Log once for the whole step rather than once per table row.
# NOTE: save_logs must already be defined in the kernel before this cell runs.
if data:
    save_logs("Data has been extracted")

# Step 4: Convert to DataFrame
df = pd.DataFrame(data, columns=["Rank", "Bank Name", "Market Cap (US$ Billion)"])

# Step 5: Display or save
print(df)

# Optional: Save to Excel or CSV
# df.to_csv("top_banks.csv", index=False)
# df.to_excel("top_banks.xlsx", index=False)


  Rank                                Bank Name Market Cap (US$ Billion)
0    1                           JPMorgan Chase                   432.92
1    2                          Bank of America                   231.52
2    3  Industrial and Commercial Bank of China                   194.56
3    4               Agricultural Bank of China                   160.68
4    5                                HDFC Bank                   157.91
5    6                              Wells Fargo                   155.87
6    7                        HSBC Holdings PLC                   148.90
7    8                           Morgan Stanley                   140.83
8    9                  China Construction Bank                   139.82
9   10                            Bank of China                   136.81


Transform

In [5]:
# Load the exchange-rate table from the local workbook.
# NOTE(review): the file is spelled "Exhange.xlsx" here but "Exchange.xlsx" in the
# full-application cell further down - confirm which file name is the intended one.
exchange = pd.read_excel("Exhange.xlsx")

In [6]:
# Display the loaded exchange-rate table as the cell output.
exchange

Unnamed: 0,Currency,Rate
0,Pkr,283.0
1,Eur,0.87
2,Inr,86.0
3,Gbp,0.74


In [9]:
# Step 1: Make sure the market-cap column is numeric (unparseable values become NaN)
df['Market Cap (US$ Billion)'] = pd.to_numeric(df['Market Cap (US$ Billion)'], errors='coerce')

# Step 2: Add one column per currency, named after the currency,
# holding the US$ market cap multiplied by that currency's rate.
for currency, rate in zip(exchange['Currency'], exchange['Rate']):
    df[currency] = df['Market Cap (US$ Billion)'] * rate

# Step 3: Log once for the whole step rather than once per currency.
# NOTE: save_logs must already be defined in the kernel before this cell runs.
if not exchange.empty:
    save_logs("Data has been Transformed")

# Step 4: Display result
print(df)

# Step 5: Export to Excel (this runs every time the cell is executed)
df.to_excel("converted_bank_market_caps.xlsx", index=False)

  Rank                                Bank Name  Market Cap (US$ Billion)  \
0    1                           JPMorgan Chase                    432.92   
1    2                          Bank of America                    231.52   
2    3  Industrial and Commercial Bank of China                    194.56   
3    4               Agricultural Bank of China                    160.68   
4    5                                HDFC Bank                    157.91   
5    6                              Wells Fargo                    155.87   
6    7                        HSBC Holdings PLC                    148.90   
7    8                           Morgan Stanley                    140.83   
8    9                  China Construction Bank                    139.82   
9   10                            Bank of China                    136.81   

         Pkr       Eur       Inr       Gbp  
0  122516.36  376.6404  37231.12  320.3608  
1   65520.16  201.4224  19910.72  171.3248  
2   55060.48  169

In [15]:
from datetime import datetime

def save_logs(message, log_path=r'C:\Users\abx\logs\code_log.txt'):
    """Append a timestamped message to a log file.

    Each call adds one line of the form "<current datetime> : <message>".

    Args:
        message: Text to record.
        log_path: File to append to. Defaults to the location this helper
            has always used, so existing single-argument calls are unchanged.

    Raises:
        OSError: If the directory cannot be created or the file cannot be opened.
    """
    # Imported locally so this cell does not rely on another cell importing os.
    import os

    # Create the containing directory on first use; without this, open() fails
    # with FileNotFoundError when the log folder does not exist yet.
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # Explicit UTF-8 so the log does not depend on the platform's default encoding.
    with open(log_path, 'a', encoding='utf-8') as f:
        f.write(f'{datetime.now()} : {message}\n')


Load

In [None]:
def load():
    """Write the notebook-level ``df`` DataFrame to "converted_bank_market_caps.xlsx".

    The index is not written. Relies on a global ``df`` existing in the kernel
    when the function is called.
    """
    df.to_excel("converted_bank_market_caps.xlsx", index=False)

In [17]:
# Entry-point guard: only sets the source URL (an archived snapshot of the
# Wikipedia "List of largest banks" page); nothing else is run here.
if __name__ =='__main__':
    url= "https://web.archive.org/web/20230908091635/https://en.wikipedia.org/wiki/List_of_largest_banks"
    


FULL AND COMPLETE APPLICATION

In [20]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

# Log helper
def save_logs(message, log_path=r'C:\Users\abx\logs\code_log.txt'):
    """Append a timestamped message to a log file.

    Each call adds one line of the form "<current datetime> : <message>".

    Args:
        message: Text to record.
        log_path: File to append to. Defaults to the location this helper
            has always used, so existing single-argument calls are unchanged.

    Raises:
        OSError: If the directory cannot be created or the file cannot be opened.
    """
    # Imported locally so the helper stands on its own without a new top-level import.
    import os

    # Create the containing directory on first use; without this, open() fails
    # with FileNotFoundError when the log folder does not exist yet.
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # Explicit UTF-8 so the log does not depend on the platform's default encoding.
    with open(log_path, 'a', encoding='utf-8') as f:
        f.write(f'{datetime.now()} : {message}\n')

# Step 1: Extract data from URL
def extract(url):
    """Scrape the first "wikitable" on the page at ``url`` into a DataFrame.

    Rows with exactly three <td> cells are read as rank, bank name and market
    cap. Commas and dollar signs are removed from the market cap, which is
    then converted to a number (values that cannot be parsed become NaN).
    One log line is written per extracted bank.

    Args:
        url: Address of the page to download.

    Returns:
        A DataFrame with columns "Rank", "Bank Name" and
        "Market Cap (US$ Billion)". If anything goes wrong, the error is
        logged and an empty DataFrame is returned instead of raising.
    """
    try:
        # The timeout prevents an unresponsive server from blocking forever;
        # raise_for_status() turns HTTP error responses into exceptions so an
        # error page is never parsed as if it were the bank table.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        table = soup.find("table", class_="wikitable")
        if table is None:
            # Gives the log a meaningful message instead of an AttributeError on None.
            raise ValueError("no table with class 'wikitable' found on the page")

        data = []
        for row in table.find_all("tr")[1:]:  # first row is the header
            cols = row.find_all("td")
            if len(cols) == 3:
                rank = cols[0].text.strip()
                bank_name = cols[1].text.strip()
                market_cap = cols[2].text.strip().replace(",", "").replace("$", "")
                data.append([rank, bank_name, market_cap])
                save_logs(f"Extracted: {bank_name}")

        df = pd.DataFrame(data, columns=["Rank", "Bank Name", "Market Cap (US$ Billion)"])
        df['Market Cap (US$ Billion)'] = pd.to_numeric(df['Market Cap (US$ Billion)'], errors='coerce')
        return df

    except Exception as e:
        # Best-effort by design: record the failure and hand back an empty frame.
        save_logs(f"Extraction failed: {e}")
        return pd.DataFrame()  # return empty if failed

# Step 2: Transform using exchange rates
def transform(df, exchange_file_path):
    """Add one converted market-cap column per currency listed in an Excel file.

    The workbook is read with ``pd.read_excel`` and must provide "Currency"
    and "Rate" columns. For every row, a column named after the currency is
    added, holding "Market Cap (US$ Billion)" multiplied by the rate, and one
    log line is written.

    The input frame is not modified: the conversion is done on a copy, so a
    failure part-way through cannot leave the caller's data half-transformed.
    Callers must use the returned frame.

    Args:
        df: DataFrame containing a "Market Cap (US$ Billion)" column.
        exchange_file_path: Path of the Excel workbook with the rates.

    Returns:
        A new DataFrame with the extra currency columns, or the original
        ``df`` unchanged if any step fails (the error is logged, not raised).
    """
    try:
        exchange = pd.read_excel(exchange_file_path)
        usd_caps = df['Market Cap (US$ Billion)']
        converted = df.copy()
        # Walk the two columns in step instead of materialising a Series per row.
        for currency, rate in zip(exchange['Currency'], exchange['Rate']):
            converted[currency] = usd_caps * rate
            save_logs(f"Transformed to: {currency}")
        return converted
    except Exception as e:
        # Best-effort by design: record the failure and return the input as received.
        save_logs(f"Transformation failed: {e}")
        return df

# Step 3: Load/save to Excel
def load_data(df, output_file_path):
    """Write ``df`` to an Excel file without its index and log the outcome.

    Any exception raised while saving (or while logging the success) is
    caught and recorded through ``save_logs`` rather than propagated.

    Args:
        df: DataFrame to export.
        output_file_path: Destination path of the Excel file.
    """
    try:
        df.to_excel(output_file_path, index=False)
        success_note = f"File saved to: {output_file_path}"
        save_logs(success_note)
    except Exception as err:
        failure_note = f"Saving failed: {err}"
        save_logs(failure_note)

# Main logic: run extract -> transform -> load, then show the result
if __name__ == '__main__':
    # Inputs and outputs of the pipeline
    url = "https://web.archive.org/web/20230908091635/https://en.wikipedia.org/wiki/List_of_largest_banks"
    exchange_file_path = "Exchange.xlsx"
    output_file_path = "converted_bank_market_caps.xlsx"

    # Scrape the table and convert it in one expression, then persist and print it
    df = transform(extract(url), exchange_file_path)
    load_data(df, output_file_path)
    print(df)


  Rank                                Bank Name  Market Cap (US$ Billion)  \
0    1                           JPMorgan Chase                    432.92   
1    2                          Bank of America                    231.52   
2    3  Industrial and Commercial Bank of China                    194.56   
3    4               Agricultural Bank of China                    160.68   
4    5                                HDFC Bank                    157.91   
5    6                              Wells Fargo                    155.87   
6    7                        HSBC Holdings PLC                    148.90   
7    8                           Morgan Stanley                    140.83   
8    9                  China Construction Bank                    139.82   
9   10                            Bank of China                    136.81   

         Pkr       Eur       Inr       Gbp  
0  122516.36  376.6404  37231.12  320.3608  
1   65520.16  201.4224  19910.72  171.3248  
2   55060.48  169