In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import sqlite3
from datetime import datetime
import logging
from functools import wraps

In [64]:
# Archived snapshot of the Wikipedia "List of largest banks" page.
# NOTE: the URL must not contain whitespace; a stray space makes it malformed.
url = 'https://web.archive.org/web/20230908091635/https://en.wikipedia.org/wiki/List_of_largest_banks'

# Column names right after extraction, and after the currency columns are added.
table_attribs_in = ['Name', 'MC_USD_Billion']
table_attribs_out = ['Name', 'MC_USD_Billion', 'MC_GBP_Billion', 'MC_EUR_Billion', 'MC_INR_Billion']

# Output artefacts: CSV export, SQLite database file, and the log file.
csv_file = 'Largest_banks_data.csv'
db_file = 'Banks.db'
log_file = 'code_log.txt'

# Name of the table written into the SQLite database.
table_name = 'Largest_banks'

# Open the SQLite database (sqlite3 creates the file if it does not exist)
# and route INFO-level log records to the log file.
connection = sqlite3.connect(db_file)
logging.basicConfig(filename=log_file, level=logging.INFO)


In [3]:
def log_progress(message):
    ''' Emit a timestamped progress message.

    The entry "<timestamp> - <message>" (terminated by a newline) is
    printed to stdout and also sent to the root logger at INFO level.
    Function returns nothing'''

    # Timestamp of the moment the message is logged, microsecond precision
    stamp = datetime.now().strftime('%Y-%m-%d %H-%M-%S.%f')

    entry = "{} - {}\n".format(stamp, message)

    print(entry)
    logging.info(entry)

def log(func):
    """Decorator that brackets a call with progress-log entries.

    A "Calling <name> ..." entry is written before the wrapped function
    runs and a "Finished <name>." entry after it returns. If anything in
    that sequence raises, the exception is recorded with its traceback via
    ``logging.exception`` and then re-raised to the caller.

    Args:
        func (callable): The function to be decorated.

    Returns:
        function: The decorated wrapper function (metadata preserved
        through ``functools.wraps``).

    Raises:
        Exception: Any exception raised within the decorated function.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            # Announce the call, run it, then announce completion
            log_progress(f"Calling {func.__name__} ...")
            outcome = func(*args, **kwargs)
            log_progress(f"Finished {func.__name__}.")
        except Exception as e:
            # Record the failure with traceback, then hand it back to the caller
            logging.exception(f"Exception raised in {func.__name__}. exception: {str(e)}.")
            raise e

        return outcome

    return wrapper

In [30]:
def extract(url, table_attributes):

    """
    Extracts bank names and market capitalisations from the first
    ``wikitable`` on the page at ``url`` (assumed to be the table under the
    heading "By market capitalization") into a pandas DataFrame.

    For every data row, the second ``<td>`` cell is read as the bank name
    and the third as the market capitalisation.

    Args:
        url (str): The URL of the webpage containing the table.
        table_attributes (list): A list of column names for the DataFrame
            (two names: bank name and market cap).

    Returns:
        pandas.DataFrame: A DataFrame containing the extracted data. It is
        empty (but has the requested columns) if the table has no data rows.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the table is not found, or a market-cap cell
            cannot be parsed as a number.
    """

    # Get URL content; the timeout keeps a stalled server from hanging the
    # notebook, and error statuses are surfaced instead of parsed as HTML
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Parse HTML content to Beautiful Soup object
    soup = BeautifulSoup(response.text, "html.parser")

    # Find the candidate tables in the Soup object
    tables = soup.find_all("table", class_="wikitable")
    if not tables:
        raise ValueError(f"No table with class 'wikitable' found at {url}")

    # Choose the first table, called: "By market capitalization"
    table = tables[0]

    # Identify rows (containing many columns) in the table
    rows = table.find_all('tr')

    # Extract data from the table row by row, skipping the header row
    data = []
    for row in rows[1:]:

        # Create a list of columns in each row
        columns = row.find_all('td')

        # Rows without enough data cells (e.g. extra header rows) carry no record
        if len(columns) < 3:
            continue

        # Extract text from specific cells and strip surrounding whitespace
        bank_name = columns[1].text.strip()
        # Drop thousands separators so values like "1,234.5" parse correctly
        market_cap = float(columns[2].text.strip().replace(',', ''))

        data.append([bank_name, market_cap])

    # Build the dataframe once, after all rows have been collected
    return pd.DataFrame(data, columns=table_attributes)

# Scrape the archived page into a (Name, MC_USD_Billion) frame and display it
df = extract(url=url, table_attributes=table_attribs_in)
df

Unnamed: 0,Name,MC_USD_Billion
0,JPMorgan Chase,432.92
1,Bank of America,231.52
2,Industrial and Commercial Bank of China,194.56
3,Agricultural Bank of China,160.68
4,HDFC Bank,157.91
5,Wells Fargo,155.87
6,HSBC Holdings PLC,148.9
7,Morgan Stanley,140.83
8,China Construction Bank,139.82
9,Bank of China,136.81


In [41]:
def rate(currency, csv_path="exchange_rate.csv"):
    """
    Looks up the exchange rate for a currency in the exchange-rate CSV.

    The CSV is expected to have a ``Currency`` column and a ``Rate`` column.
    Only 'EUR', 'GBP' and 'INR' are accepted. Problems are reported with a
    printed message and a ``None`` return value rather than an exception.

    Args:
        currency (str): Currency code to look up ('EUR', 'GBP' or 'INR').
        csv_path (str): Path of the exchange-rate CSV file.
            Defaults to "exchange_rate.csv".

    Returns:
        The value from the ``Rate`` column of the first row whose
        ``Currency`` equals ``currency``, or ``None`` if the file is
        missing, the currency is not supported, or the file does not
        contain a usable entry for it.
    """

    supported = ('EUR', 'GBP', 'INR')

    try:
        # Load the CSV data into a DataFrame
        rates = pd.read_csv(csv_path)

    except FileNotFoundError:
        print(f"Error: {csv_path} file not found.")
        return None

    # Reject unsupported codes up front instead of relying on a failed lookup
    if currency not in supported:
        print('\nWrong currency specified.\n')
        return None

    try:
        # One lookup is enough: first matching row's rate
        return rates.loc[rates['Currency'] == currency, 'Rate'].values[0]

    except (KeyError, IndexError) as e:
        # KeyError: expected column missing; IndexError: no row for this currency
        print(f"Error: Invalid data provided: \n{e}")
        return None
    
# Spot-check the lookup: show the USD -> EUR rate read from the CSV
rate(currency='EUR')

0.93

In [51]:
# Write a function to transform the data frame by adding columns for Market Capitalization 
# in GBP, EUR, and INR, rounded to 2 decimal places, based on the exchange rate information shared as a CSV file.

def transform(df):
    """
    Adds market-capitalisation columns in GBP, EUR and INR to ``df``.

    Each new column is ``MC_USD_Billion`` multiplied by the rate returned
    by ``rate()`` for that currency, rounded to 2 decimal places. The
    columns are added to ``df`` itself (in place), in the order
    ``MC_GBP_Billion``, ``MC_EUR_Billion``, ``MC_INR_Billion``.

    Args:
        df (pandas.DataFrame): Frame with a numeric ``MC_USD_Billion`` column.

    Returns:
        pandas.DataFrame: The same ``df`` object, with the three columns added.

    Raises:
        ValueError: If ``rate()`` returns ``None`` for any of the currencies.
    """

    # Resolve every rate before touching df, so a failed lookup produces a
    # clear error and cannot leave the frame half-updated
    rates = {code: rate(code) for code in ('GBP', 'EUR', 'INR')}

    missing = [code for code, value in rates.items() if value is None]
    if missing:
        raise ValueError(f"No exchange rate available for: {', '.join(missing)}")

    for code, value in rates.items():
        df[f'MC_{code}_Billion'] = (df['MC_USD_Billion'] * value).round(2)

    return df

# Add the GBP / EUR / INR market-cap columns and display the result
df = transform(df=df)
df

Unnamed: 0,Name,MC_USD_Billion,MC_GBP_Billion,MC_EUR_Billion,MC_INR_Billion
0,JPMorgan Chase,432.92,346.34,402.62,35910.71
1,Bank of America,231.52,185.22,215.31,19204.58
2,Industrial and Commercial Bank of China,194.56,155.65,180.94,16138.75
3,Agricultural Bank of China,160.68,128.54,149.43,13328.41
4,HDFC Bank,157.91,126.33,146.86,13098.63
5,Wells Fargo,155.87,124.7,144.96,12929.42
6,HSBC Holdings PLC,148.9,119.12,138.48,12351.26
7,Morgan Stanley,140.83,112.66,130.97,11681.85
8,China Construction Bank,139.82,111.86,130.03,11598.07
9,Bank of China,136.81,109.45,127.23,11348.39


In [63]:
# Function to load the transformed data frame to an output CSV file.

def load_to_csv(df, csv_path):
    """
    Saves a pandas DataFrame to a CSV file at the specified path.

    The file is written as UTF-8 without the index column. Errors raised
    while writing the frame are reported (printed or logged) rather than
    propagated; errors raised while opening the file are propagated.

    Args:
        df (pandas.DataFrame): The DataFrame to save as a CSV file.
        csv_path (str): The path to the output CSV file.

    Raises:
        OSError: If the output file cannot be opened for writing.

    Returns:
        None
    """

    # newline='' is what pandas asks for when handed an open file object
    # (otherwise line endings are translated twice on Windows); UTF-8 matches
    # the encoding to_csv uses by default when it is given a path
    with open(csv_path, 'w', newline='', encoding='utf-8') as file:
        try:
            df.to_csv(file, index=False)
            print(f"Saved dataframe to the CSV file: {csv_path}.")

        except (IOError, ValueError, IndexError, UnicodeEncodeError) as e:
            # Handle specific exceptions with tailored messages
            print(f"Specific error encountered: \n{e}")

        except Exception:
            # Handle any other unexpected errors; keep the traceback in the log
            logging.exception("General error writing to CSV:")


# Export the transformed frame to the configured CSV path
load_to_csv(df=df, csv_path=csv_file)

In [65]:
# Write a function to load the transformed data frame to an SQL database server as a table.

def load_to_db(df, sql_connection, table_name):
    """
    Saves a DataFrame to a SQL database as a table.

    The table is replaced if it already exists, and the DataFrame index is
    not written. The write runs inside ``with sql_connection:`` so the
    connection's context manager handles commit/rollback.

    Args:
        df (pd.DataFrame): The DataFrame to save.
        sql_connection: An open SQL connection object.
        table_name (str): The name of the table to create or replace.

    Raises:
        ValueError: If the DataFrame is empty.
        Exception: For any other errors encountered during saving
            (the original error is chained as ``__cause__``).
    """

    if df.empty:
        raise ValueError("DataFrame is empty. Cannot load to database.")

    try:
        with sql_connection:
            df.to_sql(table_name, sql_connection, if_exists='replace', index=False)

    except Exception as e:
        raise Exception(f"Error saving DataFrame to database: {e}") from e

    # Report only what this function actually knows (the table name); it must
    # not depend on a module-level database filename unrelated to the connection
    print(f"Saved dataframe to the database table: {table_name}.")
    
# Write the transformed frame into the SQLite database opened in the config cell
load_to_db(df=df, sql_connection=connection, table_name=table_name)