## Importing Modules

In [13]:
import pandas as pd
import numpy as np
import os

# !! Important !! Set `country` Below — It Is Used as the Prefix of the Output Excel File Names

In [14]:
# Market label; it prefixes the generated workbooks
# ("<country>_beta_risks.xlsx" and "<country>_pct_change.xlsx").
country = "USA"

In [15]:
# Folder (relative to the current working directory) that holds the input CSVs.
csv_folder = "CSV"  # Define the folder name

# Fail early with an actionable message instead of the bare listdir error.
if not os.path.isdir(csv_folder):
    raise FileNotFoundError(
        f"CSV folder '{csv_folder}' was not found in '{os.getcwd()}'. "
        "Create it or point csv_folder at the directory containing the CSV files."
    )

# os.listdir returns entries in arbitrary order; sorting keeps the processing
# order (and therefore the sheet order of the generated workbooks) reproducible.
csv_files = sorted(f for f in os.listdir(csv_folder) if f.endswith('.csv'))
csv_files

FileNotFoundError: [Errno 2] No such file or directory: 'CSV'

In [16]:
# Clean every listed CSV in place: parse dates, derive 'Year', coerce the
# 'Price' and 'Change %' columns to floats and keep only the 2014-2023 rows.
for csv in csv_files:
    # csv_files holds bare file names listed from csv_folder, so resolve each
    # one against that folder instead of the current working directory.
    csv_path = os.path.join(csv_folder, csv)
    df = pd.read_csv(csv_path)

    # Ensure the 'Date' column is in datetime format
    df['Date'] = pd.to_datetime(df['Date'])
    df['Year'] = df['Date'].dt.year

    # Removing , from Prices.
    # The file is overwritten below, so on a re-run the column is already
    # numeric and has no .str accessor; skip the conversion in that case.
    if not pd.api.types.is_numeric_dtype(df['Price']):
        df['Price'] = df['Price'].str.replace(',', '', regex=False).astype(float)

    # Removing % from Change % (same re-run guard as for 'Price')
    if not pd.api.types.is_numeric_dtype(df['Change %']):
        df['Change %'] = df['Change %'].str.rstrip('%').astype(float)

    # Filter for years 2014-2023
    df = df[(df['Year'] >= 2014) & (df['Year'] <= 2023)]

    # Write the cleaned data back over the source file
    df.to_csv(csv_path, index=False)

# For Standard Deviation Data

In [17]:
def calculate_yearly_std(df):
    """
    Calculates the standard deviation of the 'Change %' column for each year.

    The year labels are taken from the group keys, so the result is correct
    regardless of whether the rows of `df` are ordered oldest-first or newest-first.

    :param df: DataFrame containing a 'Year' column and a numeric 'Change %' column.
    :return: New DataFrame with one row per year (ascending) and the columns
             'Year' and 'Std Dev' (sample standard deviation rounded to 3 decimals).
    """
    return (
        df.groupby(by='Year')['Change %']
        .std()                 # sample standard deviation (ddof=1) per year
        .round(3)
        .rename('Std Dev')
        .reset_index()         # turn the 'Year' group key back into a column
    )

# For Yearly Covariance

In [18]:
def calculate_yearly_covariance(stock_df, index_df):
    """
    Calculates the yearly covariance between the 'Change %' of a stock and of an index.

    The two frames are inner-merged on 'Date' and 'Year', so only dates present
    in both contribute. Year labels come from the group keys of the merged data,
    which keeps them correct for any row order and for inputs that do not cover
    the same set of years.

    :param stock_df: DataFrame containing 'Date', 'Year' and 'Change %' columns for the stock.
    :param index_df: DataFrame containing 'Date', 'Year' and 'Change %' columns for the index.
    :return: DataFrame with one row per common year (ascending) and the columns
             'Year' and 'Covariance'. When both inputs are equal the value
             column is named 'Variance' instead. Empty if no dates overlap.
    """
    # Identical inputs mean the result is the series' own variance
    value_name = 'Variance' if stock_df.equals(index_df) else 'Covariance'

    # Merge stock and index data on Common 'Date' and 'Year'
    merged_df = pd.merge(
        stock_df, index_df,
        on=['Date', 'Year'], how='inner', suffixes=('_Stock', '_Index'),
    )

    # No overlapping dates: return an empty, correctly shaped frame
    if merged_df.empty:
        return pd.DataFrame({
            'Year': pd.Series(dtype='int64'),
            value_name: pd.Series(dtype='float64'),
        })

    # Off-diagonal element of each year's 2x2 covariance matrix
    yearly_cov = (
        merged_df.groupby('Year')[['Change %_Stock', 'Change %_Index']]
        .apply(lambda pair: pair.cov().iloc[0, 1])
        .rename(value_name)
        .reset_index()     # 'Year' group key becomes the first column
    )

    return yearly_cov

# Finding Beta & Risks

In [19]:
def calculate_beta_risks(stock, index):
    """
    Calculates the yearly market beta of a stock and three derived risk measures.

    Parameters:
        stock (pd.DataFrame): Stock data. It is passed to
            calculate_yearly_covariance, so it needs 'Date', 'Year' and
            'Change %' columns.
        index (pd.DataFrame): Index (market) data with the same columns.

    Returns:
        pd.DataFrame: One row per year for which a beta could be computed, with
        the columns
            'Year',
            'Beta'               - cov(stock, index) / var(index),
            'Total Risk'         - variance of the stock's 'Change %',
            'Systematic Risk'    - Beta squared * variance of the index,
            'Idiosyncratic Risk' - square root of |Total Risk - Systematic Risk|,
                                   carrying the sign of the difference.
    """

    # Yearly variance of the index and covariance of stock vs. index, keyed by year
    index_variance = calculate_yearly_covariance(index, index).set_index('Year')['Variance']
    stock_index_cov = calculate_yearly_covariance(stock, index).set_index('Year')['Covariance']

    # Division aligns on the 'Year' index; keep those labels rather than
    # re-deriving them from row order. Years without a beta are dropped.
    beta = (
        stock_index_cov.div(index_variance)
        .rename('Beta')
        .rename_axis('Year')
        .dropna()
    )
    yearly_cov = beta.reset_index()

    # Total risk is the stock's own yearly variance
    total_risk = calculate_yearly_covariance(stock, stock).rename(columns={'Variance': 'Total Risk'})
    yearly_cov = pd.merge(yearly_cov, total_risk, on=['Year'], how='inner')

    # Calculate systematic risk (Beta squared * Index Variance)
    yearly_cov['Systematic Risk'] = (yearly_cov['Beta'] ** 2) * yearly_cov['Year'].map(index_variance)

    # Calculate idiosyncratic risk (Total Risk - Systematic Risk):
    # signed square root of the absolute difference, so a negative difference
    # stays visible as a negative value instead of producing NaN.
    residual = yearly_cov['Total Risk'] - yearly_cov['Systematic Risk']
    yearly_cov['Idiosyncratic Risk'] = np.sign(residual) * np.sqrt(residual.abs())

    return yearly_cov

# Padding Missing Years (Ensuring 10 Rows for 2014–2023)

In [20]:
def ensure_minimum_rows(df):
    """
    Pads a DataFrame so that it holds at least 10 rows.

    When fewer than 10 rows are present, filler rows are appended whose 'Year'
    is taken (in ascending order) from the years 2014-2023 that do not yet
    occur in the frame; every other column of a filler row is NaN. The padded
    frame is returned sorted by 'Year'. A frame that already has 10 or more
    rows is returned unchanged.

    Parameters:
        df (pd.DataFrame): Must contain a 'Year' column.

    Returns:
        pd.DataFrame: DataFrame with at least 10 rows.

    Raises:
        ValueError: If `df` has no 'Year' column.
    """
    if 'Year' not in df.columns:
        raise ValueError("The DataFrame must contain a 'Year' column.")

    # Number of rows still missing to reach 10
    shortfall = 10 - len(df)
    if shortfall <= 0:
        return df

    # Years in 2014-2023 not yet present, ascending, limited to the shortfall
    present = set(df['Year'])
    filler_years = [yr for yr in range(2014, 2024) if yr not in present][:shortfall]

    # All-NaN rows with only the 'Year' column populated
    padding = pd.DataFrame({name: np.nan for name in df.columns}, index=range(shortfall))
    padding['Year'] = filler_years

    padded = pd.concat([df, padding], ignore_index=True)
    return padded.sort_values(by='Year', ascending=True)


# Index Calculations

In [21]:
# Index.csv is one of the files listed from csv_folder, so load it from there
# rather than from the current working directory.
index = pd.read_csv(os.path.join(csv_folder, 'Index.csv'))

# Date / price / change / year columns of the index, in reversed row order
index_price = index.loc[:,['Date', 'Price', 'Change %', 'Year']][::-1]

# Yearly standard deviation of the index's 'Change %'
index_std = calculate_yearly_std(index)

# Generating The Excel Sheets

In [22]:
# Build "<country>_beta_risks.xlsx": one sheet per stock CSV holding the yearly
# standard deviations (stock and index), beta and the three risk measures.
with pd.ExcelWriter(f"{country}_beta_risks.xlsx", engine="xlsxwriter") as writer:

    for csv_file in csv_files:
        # The index is the benchmark the stocks are compared against, so it
        # gets no sheet of its own here.
        if csv_file == 'Index.csv':
            continue

        # Reading in from CSV File (file names are relative to csv_folder)
        temp_df = pd.read_csv(os.path.join(csv_folder, csv_file))

        # Merging with Index
        std_dev_df = pd.merge(
            calculate_yearly_std(temp_df), index_std,
            on='Year', how='inner', suffixes=(' of Stock', ' of Index'),
        )

        # Calculating Beta & 3 Risks
        beta_risks_df = calculate_beta_risks(temp_df, index)

        # Merge the Stddev and Beta Dataframes
        merged_df = pd.merge(std_dev_df, beta_risks_df, on='Year', how='inner')

        # Ensuring 10 rows are there (2014 to 2023)
        merged_df = ensure_minimum_rows(merged_df)

        # Using first 9 characters of the file name to name the Excel Sheet
        sheet_name = csv_file[:9]

        # Writing to a separate sheet
        merged_df.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Excel file '{country}_beta_risks.xlsx' created successfully!")

Excel file 'USA_beta_risks.xlsx' created successfully!


In [None]:
# Build "<country>_pct_change.xlsx": one sheet per CSV with the stock's price and
# change next to the index level and change for the same date.

# The index side of the merge is the same for every file, so prepare it once:
# parse the dates on a copy (leaving `index` untouched) and keep the needed columns.
index_copy = index.copy()
index_copy['Date'] = pd.to_datetime(index_copy['Date'])
index_copy = index_copy[['Date', 'Price', 'Change %']]

with pd.ExcelWriter(f"{country}_pct_change.xlsx", engine="xlsxwriter") as writer:
    # NOTE(review): unlike the beta/risk workbook, 'Index.csv' is not skipped
    # here, so it gets a sheet comparing the index with itself - confirm intended.
    for csv_file in csv_files:
        # Read CSV and process stock data (file names are relative to csv_folder)
        df = pd.read_csv(os.path.join(csv_folder, csv_file))

        # Convert Date column to datetime so it matches the index side of the merge
        df['Date'] = pd.to_datetime(df['Date'])

        # Select relevant columns
        df = df[['Date', 'Price', 'Change %']]

        # Merge using datetime columns (inner join: only dates present in both)
        df = pd.merge(df, index_copy, on='Date', suffixes=(' in Stock Price', ' in Index Level'))

        # Rename columns
        df = df.rename(columns={'Price in Stock Price': 'Stock Price', 'Price in Index Level': 'Index Level'})

        # Write to Excel
        sheet_name = csv_file[:9]  # Cleaner sheet name
        df.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Excel file '{country}_pct_change.xlsx' created successfully!")

Excel file 'USA_pct_change.xlsx' created successfully!
