## COMPUTE DAILY RETURNS

In [None]:
import os
import pandas as pd

# Directory that holds one CSV file per ETF
folder = 'Data/etfs'

# Walk the directory and add a percentage daily-return column to every CSV
for file in os.listdir(folder):
    # Entries without a '.csv' extension are left untouched
    if not file.endswith(".csv"):
        continue
    file_path = os.path.join(folder, file)
    df = pd.read_csv(file_path)
    # Percentage change of 'Adj Close' relative to the row directly above;
    # the first row has no predecessor, so its value is NaN
    adj_close = df['Adj Close']
    df['daily_return'] = (adj_close / adj_close.shift(1) - 1) * 100
    # Overwrite the source file in place, omitting the row index
    df.to_csv(file_path, index=False)

## COMPUTE MONTHLY MEAN FOR DAILY RETURNS

In [None]:
# Import necessary libraries
import os
import pandas as pd

# Directory that holds one CSV file per ETF
folder = 'Data/etfs'

# Add to every CSV a column holding the mean daily return of the row's month
for file in os.listdir(folder):
    # Entries without a ".csv" extension are left untouched
    if not file.endswith(".csv"):
        continue
    file_path = os.path.join(folder, file)
    df = pd.read_csv(file_path)
    # Parse the dates so each row can be labelled with its calendar month
    df['Date'] = pd.to_datetime(df['Date'])
    month = df['Date'].dt.to_period('M')
    # One average per month ...
    monthly_mean = df.groupby(month)['daily_return'].mean()
    # ... broadcast back so every row carries the average of its own month
    df['monthly_mean'] = month.map(monthly_mean)
    # Overwrite the source file in place, omitting the row index
    df.to_csv(file_path, index=False)


## COMPUTE MONTHLY RMSE

In [None]:
# Import necessary libraries
import os
import pandas as pd
import numpy as np

# Folder containing the CSV files
folder = 'Data/etfs'


def add_monthly_rmse(df):
    """Add a per-month 'RMSE' column to *df* and return the same DataFrame.

    For each calendar month, the RMSE is the square root of the mean squared
    difference between 'daily_return' and 'monthly_mean' over that month's
    rows. Every row receives the value of its own month.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Date', 'daily_return' and 'monthly_mean'.
        It is modified in place: 'Date' is converted to datetime and 'RMSE'
        is added (or overwritten).

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Notes
    -----
    Missing values in the squared differences are skipped when averaging.
    Rows whose 'Date' is missing belong to no month and get NaN. The 'RMSE'
    column is always created, even when *df* has no rows.
    """
    # Convert the 'Date' column to datetime format
    df['Date'] = pd.to_datetime(df['Date'])

    # Month label of every row, computed once and reused below for both the
    # grouping and the broadcast (instead of once per month)
    month = df['Date'].dt.to_period("M")

    # Squared deviation of each daily return from its month's mean.
    # astype(float) keeps the aggregation numeric even when the columns were
    # loaded without any rows and therefore carry a non-numeric dtype.
    squared_dev = (df['daily_return'] - df['monthly_mean']).astype(float) ** 2

    # One mean squared error per month, then mapped back onto the rows
    monthly_mse = squared_dev.groupby(month).mean()
    df['RMSE'] = np.sqrt(month.map(monthly_mse))
    return df


# Guarded so the helper above can be imported (e.g. by a test) without
# touching the data folder; when the cell or script is executed directly
# __name__ is "__main__", so the loop runs exactly as before.
if __name__ == "__main__":
    # Loop through all CSV files in the folder
    for csv_file in os.listdir(folder):
        if not csv_file.endswith('.csv'):
            continue
        # Create the full file path
        file_path = os.path.join(folder, csv_file)

        # Load the data, add the 'RMSE' column and write the file back
        # without the row index
        df = add_monthly_rmse(pd.read_csv(file_path))
        df.to_csv(file_path, index=False)

## COMPUTE MONTHLY MEAN OF VOLUMES 

In [None]:
# Import necessary libraries
import os
import pandas as pd

# Directory that holds one CSV file per ETF
folder = 'Data/etfs'

# Add to every CSV a column holding the mean traded volume of the row's month
for file in os.listdir(folder):
    # Entries without a ".csv" extension are left untouched
    if not file.endswith(".csv"):
        continue
    file_path = os.path.join(folder, file)
    df = pd.read_csv(file_path)
    # Parse the dates so each row can be labelled with its calendar month
    df['Date'] = pd.to_datetime(df['Date'])
    month = df['Date'].dt.to_period('M')
    # Average 'Volume' within each month
    monthly_volume_mean = df['Volume'].groupby(month).mean()
    # Give every row the average volume of the month it falls in
    df['Vol_month_mean'] = month.map(monthly_volume_mean)
    # Overwrite the source file in place, omitting the row index
    df.to_csv(file_path, index=False)
