In [5]:
import os
import pandas as pd

folder_path = 'Data'
# Single source of truth for the column being removed, so the membership
# check, the drop and the log messages can never disagree about its name.
column_to_drop = 'Vol_month_mean'

# Walk every entry in the folder; only names ending in '.csv' are processed.
for filename in os.listdir(folder_path):
    if not filename.endswith('.csv'):
        continue

    file_path = os.path.join(folder_path, filename)

    # Load the CSV file.
    df = pd.read_csv(file_path)

    if column_to_drop in df.columns:
        # Remove the column and overwrite the file in place (no index column).
        df = df.drop(columns=column_to_drop)
        df.to_csv(file_path, index=False)
        print(f"Colonna '{column_to_drop}' rimossa da {filename}")
    else:
        # Nothing to do: the file is left untouched on disk.
        print(f"Colonna '{column_to_drop}' non presente in {filename}")


Colonna 'monthly_mean' rimossa da AAA.csv
Colonna 'monthly_mean' rimossa da AAAU.csv
Colonna 'monthly_mean' rimossa da AADR.csv
Colonna 'monthly_mean' rimossa da AAPB.csv
Colonna 'monthly_mean' rimossa da AAPD.csv
Colonna 'monthly_mean' rimossa da AAPU.csv
Colonna 'monthly_mean' rimossa da AAPY.csv
Colonna 'monthly_mean' rimossa da AAXJ.csv
Colonna 'monthly_mean' rimossa da ABEQ.csv
Colonna 'monthly_mean' rimossa da ACES.csv
Colonna 'monthly_mean' rimossa da ACIO.csv
Colonna 'monthly_mean' rimossa da ACSI.csv
Colonna 'monthly_mean' rimossa da ACTV.csv
Colonna 'monthly_mean' rimossa da ACVF.csv
Colonna 'monthly_mean' rimossa da ACWI.csv
Colonna 'monthly_mean' rimossa da ACWV.csv
Colonna 'monthly_mean' rimossa da ACWX.csv
Colonna 'monthly_mean' rimossa da ADFI.csv
Colonna 'monthly_mean' rimossa da ADIV.csv
Colonna 'monthly_mean' rimossa da ADME.csv
Colonna 'monthly_mean' rimossa da ADPV.csv
Colonna 'monthly_mean' rimossa da ADVE.csv
Colonna 'monthly_mean' rimossa da AEMB.csv
Colonna 'mon

In [13]:
import os
import pandas as pd
import numpy as np

class ETFProcessor:
    """Add derived return, volatility and volume columns to ETF price CSVs.

    Each public calculation method scans ``folder_path`` for entries whose
    name ends in ``.csv``, loads the file, adds one column and writes the
    frame back to the same path, overwriting the file in place.

    Some steps read a column written by an earlier step, so they must be run
    in this order:

    1. ``daily_return_calculation``  (reads ``Adj Close``)
    2. ``calculate_monthly_mean``    (reads ``daily_return``)
    3. ``calculate_volatility``      (reads ``daily_return``, ``monthly_mean``)

    ``calculate_volume_monthly_mean`` only reads ``Volume`` and can run at
    any point.
    """

    def __init__(self, folder_path):
        """Remember the folder whose ``.csv`` files will be processed."""
        self.folder_path = folder_path

    def process_file(self, file_path):
        """Load one CSV and return it with ``Date`` parsed to datetime.

        Raises:
            KeyError: if the file has no ``Date`` column.
        """
        df = pd.read_csv(file_path)
        df['Date'] = pd.to_datetime(df['Date'])
        return df

    def save_to_csv(self, df, file_path):
        """Write ``df`` to ``file_path`` without the index column."""
        df.to_csv(file_path, index=False)

    def _apply_to_each_csv(self, add_columns, required=()):
        """Load every ``.csv`` in the folder, update it and save it back.

        Args:
            add_columns: callable taking the loaded DataFrame and adding
                its column(s) to it in place; its return value is ignored.
            required: column names that must already be present in the file.

        Raises:
            KeyError: if a file lacks one of the ``required`` columns. The
                offending file is not rewritten; files listed before it
                have already been saved.
        """
        for filename in os.listdir(self.folder_path):
            if not filename.endswith(".csv"):
                continue
            file_path = os.path.join(self.folder_path, filename)
            df = self.process_file(file_path)
            missing = [name for name in required if name not in df.columns]
            if missing:
                # Same exception type a plain column lookup would raise,
                # but naming the file and the column(s) involved.
                raise KeyError(
                    f"{filename} is missing required column(s): {missing}"
                )
            add_columns(df)
            self.save_to_csv(df, file_path)

    def daily_return_calculation(self):
        """Add ``daily_return``: percent change of ``Adj Close`` row to row.

        The first row has no predecessor and therefore gets NaN.
        """
        def add_daily_return(df):
            price = df['Adj Close']
            # Each row is compared with the row directly above it; rows are
            # not sorted here, so this presumably relies on the CSV already
            # being in ascending date order - TODO confirm.
            df['daily_return'] = (price / price.shift(1) - 1) * 100

        self._apply_to_each_csv(add_daily_return, required=('Adj Close',))

    def calculate_monthly_mean(self):
        """Add ``monthly_mean``: mean ``daily_return`` of the row's month.

        Requires ``daily_return_calculation`` to have been run first.
        """
        def add_monthly_mean(df):
            period = df['Date'].dt.to_period('M')
            mean_by_month = df.groupby(period)['daily_return'].mean()
            # Broadcast the per-month value back onto every row of that month.
            df['monthly_mean'] = period.map(mean_by_month)

        self._apply_to_each_csv(add_monthly_mean, required=('daily_return',))

    def calculate_volatility(self):
        """Add ``volatility``: monthly RMS deviation of ``daily_return``.

        For each calendar month the value is
        ``sqrt(mean((daily_return - monthly_mean) ** 2)) * 100``; NaN rows
        are skipped by the mean. Requires ``daily_return_calculation`` and
        ``calculate_monthly_mean`` to have been run first.
        """
        def add_volatility(df):
            period = df['Date'].dt.to_period('M')
            squared_dev = (df['daily_return'] - df['monthly_mean']) ** 2
            # One grouped mean plus a map replaces a per-month loop that
            # rebuilt the period mask over the whole frame for every month.
            mean_sq_by_month = squared_dev.groupby(period).mean()
            # NOTE(review): daily_return is already multiplied by 100 in
            # daily_return_calculation, so this second *100 scales the
            # result again. Kept so values written to disk do not change -
            # confirm the intended units.
            df['volatility'] = np.sqrt(period.map(mean_sq_by_month)) * 100

        self._apply_to_each_csv(
            add_volatility, required=('daily_return', 'monthly_mean')
        )

    def calculate_volume_monthly_mean(self):
        """Add ``Vol_month_mean``: mean ``Volume`` of the row's month."""
        def add_volume_mean(df):
            period = df['Date'].dt.to_period('M')
            volume_by_month = df.groupby(period)['Volume'].mean()
            df['Vol_month_mean'] = period.map(volume_by_month)

        self._apply_to_each_csv(add_volume_mean, required=('Volume',))

# Example of using the ETFProcessor class
if __name__ == "__main__":
    # The folder holding the ETF CSVs can be overridden with the ETF_DATA_DIR
    # environment variable so the cell also runs on other machines; when the
    # variable is unset the original hard-coded location is used.
    etf_folder = os.environ.get(
        'ETF_DATA_DIR',
        'C:\\Users\\stebr\\DireDSCoding\\dscoding-projects\\stefano.bruschi\\Data\\etfs',
    )
    processor = ETFProcessor(etf_folder)
    # Order matters: the monthly mean reads 'daily_return', and the
    # volatility reads both 'daily_return' and 'monthly_mean'.
    processor.daily_return_calculation()
    processor.calculate_monthly_mean()
    processor.calculate_volatility()
    processor.calculate_volume_monthly_mean()

