In [2]:
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def calculate_and_save_correlation_matrices(data_folder, output_folder, *,
                                            window=7, num_days=7):
    """Export trailing log-return correlation matrices as CSV files.

    Every ``*.csv`` file in ``data_folder`` is loaded as one asset whose
    symbol is the file name without its extension. The ``snapped_at`` column
    is parsed as the date and the ``price`` column as the asset price. Assets
    are aligned on their common dates, converted to log returns, and for each
    of the ``num_days`` cutoff dates (the latest date, the day before, ...)
    a correlation matrix over the last ``window`` return rows up to that
    cutoff is written to
    ``<output_folder>/correlation_matrix_<YYYY-MM-DD>.csv``.

    Correlations are stored as integer percentages (``round(corr * 100)``).
    Where a correlation is undefined (fewer than two rows in the window, or
    a constant series) the cell is left empty instead of aborting the export.

    Args:
        data_folder: Folder containing the per-asset CSV files.
        output_folder: Folder that receives the matrices; created if missing.
        window: Maximum number of return rows used for each matrix.
        num_days: Number of consecutive cutoff dates, counted back from the
            most recent date that has returns for every asset.

    Raises:
        ValueError: If ``window`` or ``num_days`` is smaller than 1, if
            ``data_folder`` contains no CSV files, or if the assets share too
            few dates to compute a single log return.
    """
    if window < 1 or num_days < 1:
        raise ValueError("window and num_days must be positive integers")

    # Sorted so the row/column order of the matrices does not depend on the
    # arbitrary order in which the file system lists the directory.
    price_frames = []
    for filename in sorted(os.listdir(data_folder)):
        if not filename.endswith('.csv'):
            continue
        symbol = os.path.splitext(filename)[0]
        df = pd.read_csv(os.path.join(data_folder, filename))

        # Rename columns for consistency; unparseable dates become NaT and
        # are dropped together with the other incomplete rows below.
        df = df.rename(columns={'snapped_at': 'date', 'price': symbol})
        df['date'] = pd.to_datetime(df['date'], errors='coerce')
        price_frames.append(df[['date', symbol]])

    if not price_frames:
        raise ValueError(f"No CSV files found in data folder: {data_folder!r}")

    # Outer-merge on date, then keep only the rows where every asset has a
    # price (and a valid date).
    merged = price_frames[0]
    for frame in price_frames[1:]:
        merged = pd.merge(merged, frame, on='date', how='outer')
    merged = merged.sort_values('date').dropna()

    # Log returns per asset. A zero price yields +/-inf, which dropna() alone
    # would keep and which would turn the correlations into NaN, so those
    # rows are discarded as well.
    prices = merged.set_index('date')
    log_returns = np.log(prices / prices.shift(1))
    log_returns = log_returns.replace([np.inf, -np.inf], np.nan).dropna()

    if log_returns.empty:
        raise ValueError(
            "Not enough overlapping price data to compute log returns"
        )

    # Build every matrix first so nothing is written if a step fails.
    end_date = log_returns.index.max()
    correlation_matrices = {}
    for offset in range(num_days):
        cutoff_date = end_date - timedelta(days=offset)
        window_returns = log_returns.loc[log_returns.index <= cutoff_date].iloc[-window:]
        # Nullable Int64 keeps undefined correlations as <NA>; a plain int
        # cast raises on NaN.
        corr_pct = (window_returns.corr() * 100).round().astype('Int64')
        correlation_matrices[cutoff_date.strftime('%Y-%m-%d')] = corr_pct

    # Save each correlation matrix as a CSV
    os.makedirs(output_folder, exist_ok=True)
    for date, matrix in correlation_matrices.items():
        matrix.to_csv(os.path.join(output_folder, f"correlation_matrix_{date}.csv"))
    print(f"Correlation matrices saved to: {output_folder}")

# Script entry point: export the matrices using the default folder names
if __name__ == "__main__":
    # Folder holding the per-asset CSV files, and the folder that receives
    # the generated matrices -- replace both with your own paths
    data_folder, output_folder = "Data", "Correlation"

    calculate_and_save_correlation_matrices(
        data_folder=data_folder,
        output_folder=output_folder,
    )


Correlation matrices saved to: Correlation
