In [None]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

: 

In [None]:
def load_ticker_data(archive_path):
    """Load the closing prices of every ticker CSV in a folder into one table.

    Each ``*.csv`` file directly inside ``archive_path`` must contain a
    ``Date`` column and a ``Close`` column. The ticker name is the file name
    without its final ``.csv`` extension, so dotted tickers such as
    ``BRK.B.csv`` keep their full name (``BRK.B``).

    Parameters
    ----------
    archive_path : str or os.PathLike
        Folder containing one CSV file per ticker.

    Returns
    -------
    pandas.DataFrame
        One column of ``Close`` prices per ticker, indexed by date in
        ascending order. Columns follow the sorted file names so the result
        does not depend on the directory listing order. Dates missing for a
        ticker are NaN. The frame is empty when no CSV file is found.

    Raises
    ------
    FileNotFoundError
        If ``archive_path`` does not exist.
    KeyError
        If a CSV file lacks a ``Date`` or ``Close`` column.
    """
    closes = {}
    # Sorting makes the column order reproducible across file systems.
    for csv_path in sorted(Path(archive_path).iterdir()):
        if csv_path.suffix != '.csv':
            continue
        frame = pd.read_csv(csv_path)
        frame['Date'] = pd.to_datetime(frame['Date'])
        # `stem` strips only the trailing ".csv", unlike split('.')[0].
        closes[csv_path.stem] = frame.set_index('Date')['Close']
    # Chronological order matters for any downstream time-series test.
    return pd.DataFrame(closes).sort_index()

# Build the closing-price table from the CSV archive
archive_path = 'path/to/your/archive/folder'  # Update this path
price_data = load_ticker_data(archive_path)

# Pairwise correlation between the ticker columns (pandas default method)
correlation_matrix = price_data.corr()

# Render the correlations as an annotated heatmap centred on zero
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Correlation Matrix of Asset Pairs')
fig.tight_layout()
plt.show()

In [None]:
def find_pairs(correlation_matrix, threshold=0.8):
    """List the asset pairs whose absolute correlation exceeds ``threshold``.

    Only the cells above the diagonal are inspected, so each unordered pair
    is reported once and self-correlations are skipped. NaN correlations
    never pass the comparison and are therefore left out.

    Parameters
    ----------
    correlation_matrix : pandas.DataFrame
        Square correlation table; rows and columns are in the same order.
    threshold : float, default 0.8
        A pair is kept when ``abs(correlation) > threshold`` (strict).

    Returns
    -------
    pandas.DataFrame
        Columns ``asset1``, ``asset2`` and ``correlation``, one row per pair,
        in row-major order of the upper triangle. The three columns are
        present even when no pair qualifies.
    """
    names = correlation_matrix.columns
    # k=1 starts one step right of the diagonal: every (i, j) with i < j.
    upper_rows, upper_cols = np.triu_indices(len(names), k=1)
    candidates = (
        (names[row], names[col], correlation_matrix.iloc[row, col])
        for row, col in zip(upper_rows, upper_cols)
    )
    records = [
        {'asset1': first, 'asset2': second, 'correlation': corr}
        for first, second, corr in candidates
        if abs(corr) > threshold
    ]
    # Pinning the columns keeps the schema stable for an empty result.
    return pd.DataFrame(records, columns=['asset1', 'asset2', 'correlation'])

# Collect the pairs above the default correlation threshold and show them
correlated_pairs = find_pairs(correlation_matrix)
print("\nHighly correlated pairs:", correlated_pairs, sep="\n")

In [None]:
# Summarise the price ratio of two assets and run an ADF test on that ratio
def analyze_pair(price_data, asset1, asset2):
    """Compute the price ratio of two assets and run an ADF test on it.

    Parameters
    ----------
    price_data : mapping of asset name -> price series
        Indexed with ``price_data[asset1]`` and ``price_data[asset2]``; the
        notebook passes the table returned by ``load_ticker_data``.
    asset1, asset2 : hashable
        Keys of the numerator and denominator series respectively.

    Returns
    -------
    dict
        ``ratio_mean`` and ``ratio_std`` (mean and standard deviation of the
        ratio) plus ``adf_statistic`` and ``adf_pvalue`` (elements 0 and 1 of
        the ADF result).

    Notes
    -----
    NOTE(review): ``scipy.stats`` does not provide ``adfuller``; the
    Augmented Dickey-Fuller test is implemented in statsmodels
    (``statsmodels.tsa.stattools.adfuller``). As written, the call below is
    expected to raise ``AttributeError`` - confirm and switch the import.

    NOTE(review): an ADF test on the ratio checks whether the ratio itself is
    stationary; it is a proxy rather than a formal cointegration test
    (e.g. Engle-Granger) - confirm this is the intended methodology.
    """
    # Calculate price ratio
    ratio = price_data[asset1] / price_data[asset2]
    
    # Perform Augmented Dickey-Fuller test on the ratio.
    # NOTE(review): the ratio is passed unfiltered, so NaN values (e.g. dates
    # missing for one of the assets) reach the test - verify they are handled.
    adf_result = stats.adfuller(ratio)
    
    return {
        'ratio_mean': ratio.mean(),
        'ratio_std': ratio.std(),
        'adf_statistic': adf_result[0],
        'adf_pvalue': adf_result[1]
    }

# Run the ratio/ADF analysis on every highly correlated pair and print a
# short report; an empty pairs table yields no iterations and no output.
for _, pair in correlated_pairs.iterrows():
    result = analyze_pair(price_data, pair['asset1'], pair['asset2'])
    print(
        f"\nAnalysis for {pair['asset1']} - {pair['asset2']}:",
        f"ADF Statistic: {result['adf_statistic']:.4f}",
        f"ADF p-value: {result['adf_pvalue']:.4f}",
        f"Mean ratio: {result['ratio_mean']:.4f}",
        f"Ratio std: {result['ratio_std']:.4f}",
        sep="\n",
    )