In [16]:
import pandas as pd
import os
import numpy as np
from datetime import datetime

In [17]:
def create_adjacency_matrix(correlation_matrix, threshold):
    """
    Convert a correlation matrix into a binary adjacency matrix.

    Cells on or below the main diagonal are set to 1.0 when the correlation
    is >= ``threshold`` and 0.0 otherwise; missing or non-numeric cells count
    as 0.0. Cells above the diagonal are left as NaN (empty).

    Parameters
    ----------
    correlation_matrix : pandas.DataFrame
        Correlation values; its index and columns are reused for the result.
    threshold : float
        Minimum correlation (inclusive) for a cell to be marked with 1.

    Returns
    -------
    pandas.DataFrame
        Float-valued frame with the same index and columns as the input.
    """
    # Coerce every column to numbers; entries that cannot be parsed become NaN
    numeric = correlation_matrix.apply(pd.to_numeric, errors="coerce")
    values = numeric.to_numpy(dtype=float)

    # NaN >= threshold evaluates to False, so missing/non-numeric cells map to 0
    linked = (values >= threshold).astype(float)

    # Positional mask of the lower triangle, diagonal included
    lower = np.tril(np.ones(values.shape, dtype=bool))

    # Keep the thresholded value inside the triangle, NaN everywhere else
    return pd.DataFrame(
        np.where(lower, linked, np.nan),
        index=correlation_matrix.index,
        columns=correlation_matrix.columns,
    )

In [18]:
def process_correlation_matrices(source_dir, dest_dir, matrix_type):
    """
    Build thresholded adjacency matrices for every CSV in ``source_dir``.

    Each correlation CSV is read with its first column as the index, turned
    into one adjacency matrix per threshold (0.4, 0.5, 0.6, 0.8) and saved as
    ``<dest_dir>/<threshold>_threshold/<matrix_type>_<threshold>_<year>_<month>_<day>.csv``.
    The per-threshold folder is created if it does not exist yet.

    Parameters
    ----------
    source_dir : str
        Directory containing the correlation matrix CSV files.
    dest_dir : str
        Root directory under which the per-threshold folders are written.
    matrix_type : str
        Prefix for the output file names; should be 'msv' or 'rsv'.
    """
    thresholds = [0.4, 0.5, 0.6, 0.8]

    # Get all CSV files in source directory (sorted for a stable processing order)
    correlation_files = sorted([f for f in os.listdir(source_dir) if f.endswith('.csv')])

    for correlation_file in correlation_files:
        print(f"\nProcessing: {correlation_file}")

        # The date is taken from the last three '_'-separated parts of the
        # file name, e.g. "..._2020_03_30.csv" -> year, month, day
        date_parts = correlation_file.split('_')[-3:]
        year = date_parts[0]
        month = date_parts[1]
        day = date_parts[2].replace('.csv', '')

        # Read correlation matrix
        correlation_matrix = pd.read_csv(os.path.join(source_dir, correlation_file), index_col=0)

        # Process for each threshold
        for threshold in thresholds:
            # Create adjacency matrix
            adjacency_matrix = create_adjacency_matrix(correlation_matrix, threshold)

            # Create filename for adjacency matrix
            adj_filename = f"{matrix_type}_{threshold}_{year}_{month}_{day}.csv"

            # Define destination path
            threshold_dir = os.path.join(dest_dir, f"{threshold}_threshold")
            dest_path = os.path.join(threshold_dir, adj_filename)

            # to_csv does not create missing parent folders, so make sure the
            # per-threshold folder exists before writing
            os.makedirs(threshold_dir, exist_ok=True)

            # Save adjacency matrix
            adjacency_matrix.to_csv(dest_path)
            print(f"Created adjacency matrix for threshold {threshold}: {adj_filename}")

In [19]:
# Source/destination folders, listed as (source, destination) pairs so that
# each input folder sits next to the folder its results are written to.
source_dirs, dest_dirs = map(list, zip(
    ("./gt_dcorr_matrices_msv_15day", "./gt_adj_matrices_msv_15day"),
    ("./gt_dcorr_matrices_msv_30day", "./gt_adj_matrices_msv_30day"),
    ("./gt_dcorr_matrices_rsv_normal_15day", "./gt_adj_matrices_rsv_normal_15day"),
    ("./gt_dcorr_matrices_rsv_normal_30day", "./gt_adj_matrices_rsv_normal_30day"),
))

In [20]:
# Run the conversion for every (source, destination) folder pair
for source_dir, dest_dir in zip(source_dirs, dest_dirs):
    print(f"\nProcessing directory: {source_dir}")

    # The matrix type is inferred from the folder name: 'msv' if present, else 'rsv'
    if "msv" in source_dir:
        matrix_type = "msv"
    else:
        matrix_type = "rsv"

    # Skip folders that are missing instead of failing the whole run
    if not os.path.exists(source_dir):
        print(f"Source directory not found: {source_dir}")
        continue

    process_correlation_matrices(source_dir, dest_dir, matrix_type)


Processing directory: ./gt_dcorr_matrices_msv_15day

Processing: msv_distance_correlation_2020_03_30.csv
Created adjacency matrix for threshold 0.4: msv_0.4_2020_03_30.csv
Created adjacency matrix for threshold 0.5: msv_0.5_2020_03_30.csv
Created adjacency matrix for threshold 0.6: msv_0.6_2020_03_30.csv
Created adjacency matrix for threshold 0.8: msv_0.8_2020_03_30.csv

Processing: msv_distance_correlation_2020_03_31.csv
Created adjacency matrix for threshold 0.4: msv_0.4_2020_03_31.csv
Created adjacency matrix for threshold 0.5: msv_0.5_2020_03_31.csv
Created adjacency matrix for threshold 0.6: msv_0.6_2020_03_31.csv
Created adjacency matrix for threshold 0.8: msv_0.8_2020_03_31.csv

Processing: msv_distance_correlation_2020_04_01.csv
Created adjacency matrix for threshold 0.4: msv_0.4_2020_04_01.csv
Created adjacency matrix for threshold 0.5: msv_0.5_2020_04_01.csv
Created adjacency matrix for threshold 0.6: msv_0.6_2020_04_01.csv
Created adjacency matrix for threshold 0.8: msv_0.8