In [1]:
import pandas as pd
import numpy as np
import os
from datetime import datetime
from math import comb

In [2]:
# Input directories to process: one per data variant (MSV / RSV-normal) and
# window length (15-day / 30-day), as encoded in the directory names.
# Each directory is expected to contain "<threshold>_threshold" subfolders
# holding the adjacency-matrix CSV files.
# NOTE(review): the paths are relative, so they resolve against the
# notebook's current working directory.
main_dirs = [
    "../gt_corr_adj_matrix/gt_adj_matrices_msv_15day",
    "../gt_corr_adj_matrix/gt_adj_matrices_msv_30day",
    "../gt_corr_adj_matrix/gt_adj_matrices_rsv_normal_15day",
    "../gt_corr_adj_matrix/gt_adj_matrices_rsv_normal_30day"
]

In [3]:
# Threshold values to process. Each value selects the "<value>_threshold"
# subdirectory inside every input directory and is embedded in the name of
# the output CSV.
# NOTE(review): presumably these are the cutoffs used when the adjacency
# matrices were generated upstream — confirm against the producing notebook.
thresholds = [0.4, 0.5, 0.6, 0.8]

In [4]:
def calculate_network_density(adj_matrix, debug=False):
    """
    Calculate the density of an undirected network from its adjacency matrix.

    Density is the number of edges divided by the number of possible edges,
    C(n, 2), where n is the number of nodes (rows of the matrix). For the
    15x15 matrices used in this notebook the denominator is C(15, 2) = 105.
    Only the strictly lower triangular part of the matrix is inspected, so
    the diagonal is ignored and each node pair is counted once.

    Parameters
    ----------
    adj_matrix : pandas.DataFrame or array-like
        Square adjacency matrix. Entries equal to 1 are counted as edges.
    debug : bool, default False
        When True, print the possible and actual edge counts.

    Returns
    -------
    float
        Edge count divided by C(n, 2); 0.0 when the matrix has fewer than
        two nodes (no edge is possible).

    Raises
    ------
    ValueError
        If ``adj_matrix`` is not a square two-dimensional matrix.
    """
    # np.asarray accepts both DataFrames and plain arrays / nested lists.
    matrix = np.asarray(adj_matrix)
    if matrix.ndim != 2 or matrix.shape[0] != matrix.shape[1]:
        raise ValueError(
            f"Adjacency matrix must be square, got shape {matrix.shape}"
        )

    # Derive the node count from the matrix instead of assuming 15 nodes.
    n = matrix.shape[0]
    total_possible_edges = comb(n, 2)

    # Count actual edges (1s) in the lower triangular part
    lower_tri = np.tril(matrix, k=-1)  # k=-1 excludes diagonal
    actual_edges = int(np.sum(lower_tri == 1))

    if debug:
        print(f"\nTotal possible edges: {total_possible_edges}")
        print(f"Actual edges found: {actual_edges}")

    # A graph with fewer than two nodes has no possible edges; avoid 0/0.
    if total_possible_edges == 0:
        return 0.0

    return actual_edges / total_possible_edges

In [5]:
def process_adjacency_matrices(main_dir, threshold):
    """
    Compute the network density of every adjacency matrix for one threshold.

    Reads each ``.csv`` file in ``<main_dir>/<threshold>_threshold`` (in
    sorted filename order), skips matrices that are not 15x15, and collects
    one density value per file.

    Parameters
    ----------
    main_dir : str
        Directory containing the ``<threshold>_threshold`` subfolders.
    threshold : float or str
        Threshold value used to build the subfolder name.

    Returns
    -------
    pandas.DataFrame
        Columns ``date`` (string built from the last three
        underscore-separated filename parts, e.g.
        ``msv_0.4_2020_03_30.csv`` -> ``2020-03-30``) and
        ``network_density``. The frame is empty when the subfolder is
        missing or no file could be processed.
    """
    threshold_dir = os.path.join(main_dir, f"{threshold}_threshold")

    dates = []
    densities = []

    def _build_result():
        # Single place that defines the output schema, so the missing-directory
        # case returns the same columns as the normal case.
        return pd.DataFrame({
            'date': dates,
            'network_density': densities
        })

    # isdir (not exists): a regular file at this path must not reach listdir.
    if not os.path.isdir(threshold_dir):
        print(f"Directory not found: {threshold_dir}")
        return _build_result()

    adj_files = sorted(f for f in os.listdir(threshold_dir) if f.endswith('.csv'))

    for adj_file in adj_files:
        # Extract and format date from the trailing "<a>_<b>_<c>.csv" parts.
        date_parts = adj_file.split('_')[-3:]
        if len(date_parts) < 3:
            # An unexpected filename must not abort the whole directory.
            print(f"Skipping {adj_file}: cannot extract date from filename")
            continue
        formatted_date = f"{date_parts[0]}-{date_parts[1]}-{date_parts[2].replace('.csv', '')}"

        print(f"Processing {adj_file}")

        try:
            # Read adjacency matrix; first CSV column holds the row labels.
            file_path = os.path.join(threshold_dir, adj_file)
            adj_matrix = pd.read_csv(file_path, index_col=0)

            if adj_matrix.shape != (15, 15):
                print(f"Warning: Matrix size is {adj_matrix.shape}, expected (15, 15)")
                continue

            # Calculate network density
            density = calculate_network_density(adj_matrix)

            dates.append(formatted_date)
            densities.append(density)
            print(f"Processed successfully: density = {density}")

        except Exception as e:
            # Best effort: report the failing file and keep going with the rest.
            print(f"Error processing {adj_file}: {str(e)}")
            continue

    return _build_result()

In [6]:
# Walk every (input directory, threshold) combination and write one density
# CSV per combination into the current working directory.
for main_dir in main_dirs:
    # Labels for the output filename are inferred from the directory name.
    if 'msv' in main_dir:
        matrix_type = 'msv'
    else:
        matrix_type = 'rsv'

    if '15day' in main_dir:
        window_size = '15'
    else:
        window_size = '30'

    print(f"\nProcessing {main_dir}")

    for threshold in thresholds:
        print(f"\nProcessing threshold {threshold}")

        try:
            results_df = process_adjacency_matrices(main_dir, threshold)

            # Nothing to write for this combination; move on to the next one.
            if results_df.empty:
                print(f"No results generated for {main_dir} threshold {threshold}")
                continue

            # Order rows by their date string before saving.
            results_df = results_df.sort_values('date')

            output_filename = f"netdense_{matrix_type}_{threshold}_{window_size}day.csv"
            results_df.to_csv(output_filename, index=False)
            print(f"Created {output_filename} with {len(results_df)} entries")

        except Exception as e:
            # A failure in one combination is reported and does not stop the run.
            print(f"Error processing {main_dir} with threshold {threshold}: {str(e)}")


Processing ../gt_corr_adj_matrix/gt_adj_matrices_msv_15day

Processing threshold 0.4
Processing msv_0.4_2020_03_30.csv
Processed successfully: density = 0.7333333333333333
Processing msv_0.4_2020_03_31.csv
Processed successfully: density = 0.780952380952381
Processing msv_0.4_2020_04_01.csv
Processed successfully: density = 0.7523809523809524
Processing msv_0.4_2020_04_02.csv
Processed successfully: density = 0.7714285714285715
Processing msv_0.4_2020_04_03.csv
Processed successfully: density = 0.6952380952380952
Processing msv_0.4_2020_04_04.csv
Processed successfully: density = 0.7142857142857143
Processing msv_0.4_2020_04_05.csv
Processed successfully: density = 0.8095238095238095
Processing msv_0.4_2020_04_06.csv
Processed successfully: density = 0.780952380952381
Processing msv_0.4_2020_04_07.csv
Processed successfully: density = 0.7428571428571429
Processing msv_0.4_2020_04_08.csv
Processed successfully: density = 0.7428571428571429
Processing msv_0.4_2020_04_09.csv
Processed su