In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
import torch
import numpy as np
import pandas as pd
import time
import argparse
from functools import partial

# add code directory to path
import sys
sys.path.append('/cluster/home/kheuto01/code/prob_diff_topk')

from metrics import top_k_onehot_indicator
from torch_perturb.perturbations import perturbed
from torch_models import NegativeBinomialDebug, torch_bpr_uncurried, deterministic_bpr


2025-01-29 13:09:14.574671: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-29 13:09:14.620754: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-29 13:09:14.620784: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-29 13:09:14.622012: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-29 13:09:14.629981: I tensorflow/core/platform/cpu_feature_guar

In [2]:


def convert_df_to_3d_array(df):
    # Ensure the DataFrame has a MultiIndex with 'geoid' and 'timestep'
    if not isinstance(df.index, pd.MultiIndex) or set(df.index.names) != {'geoid', 'timestep'}:
        raise ValueError("DataFrame must have a MultiIndex with levels 'geoid' and 'timestep'")

    # Get unique geoids and timesteps, sorted
    geoids = sorted(df.index.get_level_values('geoid').unique())
    timesteps = sorted(df.index.get_level_values('timestep').unique())

    # Create a mapping of geoids to indices
    geoid_to_idx = {geoid: idx for idx, geoid in enumerate(geoids)}

    # Initialize the 3D array
    num_timesteps = len(timesteps)
    num_locations = len(geoids)
    num_features = len(df.columns)
    X = np.zeros((num_timesteps, num_locations, num_features))

    # Fill the 3D array
    for (geoid, timestep), row in df.iterrows():
        t_idx = timesteps.index(timestep)
        g_idx = geoid_to_idx[geoid]
        X[t_idx, g_idx, :] = row.values

    return X, geoids, timesteps

def convert_y_df_to_2d_array(y_df, geoids, timesteps):
    # Ensure the DataFrame has a MultiIndex with 'geoid' and 'timestep'
    if not isinstance(y_df.index, pd.MultiIndex) or set(y_df.index.names) != {'geoid', 'timestep'}:
        raise ValueError("DataFrame must have a MultiIndex with levels 'geoid' and 'timestep'")

    # Initialize the 2D array
    num_timesteps = len(timesteps)
    num_locations = len(geoids)
    y = np.zeros((num_timesteps, num_locations))

    # Create a mapping of geoids to indices
    geoid_to_idx = {geoid: idx for idx, geoid in enumerate(geoids)}

    # Fill the 2D array
    for (geoid, timestep), value in y_df.iloc[:, 0].items():
        t_idx = timesteps.index(timestep)
        g_idx = geoid_to_idx[geoid]
        y[t_idx, g_idx] = value

    return y

def evaluate_model(model, X, y, time, K, M_score_func, perturbed_top_K_func):
    """Evaluate model on given data and return metrics."""
    with torch.no_grad():
        dist = model(X, time)
        
        # Sample and calculate ratio ratings
        y_sample_TMS = dist.sample((M_score_func,)).permute(1, 0, 2)
        ratio_rating_TMS = y_sample_TMS/(1+y_sample_TMS.sum(dim=-1, keepdim=True))
        ratio_rating_TS = ratio_rating_TMS.mean(dim=1)
        
        # Calculate metrics
        nll = -model.log_likelihood(y, X, time)
        perturbed_bpr_T = torch_bpr_uncurried(ratio_rating_TS, y, K=K, 
                                             perturbed_top_K_func=perturbed_top_K_func)
        deterministic_bpr_T = deterministic_bpr(ratio_rating_TS, y, K=K)
        
        metrics = {
            'nll': nll.item(),
            'perturbed_bpr': torch.mean(perturbed_bpr_T).item(),
            'deterministic_bpr': torch.mean(deterministic_bpr_T).item()
        }
        
        return metrics
    
from torch import cuda


def get_less_used_gpu(gpus=None, debug=False):
    """Inspect cached/reserved and allocated memory on specified gpus and return the id of the less used device"""
    if gpus is None:
        warn = 'Falling back to default: all gpus'
        gpus = range(cuda.device_count())
    elif isinstance(gpus, str):
        gpus = [int(el) for el in gpus.split(',')]

    # check gpus arg VS available gpus
    sys_gpus = list(range(cuda.device_count()))
    if len(gpus) > len(sys_gpus):
        gpus = sys_gpus
        warn = f'WARNING: Specified {len(gpus)} gpus, but only {cuda.device_count()} available. Falling back to default: all gpus.\nIDs:\t{list(gpus)}'
    elif set(gpus).difference(sys_gpus):
        # take correctly specified and add as much bad specifications as unused system gpus
        available_gpus = set(gpus).intersection(sys_gpus)
        unavailable_gpus = set(gpus).difference(sys_gpus)
        unused_gpus = set(sys_gpus).difference(gpus)
        gpus = list(available_gpus) + list(unused_gpus)[:len(unavailable_gpus)]
        warn = f'GPU ids {unavailable_gpus} not available. Falling back to {len(gpus)} device(s).\nIDs:\t{list(gpus)}'

    cur_allocated_mem = {}
    cur_cached_mem = {}
    max_allocated_mem = {}
    max_cached_mem = {}
    for i in gpus:
        cur_allocated_mem[i] = cuda.memory_allocated(i)
        cur_cached_mem[i] = cuda.memory_reserved(i)
        max_allocated_mem[i] = cuda.max_memory_allocated(i)
        max_cached_mem[i] = cuda.max_memory_reserved(i)
    min_allocated = min(cur_allocated_mem, key=cur_allocated_mem.get)
    if debug:
        print(warn)
        print('Current allocated memory:', {f'cuda:{k}': v for k, v in cur_allocated_mem.items()})
        print('Current reserved memory:', {f'cuda:{k}': v for k, v in cur_cached_mem.items()})
        print('Maximum allocated memory:', {f'cuda:{k}': v for k, v in max_allocated_mem.items()})
        print('Maximum reserved memory:', {f'cuda:{k}': v for k, v in max_cached_mem.items()})
        print('Suggested GPU:', min_allocated)
    return min_allocated
import gc
import inspect
def free_memory(to_delete: list, debug=False):
    import gc
    import inspect
    calling_namespace = inspect.currentframe().f_back
    if debug:
        print('Before:')
        get_less_used_gpu(debug=True)

    for _var in to_delete:
        calling_namespace.f_locals.pop(_var, None)
        gc.collect()
        cuda.empty_cache()
    if debug:
        print('After:')
        get_less_used_gpu(debug=True)

def train_epoch_neg_binom(model, optimizer, K, threshold,
                         M_score_func, feat_TSF,
                         time_T, train_y_TS,
                         perturbed_top_K_func, bpr_weight, nll_weight, update=True):
    """Train one epoch of the negative binomial model."""
    model.train()
    optimizer.zero_grad()
    
    total_loss = 0
    total_gradient_P = None
    
    for t in range(feat_TSF.shape[0]):
        print(f'T: {t}!')
        dist = model(feat_TSF[t:t+1], time_T[t:t+1])
        
        y_sample_TMS = dist.sample((M_score_func,)).permute(1, 0, 2)
        action_denominator_TM = y_sample_TMS.sum(dim=-1, keepdim=True) + 1 

        ratio_rating_TMS = y_sample_TMS / action_denominator_TM
        ratio_rating_TS = ratio_rating_TMS.mean(dim=1)
        ratio_rating_TS.requires_grad_(True)

        def get_log_probs_baked(param):
            distribution = model.build_from_single_tensor(param, feat_TSF[t:t+1], time_T[t:t+1])
            log_probs_TMS = distribution.log_prob(y_sample_TMS.permute(1, 0, 2)).permute(1, 0, 2)
            return log_probs_TMS

        jac_TMSP = torch.autograd.functional.jacobian(get_log_probs_baked, 
                                                      (model.params_to_single_tensor()), 
                                                      strategy='forward-mode', 
                                                      vectorize=True)

        score_func_estimator_TMSP = jac_TMSP * ratio_rating_TMS.unsqueeze(-1)
        score_func_estimator_TSP = score_func_estimator_TMSP.mean(dim=1)    

        positive_bpr_T = torch_bpr_uncurried(ratio_rating_TS, torch.tensor(train_y_TS[t:t+1]), 
                                             K=K, perturbed_top_K_func=perturbed_top_K_func)

        if nll_weight > 0:
            bpr_threshold_diff_T = positive_bpr_T - threshold
            violate_threshold_flag = bpr_threshold_diff_T < 0
            negative_bpr_loss = torch.mean(-bpr_threshold_diff_T * violate_threshold_flag)
        else:
            negative_bpr_loss = torch.mean(-positive_bpr_T)

        nll = -model.log_likelihood(train_y_TS[t:t+1], feat_TSF[t:t+1], time_T[t:t+1])
        loss = bpr_weight * negative_bpr_loss + nll_weight * nll
        loss.backward()

        loss_grad_TS = ratio_rating_TS.grad
        gradient_TSP = score_func_estimator_TSP * torch.unsqueeze(loss_grad_TS, -1)
        gradient_P = torch.sum(gradient_TSP, dim=[0, 1])
        
        if total_gradient_P is None:
            total_gradient_P = gradient_P
        else:
            total_gradient_P += gradient_P
        
        total_loss += loss.item()
        print(f'{jac_TMSP.shape}')
        # Measure memory before and after an operation
        start_memory = torch.cuda.memory_allocated()
        del jac_TMSP, score_func_estimator_TMSP, y_sample_TMS, dist, gradient_TSP, score_func_estimator_TSP
        gc.collect()
        with torch.no_grad():
            torch.cuda.empty_cache()
        end_memory = torch.cuda.memory_allocated()
        print(f'Memory difference: {(end_memory - start_memory)/1024**2:.2f} MB')
        print(f'Ending memory: {end_memory/1024**2:.2f} MB')
        #print(f'{jac_TMSP.shape}')

    gradient_tuple = model.single_tensor_to_params(total_gradient_P)

    for param, gradient in zip(model.parameters(), gradient_tuple):
        if nll_weight > 0:
            gradient = gradient + param.grad
        param.grad = gradient

    if update:
        optimizer.step()

    deterministic_bpr_T = deterministic_bpr(ratio_rating_TS, torch.tensor(train_y_TS), K=K)
    det_bpr = torch.mean(deterministic_bpr_T)

    metrics = {
        'loss': total_loss ,
        'deterministic_bpr': det_bpr.item(),
        'perturbed_bpr': torch.mean(positive_bpr_T).item(),
        'nll': nll.item()
    }

    return metrics, model

In [3]:
good_nll_model = '/cluster/tufts/hugheslab/kheuto01/opioid_grid_try_fix_params/MA/K100_bw30_nw1_ss0.001_nss100_nps100_seed123_sig0.001_tr0.5'
data_dir = '/cluster/tufts/hugheslab/fmuenc01/code/prob_diff_topk/data_dir/asurv/2monthly_ctxtSize5_small_5yrTrain//' #

In [10]:
K = 50
bpr_weight = 0
nll_weight = 1
step_size = 0.001
num_score_samples = 3
num_pert_samples = 100
seed = 123
perturbed_noise = 0.001
threshold = 0.5
epochs = 1
outdir = '/cluster/tufts/hugheslab/kheuto01/debug'
device = 'cuda'
val_freq = 10

In [11]:
# Set random seed for reproducibility
if seed is not None:
    torch.manual_seed(seed)
    np.random.seed(seed)

# Load training data
train_X_df = pd.read_csv(os.path.join(data_dir, 'bird_train_x.csv'), index_col=[0,1])
train_Y_df = pd.read_csv(os.path.join(data_dir, 'bird_train_y.csv'), index_col=[0,1])

# Load validation data
val_X_df = pd.read_csv(os.path.join(data_dir, 'bird_valid_x.csv'), index_col=[0,1])
val_Y_df = pd.read_csv(os.path.join(data_dir, 'bird_valid_y.csv'), index_col=[0,1])

# Process training data
train_X, geoids, timesteps = convert_df_to_3d_array(train_X_df)#.drop(columns='timestep.1'))
train_time_arr = np.array([timesteps] * len(geoids)).T
train_y = convert_y_df_to_2d_array(train_Y_df, geoids, timesteps)

# Process validation data
val_X, _, val_timesteps = convert_df_to_3d_array(val_X_df)#.drop(columns='timestep.1'))
val_time_arr = np.array([val_timesteps] * len(geoids)).T
val_y = convert_y_df_to_2d_array(val_Y_df, geoids, val_timesteps)

# Convert to tensors and move to device
X_train = torch.tensor(train_X, dtype=torch.float32).to(device)
y_train = torch.tensor(train_y, dtype=torch.float32).to(device)
time_train = torch.tensor(train_time_arr, dtype=torch.float32).to(device)

X_val = torch.tensor(val_X, dtype=torch.float32).to(device)
y_val = torch.tensor(val_y, dtype=torch.float32).to(device)
time_val = torch.tensor(val_time_arr, dtype=torch.float32).to(device)

# Initialize model
model = NegativeBinomialDebug(
    num_locations=len(geoids),
    num_fixed_effects=train_X.shape[2], device=device
).to(device)

# Setup optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=step_size)

# Setup top-k function
top_k_func = partial(top_k_onehot_indicator, k=K)
perturbed_top_K_func = perturbed(top_k_func, sigma=perturbed_noise, num_samples=num_pert_samples)

# Initialize metric tracking with separate epoch tracking for validation
metrics = {
    'train': {
        'epochs': [], 
        'loss': [], 
        'nll': [], 
        'perturbed_bpr': [], 
        'deterministic_bpr': []
    },
    'val': {
        'epochs': [], 
        'nll': [], 
        'perturbed_bpr': [], 
        'deterministic_bpr': []
    },
    'times': []
}

best_val_loss = float('inf')


In [12]:
X_train.shape

torch.Size([15, 1338, 9])

In [13]:
start = time.time()
epoch = 0

# Training step
train_metrics, model = train_epoch_neg_binom(
    model, optimizer, K, threshold,
    num_score_samples, X_train, time_train,
    y_train, perturbed_top_K_func,
    bpr_weight, nll_weight, device
)

# Update training metrics
metrics['train']['epochs'].append(epoch)
for metric, value in train_metrics.items():
    metrics['train'][metric].append(value)

T: 0!


  positive_bpr_T = torch_bpr_uncurried(ratio_rating_TS, torch.tensor(train_y_TS[t:t+1]),


torch.Size([1, 3, 1338, 2690])
Memory difference: -112.03 MB
Ending memory: 2287.57 MB
T: 1!
torch.Size([1, 3, 1338, 2690])
Memory difference: -112.03 MB
Ending memory: 2455.70 MB
T: 2!
torch.Size([1, 3, 1338, 2690])
Memory difference: -112.03 MB
Ending memory: 2623.83 MB
T: 3!
torch.Size([1, 3, 1338, 2690])
Memory difference: -112.03 MB
Ending memory: 2791.95 MB
T: 4!
torch.Size([1, 3, 1338, 2690])
Memory difference: -112.03 MB
Ending memory: 2960.07 MB
T: 5!
torch.Size([1, 3, 1338, 2690])
Memory difference: -112.03 MB
Ending memory: 3128.19 MB
T: 6!
torch.Size([1, 3, 1338, 2690])
Memory difference: -112.03 MB
Ending memory: 3296.31 MB
T: 7!
torch.Size([1, 3, 1338, 2690])
Memory difference: -112.03 MB
Ending memory: 3464.44 MB
T: 8!
torch.Size([1, 3, 1338, 2690])
Memory difference: -112.03 MB
Ending memory: 3632.56 MB
T: 9!
torch.Size([1, 3, 1338, 2690])
Memory difference: -112.03 MB
Ending memory: 3800.68 MB
T: 10!
torch.Size([1, 3, 1338, 2690])
Memory difference: -112.03 MB
Ending m

  deterministic_bpr_T = deterministic_bpr(ratio_rating_TS, torch.tensor(train_y_TS), K=K)


In [14]:
metrics

{'train': {'epochs': [0],
  'loss': [inf],
  'nll': [inf],
  'perturbed_bpr': [0.03333333134651184],
  'deterministic_bpr': [0.0]},
 'val': {'epochs': [],
  'nll': [],
  'perturbed_bpr': [],
  'deterministic_bpr': []},
 'times': []}