-
Notifications
You must be signed in to change notification settings - Fork 6
Open
Description
When the Seldonian model is a PyTorch model which contains calls to torch.nn.Dropout, gradient descent produces nans. Removing the dropout calls only but keeping all other layers resolves the issue.
Example:
from seldonian.models.pytorch_model import SupervisedPytorchBaseModel
import torch.nn as nn
import torch
class testCNN(nn.Module):
    """CNN from the issue report.

    Four conv blocks (conv -> ReLU -> maxpool -> batchnorm -> Dropout(0.2))
    feed three fully connected layers interleaved with Dropout(0.5); the
    final softmax is sliced so forward() returns only the probability of
    class 1 as a 1-D tensor of length N.
    """

    def __init__(self):
        super(testCNN, self).__init__()
        # Convolutional feature extractor. Registration order is significant:
        # it fixes state_dict keys and the parameter-init RNG sequence.
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.cnn3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.cnn4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.Batch1 = nn.BatchNorm2d(16)
        self.Batch2 = nn.BatchNorm2d(32)
        self.Batch3 = nn.BatchNorm2d(64)
        self.Batch4 = nn.BatchNorm2d(128)
        self.Drop1 = nn.Dropout(0.2)  # applied after every conv block
        self.Drop2 = nn.Dropout(0.5)  # applied between the fc layers
        # Fully connected readout. fc1's input size of 128*1*1 means the conv
        # stack is expected to reduce the spatial dims to 1x1 before flatten.
        self.fc1 = nn.Linear(128 * 1 * 1, 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 2)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Run the network; returns P(class 1) per sample, shape (N,)."""
        out = x
        # Four identical conv blocks: conv -> relu -> pool -> batchnorm -> Drop1.
        conv_blocks = (
            (self.cnn1, self.Batch1),
            (self.cnn2, self.Batch2),
            (self.cnn3, self.Batch3),
            (self.cnn4, self.Batch4),
        )
        for conv, bn in conv_blocks:
            out = self.Drop1(bn(self.maxpool(self.relu(conv(out)))))
        # Flatten the (N, 128, 1, 1) feature map to (N, 128) for the readout.
        out = out.view(out.size(0), -1)
        out = self.Drop2(self.fc1(out))
        out = self.Drop2(self.fc2(out))
        out = self.fc3(out)
        # Softmax over the two logits; keep only the positive-class column.
        return self.softmax(out)[:, 1]
class SeldoPytorchTestModel(SupervisedPytorchBaseModel):
    """Seldonian wrapper that plugs the testCNN network into the engine.

    The base class handles device placement and parameter access; this
    subclass only supplies the concrete PyTorch module.
    """

    def __init__(self, device):
        """Forward the target device to the base class.

        :param device: torch device the model should live on
        """
        super().__init__(device)

    def create_model(self, **kwargs):
        """Instantiate and return the PyTorch module used by the engine.

        NOTE(review): the original docstring says inputs are N,1,28,28
        (1 channel, 28x28 pixels), but testCNN's fc1 assumes the conv
        stack reduces the spatial dims to 1x1 — confirm the true input
        size against the data pipeline.
        """
        return testCNN()
import autograd.numpy as np # Thinly-wrapped version of Numpy
from seldonian.spec import SupervisedSpec
from seldonian.dataset import SupervisedDataSet
from seldonian.utils.io_utils import load_pickle,save_pickle
from seldonian.models import objectives
from seldonian.seldonian_algorithm import SeldonianAlgorithm
from seldonian.parse_tree.parse_tree import (
make_parse_trees_from_constraints)
import torch
if __name__ == "__main__":
torch.manual_seed(0)
regime='supervised_learning'
sub_regime='classification'
N=23700 # Clips off 5 samples (at random) to make total divisible by 150,
# the desired batch size
# Get the data, load from file if already saved
savename_features = './features.pkl'
savename_labels = './labels.pkl'
savename_sensitive_attrs = './sensitive_attrs.pkl'
features = load_pickle(savename_features)
labels = load_pickle(savename_labels)
sensitive_attrs = load_pickle(savename_sensitive_attrs)
assert len(features) == N
assert len(labels) == N
assert len(sensitive_attrs) == N
frac_data_in_safety = 0.5
sensitive_col_names = ['M','F']
meta_information = {}
meta_information['feature_col_names'] = ['img']
meta_information['label_col_names'] = ['label']
meta_information['sensitive_col_names'] = sensitive_col_names
meta_information['sub_regime'] = sub_regime
print("Making SupervisedDataSet...")
dataset = SupervisedDataSet(
features=features,
labels=labels,
sensitive_attrs=sensitive_attrs,
num_datapoints=N,
meta_information=meta_information)
constraint_strs = ['min((ACC | [M])/(ACC | [F]),(ACC | [F])/(ACC | [M])) >= 0.8']
deltas = [0.05]
print("Making parse trees for constraint(s):")
print(constraint_strs," with deltas: ", deltas)
parse_trees = make_parse_trees_from_constraints(
constraint_strs,deltas,regime=regime,
sub_regime=sub_regime,columns=sensitive_col_names)
device = torch.device("mps")
model = SeldoPyTorchTestModel(device)
initial_solution_fn = model.get_model_params
spec = SupervisedSpec(
dataset=dataset,
model=model,
parse_trees=parse_trees,
frac_data_in_safety=frac_data_in_safety,
primary_objective=objectives.binary_logistic_loss,
use_builtin_primary_gradient_fn=False,
sub_regime=sub_regime,
initial_solution_fn=initial_solution_fn,
optimization_technique='gradient_descent',
optimizer='adam',
optimization_hyperparams={
'lambda_init' : np.array([0.5]),
'alpha_theta' : 0.001,
'alpha_lamb' : 0.001,
'beta_velocity' : 0.9,
'beta_rmsprop' : 0.95,
'use_batches' : True,
'batch_size' : 237,
'n_epochs' : 40,
'gradient_library': "autograd",
'hyper_search' : None,
'verbose' : True,
},
batch_size_safety=2000
)
save_pickle('./spec.pkl',spec,verbose=True)
SA = SeldonianAlgorithm(spec)
passed_safety,solution = SA.run(debug=True,write_cs_logfile=True)
if passed_safety:
print("Passed safety test")
st_primary_objective = SA.evaluate_primary_objective(theta=solution,
branch='safety_test')
else:
print("Failed safety test")Running the SeldoPytorchTestModel in the engine produces nans after only a few steps of gradient descent:
Have 40 epochs and 50 batches of size 237 for a total of 2000 iterations
Epoch: 0, batch iteration 0
epoch,batch_i,overall_i,f,g,theta,lambda: 0 0 0 0.6966847490660751 [-0.11317656] [-0.00249562 0.17881456 -0.27434838 ... 0.02548225 -0.05133569
0.03980389] [0.5]
epoch,batch_i,overall_i,f,g,theta,lambda: 0 1 1 0.6979988501544743 [-0.11469971] [-0.00249562 0.17881456 -0.27434838 ... 0.02448849 -0.05033569
0.03880389] [0.49988682]
epoch,batch_i,overall_i,f,g,theta,lambda: 0 2 2 0.6960756663028701 [-0.11772262] [-0.00249562 0.17881456 -0.27434838 ... 0.02493663 -0.04869457
0.03716277] [0.49977212]
epoch,batch_i,overall_i,f,g,theta,lambda: 0 3 3 0.6964742956282217 [-0.12167626] [-0.00249562 0.17881456 -0.27434838 ... 0.02554206 -0.0465446
0.0350128 ] [0.4996544]
File '/Users/ahoag/anaconda3/envs/seldo-pytorch/lib/python3.9/site-packages/autograd/numpy/numpy_vjps.py', line 338
divide by zero encountered in double_scalars
File '/Users/ahoag/anaconda3/envs/seldo-pytorch/lib/python3.9/site-packages/autograd/numpy/numpy_vjps.py', line 340
invalid value encountered in multiply
epoch,batch_i,overall_i,f,g,theta,lambda: 0 4 4 nan [inf] [-0.00249562 0.17881456 -0.27434838 ... nan nan
nan] [0.49953272]
File '/Users/ahoag/anaconda3/envs/seldo-pytorch/lib/python3.9/site-packages/seldonian/optimizers/gradient_descent.py', line 191
Warning: a nan or inf was found during gradient descent. Stopping prematurely and returning NSF.
Wrote /Users/ahoag/beri/code/engine-repo-dev/examples/debug_dropout_issue/logs/candidate_selection_log1.p with candidate selection log info
Failed safety test

Removing the dropout layers from forward() makes the nans go away and gradient descent proceeds normally. We'd like to be able to support dropout layers, so understanding why dropout produces this issue is a high priority.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels