Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
tensorboy committed Jul 27, 2018
1 parent 5b3d879 commit 0ec63b4
Show file tree
Hide file tree
Showing 15 changed files with 387 additions and 45 deletions.
Binary file added __pycache__/pid.cpython-36.pyc
Binary file not shown.
12 changes: 12 additions & 0 deletions mnist_moment.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
import os
from utils import Bar, Logger, AverageMeter, accuracy, mkdir_p, savefig
import torch.nn.functional as F

model_save_dir = '/data/mnist/models'

# Hyper Parameters
input_size = 784
hidden_size = 1000
Expand Down Expand Up @@ -58,6 +61,7 @@ def forward(self, x):
#optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
optimizer = SGD(net.parameters(), lr=learning_rate, weight_decay=0.0001, momentum=0.9)
# Train the Model
iters = 0
for epoch in range(num_epochs):

train_loss_log = AverageMeter()
Expand All @@ -79,6 +83,14 @@ def forward(self, x):
train_loss_log.update(train_loss.data[0], images.size(0))
train_acc_log.update(prec1[0], images.size(0))

save_name = os.path.join(model_save_dir, str(iters)+'.pth.tar')
torch.save({'iter': iters,
'state_dict': net.state_dict(),
'optimizer' : optimizer.state_dict()},
save_name)

iters+=1

if (i+1) % 100 == 0:
print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Acc: %.8f'
%(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, train_loss_log.avg, train_acc_log.avg))
Expand Down
118 changes: 118 additions & 0 deletions mnist_moment.py~
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch.optim.sgd import SGD
import os
from utils import Bar, Logger, AverageMeter, accuracy, mkdir_p, savefig
import torch.nn.functional as F

# Directory where the training loop below writes one checkpoint per iteration.
model_save_dir = '/data/mnist/models'

# Hyper Parameters
input_size = 784  # 28*28 pixels per flattened MNIST image
hidden_size = 1000  # width of the single hidden layer
num_classes = 10  # digits 0-9
num_epochs = 20
batch_size = 100
learning_rate = 0.01

# Training log (utils.Logger); one row is appended per epoch by the loop below.
logger = Logger('momentum.txt', title='mnist')
logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])

# MNIST Dataset (downloaded to ./data on first use)
train_dataset = dsets.MNIST(root='./data',
train=True,
transform=transforms.ToTensor(),
download=True)

test_dataset = dsets.MNIST(root='./data',
train=False,
transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
batch_size=batch_size,
shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
batch_size=batch_size,
shuffle=False)

# Neural Network Model (1 hidden layer)
class Net(nn.Module):
    """Two-layer MLP classifier: one ReLU hidden layer, raw logits out."""

    def __init__(self, input_size, hidden_size, num_classes):
        super(Net, self).__init__()
        # Affine maps: input -> hidden and hidden -> class scores.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Single hidden layer with ReLU; no softmax here because the
        # CrossEntropyLoss used by the training script expects raw logits.
        return self.fc2(F.relu(self.fc1(x)))

# ---- Model ----
net = Net(input_size, hidden_size, num_classes)
net.cuda()

# ---- Loss and optimizer ----
criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
optimizer = SGD(net.parameters(), lr=learning_rate, weight_decay=0.0001, momentum=0.9)

# Ensure the checkpoint directory exists before the first torch.save()
# (previously torch.save would raise if /data/mnist/models was missing).
mkdir_p(model_save_dir)

# ---- Train the model ----
iters = 0  # global iteration counter; names the per-iteration checkpoints
for epoch in range(num_epochs):
    # BUG FIX: net.eval() at the end of each epoch previously left the model
    # in eval mode for every epoch after the first. A no-op for this
    # pure-Linear net, but wrong for any model with dropout/batchnorm.
    net.train()

    train_loss_log = AverageMeter()
    train_acc_log = AverageMeter()
    val_loss_log = AverageMeter()
    val_acc_log = AverageMeter()
    for i, (images, labels) in enumerate(train_loader):
        # Flatten each 28x28 image to a 784-vector and move to GPU.
        images = Variable(images.view(-1, 28*28).cuda())
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()  # zero the gradient buffer
        outputs = net(images)
        train_loss = criterion(outputs, labels)
        train_loss.backward()
        optimizer.step()
        prec1, prec5 = accuracy(outputs.data, labels.data, topk=(1, 5))
        # .data[0] is the PyTorch 0.3-era scalar accessor; use .item() on >=0.4.
        train_loss_log.update(train_loss.data[0], images.size(0))
        train_acc_log.update(prec1[0], images.size(0))

        # Checkpoint every iteration: the experiment records the full
        # optimization trajectory, one file per global step.
        save_name = os.path.join(model_save_dir, str(iters) + '.pth.tar')
        torch.save({'iter': iters,
                    'state_dict': net.state_dict(),
                    'optimizer': optimizer.state_dict()},
                   save_name)

        iters += 1

        if (i+1) % 100 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Acc: %.8f'
                  % (epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, train_loss_log.avg, train_acc_log.avg))

    # ---- Evaluate on the test set at the end of each epoch ----
    net.eval()
    for images, labels in test_loader:
        # Same flatten-then-cuda convention as the training loop (was
        # inconsistently Variable(...).cuda() here before).
        images = Variable(images.view(-1, 28*28).cuda())
        labels = Variable(labels.cuda())
        outputs = net(images)
        test_loss = criterion(outputs, labels)
        val_loss_log.update(test_loss.data[0], images.size(0))
        prec1, prec5 = accuracy(outputs.data, labels.data, topk=(1, 5))
        val_acc_log.update(prec1[0], images.size(0))

    # One log row per epoch: lr, train/val loss, train/val accuracy.
    logger.append([learning_rate, train_loss_log.avg, val_loss_log.avg, train_acc_log.avg, val_acc_log.avg])
    print('Accuracy of the network on the 10000 test images: %.8f %%' % (val_acc_log.avg))
    print('Loss of the network on the 10000 test images: %.8f' % (val_loss_log.avg))

logger.close()
logger.plot()

4 changes: 2 additions & 2 deletions mnist_pid.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
batch_size = 100
learning_rate = 0.01

I=1
I=3
I = float(I)
D = 200
D = 100
D = float(D)


Expand Down
112 changes: 112 additions & 0 deletions mnist_pid.py~
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
from pid import PIDOptimizer
import os
import numpy as np
from utils import Bar, Logger, AverageMeter, accuracy, mkdir_p, savefig
import torch.nn.functional as F
# Hyper Parameters
input_size = 784  # 28*28 pixels per flattened MNIST image
hidden_size = 1000  # width of the single hidden layer
num_classes = 10  # digits 0-9
num_epochs = 20
batch_size = 100
learning_rate = 0.01

# PID optimizer gains (see pid.PIDOptimizer): integral and derivative terms.
# Written directly as float literals; the original int-then-float() round
# trip (I=3; I=float(I), D=100; D=float(D)) was redundant.
I = 3.0
D = 100.0


# Training log (utils.Logger); one row is appended per epoch by the loop below.
logger = Logger('pid.txt', title='mnist')
logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])

# MNIST Dataset (downloaded to ./data on first use)
train_dataset = dsets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Neural Network Model (1 hidden layer)
class Net(nn.Module):
    """Fully-connected MNIST classifier with a single ReLU hidden layer."""

    def __init__(self, input_size, hidden_size, num_classes):
        super(Net, self).__init__()
        # input -> hidden and hidden -> class-score affine layers.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # Returns unnormalized logits; the loss applies log-softmax itself.
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

# ---- Model ----
net = Net(input_size, hidden_size, num_classes)
net.cuda()

# ---- Loss and optimizer ----
criterion = nn.CrossEntropyLoss()
optimizer = PIDOptimizer(net.parameters(), lr=learning_rate, weight_decay=0.0001, momentum=0.9, I=I, D=D)

# ---- Train the model ----
for epoch in range(num_epochs):
    # BUG FIX: net.eval() at the end of each epoch previously left the model
    # in eval mode for every epoch after the first. A no-op for this
    # pure-Linear net, but wrong for any model with dropout/batchnorm.
    net.train()

    train_loss_log = AverageMeter()
    train_acc_log = AverageMeter()
    val_loss_log = AverageMeter()
    val_acc_log = AverageMeter()
    for i, (images, labels) in enumerate(train_loader):
        # Flatten each 28x28 image to a 784-vector and move to GPU.
        images = Variable(images.view(-1, 28*28).cuda())
        labels = Variable(labels.cuda())

        # Forward + Backward + Optimize
        optimizer.zero_grad()  # zero the gradient buffer
        outputs = net(images)
        train_loss = criterion(outputs, labels)
        train_loss.backward()
        optimizer.step()
        prec1, prec5 = accuracy(outputs.data, labels.data, topk=(1, 5))
        # .data[0] is the PyTorch 0.3-era scalar accessor; use .item() on >=0.4.
        train_loss_log.update(train_loss.data[0], images.size(0))
        train_acc_log.update(prec1[0], images.size(0))

        if (i+1) % 100 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Acc: %.8f'
                  % (epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, train_loss_log.avg, train_acc_log.avg))

    # ---- Evaluate on the test set at the end of each epoch ----
    net.eval()
    for images, labels in test_loader:
        # Same flatten-then-cuda convention as the training loop (was
        # inconsistently Variable(...).cuda() here before).
        images = Variable(images.view(-1, 28*28).cuda())
        labels = Variable(labels.cuda())
        outputs = net(images)
        test_loss = criterion(outputs, labels)
        val_loss_log.update(test_loss.data[0], images.size(0))
        prec1, prec5 = accuracy(outputs.data, labels.data, topk=(1, 5))
        val_acc_log.update(prec1[0], images.size(0))

    # One log row per epoch: lr, train/val loss, train/val accuracy.
    logger.append([learning_rate, train_loss_log.avg, val_loss_log.avg, train_acc_log.avg, val_acc_log.avg])
    # %.8f (was %d, which truncated the float accuracy) — matches the
    # momentum twin script so the two printouts are comparable.
    print('Accuracy of the network on the 10000 test images: %.8f %%' % (val_acc_log.avg))
    print('Loss of the network on the 10000 test images: %.8f' % (val_loss_log.avg))

logger.close()
logger.plot()

Binary file modified moment_vs_pid.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
40 changes: 20 additions & 20 deletions momentum.txt
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
Learning Rate Train Loss Valid Loss Train Acc. Valid Acc.
0.010000 0.509680 0.275800 86.908333 92.210000
0.010000 0.255431 0.213543 92.813333 93.890000
0.010000 0.198463 0.172725 94.408333 95.110000
0.010000 0.162204 0.146753 95.431667 95.830000
0.010000 0.136709 0.126174 96.183333 96.420000
0.010000 0.117841 0.118981 96.771667 96.630000
0.010000 0.103934 0.104209 97.195000 96.940000
0.010000 0.092159 0.095841 97.493333 97.290000
0.010000 0.082991 0.089649 97.791667 97.350000
0.010000 0.075273 0.084053 97.988333 97.520000
0.010000 0.069048 0.082308 98.156667 97.550000
0.010000 0.063580 0.077542 98.346667 97.710000
0.010000 0.058794 0.075400 98.473333 97.730000
0.010000 0.054611 0.072476 98.615000 97.790000
0.010000 0.050820 0.071925 98.718333 97.820000
0.010000 0.047619 0.069473 98.765000 97.920000
0.010000 0.044399 0.067499 98.933333 98.040000
0.010000 0.041778 0.067148 98.973333 98.000000
0.010000 0.039280 0.065606 99.058333 98.030000
0.010000 0.037374 0.065613 99.118333 97.950000
0.010000 0.512949 0.275448 86.724998 92.290001
0.010000 0.254912 0.212320 92.735001 93.939995
0.010000 0.200160 0.175231 94.361671 94.970001
0.010000 0.163189 0.147223 95.411667 95.829994
0.010000 0.137898 0.131465 96.161667 96.309998
0.010000 0.119446 0.117758 96.721664 96.619995
0.010000 0.105326 0.109397 97.083336 96.769997
0.010000 0.093634 0.101569 97.418335 97.059998
0.010000 0.084467 0.091853 97.735001 97.309998
0.010000 0.076489 0.088252 97.948334 97.509995
0.010000 0.069930 0.083296 98.138336 97.570000
0.010000 0.064401 0.081024 98.288338 97.649994
0.010000 0.059462 0.076478 98.431671 97.739998
0.010000 0.055186 0.077290 98.588333 97.799995
0.010000 0.051424 0.073510 98.681671 97.889999
0.010000 0.048041 0.070922 98.794998 97.930000
0.010000 0.044974 0.069490 98.889999 97.879997
0.010000 0.042496 0.069584 98.956665 97.879997
0.010000 0.039819 0.065458 99.021667 98.059998
0.010000 0.037536 0.066795 99.135002 97.989998
6 changes: 3 additions & 3 deletions pid.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ def step(self, closure=None):
D_buf.mul_(momentum).add_(d_p-g_buf)
else:
D_buf = param_state['D_buffer']
g_buf = param_state['grad_buffer']

g_buf = param_state['grad_buffer']
D_buf.mul_(momentum).add_(1-momentum, d_p-g_buf)
g_buf = d_p.clone()
self.state[p]['grad_buffer']= d_p.clone()


d_p = d_p.add_(I, I_buf).add_(D, D_buf)
p.data.add_(-group['lr'], d_p)
Expand Down
Loading

0 comments on commit 0ec63b4

Please sign in to comment.