-
Notifications
You must be signed in to change notification settings - Fork 0
/
paper_2stn.py
132 lines (112 loc) · 4.13 KB
/
paper_2stn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import math
# Number of output classes: the width of the final linear layer (fc2) of Net.
# NOTE(review): the remark below cites 43 classes for GTSRB, yet the value is
# 37 -- confirm which label set this model is meant to be trained on.
nclasses = 37 # GTSRB has 43 classes
class Net(nn.Module):
    """Three-convolution classifier with two spatial transformer (STN) stages.

    Pipeline of ``forward``:

    1. STN 1 warps the raw image; its localisation net looks at a 28 x 28
       bilinear resize of the input.
    2. ``conv1`` -> max-pool -> ReLU -> ``bn1``.
    3. STN 2 warps the resulting 100-channel feature map; its localisation
       net looks at that same feature map.
    4. ``conv2`` / ``conv3`` (each followed by optional channel dropout,
       max-pool and ReLU), ``bn2``, then ``fc1`` -> ``fc2``.

    The hard-coded layer widths (``fc1`` takes 2250 = 250 * 3 * 3 features,
    ``fc_loc2`` takes 200 = 200 * 1 * 1 features) work out for 3 x 48 x 48
    inputs; other spatial sizes are rejected with a ``RuntimeError`` when the
    feature maps are flattened.

    Args:
        no_dp: If true, no dropout layers are created or applied.
        p: Dropout probability used by both ``Dropout2d`` layers and by the
            dropout applied after ``fc1``.
        num_classes: Width of the output layer. ``None`` (the default) keeps
            the historical behaviour of using the module-level ``nclasses``.

    The forward pass returns raw, un-normalised class scores (no softmax).
    """

    def __init__(self, no_dp=False, p=0.5, num_classes=None):
        super(Net, self).__init__()
        if num_classes is None:
            # Resolved lazily so existing callers keep the module-wide default.
            num_classes = nclasses
        self.no_dp = no_dp
        self.p = p

        # Classification trunk. Module creation order is kept stable because
        # it determines both state_dict key order and RNG consumption.
        self.conv1 = nn.Conv2d(3, 100, kernel_size=7)
        self.bn1 = nn.BatchNorm2d(100)
        # bn0 and bn3 are never used in forward(); they are retained so that
        # state_dict keys stay compatible with previously saved checkpoints.
        self.bn0 = nn.BatchNorm2d(3)
        self.conv2 = nn.Conv2d(100, 150, kernel_size=4)
        self.bn3 = nn.BatchNorm2d(150)
        self.conv3 = nn.Conv2d(150, 250, kernel_size=4)
        # Despite its name, bn2 normalises the 250-channel output of conv3.
        self.bn2 = nn.BatchNorm2d(250)
        if not no_dp:
            self.conv2_drop = nn.Dropout2d(p=p)
            self.conv3_drop = nn.Dropout2d(p=p)
        self.fc1 = nn.Linear(2250, 300)
        self.fc2 = nn.Linear(300, num_classes)

        # Initialise the trunk convolutions from N(0, sqrt(2 / (kh * kw * out_channels))).
        # Only these three layers are re-initialised; the localisation
        # convolutions created below keep PyTorch's default initialisation.
        with torch.no_grad():
            for conv in (self.conv1, self.conv2, self.conv3):
                fan = conv.kernel_size[0] * conv.kernel_size[1] * conv.out_channels
                conv.weight.normal_(0, math.sqrt(2. / fan))

        # STN 1: localisation network, fed a 3 x 28 x 28 image -> 10 x 3 x 3.
        self.localization = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True)
        )
        # Regressor for the 2 x 3 affine matrix (6 values).
        self.fc_loc = nn.Sequential(
            nn.Linear(10 * 3 * 3, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2)
        )
        self._init_identity_affine(self.fc_loc[2])

        # STN 2: localisation network, fed the 100 x 21 x 21 output of the
        # first trunk stage -> 200 x 1 x 1.
        self.localization2 = nn.Sequential(
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(100, 150, kernel_size=3),
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2),
            nn.Conv2d(150, 200, kernel_size=3),
            nn.MaxPool2d(2, stride=2)
        )
        # Regressor for the 2 x 3 affine matrix (6 values).
        self.fc_loc2 = nn.Sequential(
            nn.Linear(200, 300),
            nn.ReLU(True),
            nn.Linear(300, 3 * 2)
        )
        self._init_identity_affine(self.fc_loc2[2])

    @staticmethod
    def _init_identity_affine(linear):
        """Make ``linear`` emit the flattened identity affine matrix for any input."""
        with torch.no_grad():
            linear.weight.zero_()
            linear.bias.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    @staticmethod
    def _warp(x, theta):
        """Resample ``x`` on the sampling grid described by the affine parameters ``theta``."""
        # NOTE(review): align_corners is left at the library default for both
        # calls, exactly as before; recent PyTorch releases warn about this.
        grid = F.affine_grid(theta.view(-1, 2, 3), x.size())
        return F.grid_sample(x, grid)

    def stn(self, x, x1):
        """Apply the first spatial transformer.

        Args:
            x: Tensor to be warped.
            x1: Tensor fed to the localisation network; it must reduce to
                10 * 3 * 3 features per sample.

        Returns:
            ``x`` resampled with the predicted affine transform (same size as ``x``).
        """
        xs = self.localization(x1)
        # Keep the batch dimension fixed: a feature-count mismatch must raise
        # instead of being silently folded into the batch dimension.
        xs = xs.view(xs.size(0), self.fc_loc[0].in_features)
        return self._warp(x, self.fc_loc(xs))

    def stn2(self, x, x1):
        """Apply the second spatial transformer.

        Args:
            x: Feature map to be warped.
            x1: Feature map fed to the second localisation network; it must
                reduce to 200 features per sample.

        Returns:
            ``x`` resampled with the predicted affine transform (same size as ``x``).
        """
        xs = self.localization2(x1)
        # Same fixed-batch flatten as in stn(): wrong input sizes fail loudly.
        xs = xs.view(xs.size(0), self.fc_loc2[0].in_features)
        return self._warp(x, self.fc_loc2(xs))

    def forward(self, x):
        """Compute raw class scores for a batch of images.

        Args:
            x: Image batch with 3 channels; the layer sizes require 48 x 48
                spatial resolution.

        Returns:
            Tensor with one row per sample and one un-normalised score per class.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                feature counts the fully connected layers were built for.
        """
        # STN 1: the localisation net works on a fixed 28 x 28 copy of the input.
        # F.interpolate replaces the deprecated F.upsample; align_corners=False
        # is what F.upsample used by default, stated explicitly here.
        x1 = F.interpolate(x, size=(28, 28), mode='bilinear', align_corners=False)
        x = self.stn(x, x1)
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = self.bn1(x)

        # STN 2: the feature map both drives the localisation net and is warped.
        x = self.stn2(x, x)

        x = self.conv2(x)
        if not self.no_dp:
            x = self.conv2_drop(x)
        x = F.relu(F.max_pool2d(x, 2))

        x = self.conv3(x)
        if not self.no_dp:
            x = self.conv3_drop(x)
        x = F.relu(F.max_pool2d(x, 2))

        # NOTE(review): applied in both dropout modes; the two modes differ
        # only in dropout -- confirm bn2 was not meant for the dropout path only.
        x = self.bn2(x)

        # Flatten per sample. The batch size is pinned so that an unexpected
        # spatial size raises rather than yielding a wrong number of rows.
        x = x.view(x.size(0), self.fc1.in_features)
        x = F.relu(self.fc1(x))
        if not self.no_dp:
            x = F.dropout(x, p=self.p, training=self.training)
        return self.fc2(x)