'''This module defines a convolutional neural network
that maps 224x224 grayscale images to 68x2 facial keypoints
with a regression.

The architecture is a first proof of concept;
it can certainly be improved.

Author: Mikel Sagardia
Date: 2022-06-10
'''
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
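
# Small illustrative helper (an addition, not part of the original template):
# it encodes the output-size formula used in the shape comments below,
# W_out = (W - F + 2P)/S + 1, for a square convolution with input width W,
# kernel size F, padding P, and stride S.
def conv_output_size(w, f, s=1, p=0):
    '''Output width/height of a square convolution: (W - F + 2P)/S + 1.'''
    return (w - f + 2 * p) // s + 1

# Example: conv_output_size(224, 5) == 220, as computed for Block 1 below.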

# 9,418,640 trainable parameters
class Net(nn.Module):

    def __init__(self, drop_p=0.5):
        super(Net, self).__init__()
        ## Network requirements:
        ## 1. It takes a square, grayscale image as input.
        ## 2. It ends with a linear layer that represents the keypoints:
        ##    136 output values, 2 for each of the 68 keypoint (x, y) pairs.
        ## Max-pooling layers downsample the feature maps, and dropout
        ## (or, optionally, batch normalization) helps avoid overfitting.

        ## Block 1
        # 1 input image channel (grayscale),
        # 32 output channels/feature maps,
        # 5x5 square convolution kernel
        # input size: batch_size x 1 x 224 x 224
        # output size: batch_size x 32 x 220 x 220
        # (W-F)/S + 1 = (224-5)/1 + 1 = 220
        self.conv1 = nn.Conv2d(1, 32, 5)
        # Pooling layer, shared by all blocks:
        # input size: batch_size x 32 x 220 x 220
        # output size: batch_size x 32 x 110 x 110
        # kernel_size=2, stride=2 -> W = W/2 = 220/2 = 110
        self.pool = nn.MaxPool2d(2, 2)
        #self.norm1 = nn.BatchNorm2d(32)  # num channels; parameters are learned!
        #self.dropout1 = nn.Dropout(p=np.round(drop_p, 2))

        ## Block 2
        # 32 input channels,
        # 32 output channels/feature maps,
        # 3x3 square convolution kernel
        # input size: batch_size x 32 x 110 x 110
        # output size: batch_size x 32 x 108 x 108
        # (W-F)/S + 1 = (110-3)/1 + 1 = 108
        self.conv2 = nn.Conv2d(32, 32, 3)
        # After pooling:
        # input size: batch_size x 32 x 108 x 108
        # output size: batch_size x 32 x 54 x 54
        # kernel_size=2, stride=2 -> W = W/2 = 108/2 = 54
        #self.norm2 = nn.BatchNorm2d(32)  # num channels; parameters are learned!
        #self.dropout2 = nn.Dropout(p=np.round(drop_p, 2))

        ## Block 3
        # 32 input channels,
        # 64 output channels/feature maps,
        # 3x3 square convolution kernel
        # input size: batch_size x 32 x 54 x 54
        # output size: batch_size x 64 x 52 x 52
        # (W-F)/S + 1 = (54-3)/1 + 1 = 52
        self.conv3 = nn.Conv2d(32, 64, 3)
        # After pooling:
        # input size: batch_size x 64 x 52 x 52
        # output size: batch_size x 64 x 26 x 26
        # kernel_size=2, stride=2 -> W = W/2 = 52/2 = 26
        self.dropout3 = nn.Dropout(p=np.round(drop_p, 2))

        ## Block 4
        # 64 input channels,
        # 64 output channels/feature maps,
        # 3x3 square convolution kernel
        # input size: batch_size x 64 x 26 x 26
        # output size: batch_size x 64 x 24 x 24
        # (W-F)/S + 1 = (26-3)/1 + 1 = 24
        self.conv4 = nn.Conv2d(64, 64, 3)
        # After pooling:
        # input size: batch_size x 64 x 24 x 24
        # output size: batch_size x 64 x 12 x 12
        # kernel_size=2, stride=2 -> W = W/2 = 24/2 = 12
        # 64 x 12 x 12 = 9216
        self.dropout4 = nn.Dropout(p=np.round(drop_p, 2))

        ## Fully connected head
        # input features: batch_size x 64 x 12 x 12, flattened to batch_size x 9216
        self.linear1 = nn.Linear(9216, 1000)
        self.dropout5 = nn.Dropout(p=np.round(drop_p, 2))
        # 68 x 2 keypoints = 136 output values
        self.linear2 = nn.Linear(1000, 136)

    def forward(self, x):
        '''Map a batch of grayscale images (batch_size x 1 x 224 x 224)
        to keypoint vectors (batch_size x 136).'''
        # Block 1
        x = self.pool(F.relu(self.conv1(x)))
        #x = self.norm1(x)
        # Block 2
        x = self.pool(F.relu(self.conv2(x)))
        #x = self.norm2(x)
        # Block 3
        x = self.pool(F.relu(self.conv3(x)))
        x = self.dropout3(x)
        # Block 4
        x = self.pool(F.relu(self.conv4(x)))
        x = self.dropout4(x)
        # Flatten: batch_size x 64 x 12 x 12 -> batch_size x 9216
        x = x.view(x.size(0), -1)
        # Fully connected head
        x = F.relu(self.linear1(x))
        x = self.dropout5(x)
        x = self.linear2(x)
        # x, having gone through all the layers of the model, is returned;
        # we could reshape it if desired:
        # (batch_size, 136) -> (batch_size, 68, 2)
        #x = x.view(x.size(0), -1, 2)
        #print(x.size())
        return x

def initialize_weights(m):
    '''Initialize the weights of a module m; apply to a whole
    network with net.apply(initialize_weights).'''
    if isinstance(m, nn.Conv2d):
        #nn.init.kaiming_uniform_(m.weight.data, nonlinearity='relu')
        nn.init.xavier_uniform_(m.weight.data, gain=nn.init.calculate_gain('relu'))
        if m.bias is not None:
            nn.init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight.data, 1)
        nn.init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        #nn.init.kaiming_uniform_(m.weight.data)
        nn.init.xavier_uniform_(m.weight.data, gain=1.0)
        nn.init.constant_(m.bias.data, 0)

def get_num_parameters(net):
    '''Count the trainable parameters of a network.'''
    model_parameters = filter(lambda p: p.requires_grad, net.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    return params
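
# A minimal usage sketch (an illustration, not part of the original project
# code): instantiate the network, initialize its weights, and run a random
# grayscale image through it.
if __name__ == '__main__':
    net = Net(drop_p=0.5)
    net.apply(initialize_weights)
    print(get_num_parameters(net))  # 9418640 trainable parameters
    x = torch.randn(1, 1, 224, 224)  # batch_size x 1 x 224 x 224
    keypoints = net(x)
    print(keypoints.size())  # torch.Size([1, 136])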