-
Notifications
You must be signed in to change notification settings - Fork 1
/
model.py
140 lines (112 loc) · 5.7 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from mlx.nn import Dropout
from mlx.nn import Module
from mlx.nn import Conv2d
from mlx.nn import Linear
from mlx.nn import MaxPool2d
from mlx.nn import ReLU
import mlx.core as mx
import mlx.nn as nn
class CNN(Module):
    """
    A custom Convolutional Neural Network (CNN) designed for image classification tasks,
    built using Apple's MLX framework (mlx.nn.Module). This network architecture can be
    applied to a variety of image datasets such as CIFAR and MNIST (where it is easily able
    to achieve 99%+ accuracy), though it underperforms compared to more sophisticated model
    architectures such as ResNet.
    The model consists of several convolutional layers followed by max pooling to extract
    features, and fully connected layers for classification. Dropout layers are included to
    reduce overfitting.
    Parameters:
    - channels (int): The number of input channels (e.g., 1 for grayscale images, 3 for RGB images).
    - dim (int): The height/width of the (assumed square) input images.
    - classes (int): The number of output classes for the classification task.
    The network architecture is as follows:
    - First block: Convolutional layer with ReLU activation followed by MaxPooling.
    - Second block: Another Convolutional layer with ReLU activation followed by MaxPooling.
    - Third block: A third Convolutional layer with ReLU activation followed by MaxPooling.
    - Fourth block: A Linear layer with dropout and ReLU activation for the first fully connected layer.
    - Fifth block: A second Linear layer with dropout for reducing the feature size further.
    - Sixth block: A third Linear layer with dropout leading to the final classification layer.
    - Output layer: A LogSoftmax for producing log-probabilities for each class.
    Dropout is applied after each fully connected layer with a dropout probability of 0.3
    to prevent overfitting. The model automatically calculates the input size for the
    first fully connected layer based on the input image dimensions.
    NOTE(review): MLX convolutions expect channels-last input, i.e. (N, H, W, C) —
    callers coming from PyTorch's (N, C, H, W) layout must transpose first; verify
    against the data pipeline.
    """
    def __init__(self, channels, dim, classes):
        """
        Initializes this network, as described above. Dynamically determines some of the input feature
        dimensionality of the later linear layers, based on the (assumed square) height or width
        of the image data.
        """
        super(CNN, self).__init__()
        final_out_channels = 100
        # With kernel_size=5 and padding=2 every conv preserves H/W, so only the
        # three 2x2 pools (each halving H/W) affect the flattened feature count.
        fully_connected_input_size = self.calculate_fc_input_size(dim, final_out_channels)
        # First block: Conv => ReLU => MaxPool
        self.conv1 = Conv2d(in_channels=channels, out_channels=20, kernel_size=(5, 5), padding=2)
        self.relu1 = ReLU()
        self.maxpool1 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # Second block: Conv => ReLU => MaxPool
        self.conv2 = Conv2d(in_channels=20, out_channels=50, kernel_size=(5, 5), padding=2)
        self.relu2 = ReLU()
        self.maxpool2 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # Third block: Conv => ReLU => MaxPool layers
        self.conv3 = Conv2d(in_channels=50, out_channels=final_out_channels, kernel_size=(5, 5), padding=2)
        self.relu3 = ReLU()
        self.maxpool3 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # Fourth block: Linear => Dropout => ReLU layers
        self.linear1 = Linear(input_dims=fully_connected_input_size, output_dims=fully_connected_input_size // 2)
        self.dropout1 = Dropout(p=0.3)
        # BUGFIX: this was previously also named `relu3`, silently overwriting the
        # third conv block's activation. Renamed to `relu4`; behavior is unchanged
        # because ReLU is stateless, but the duplicate name was a latent bug.
        self.relu4 = ReLU()
        # Fifth block: Linear => Dropout layers
        self.linear2 = Linear(input_dims=fully_connected_input_size // 2, output_dims=fully_connected_input_size // 4)
        self.dropout2 = Dropout(p=0.3)
        # Sixth block: Linear => Dropout layers
        self.linear3 = Linear(input_dims=fully_connected_input_size // 4, output_dims=classes)
        # NOTE(review): dropout applied to the final logits (before log_softmax) is
        # unusual — it scales/zeroes class scores during training. Kept for
        # backward compatibility; confirm this is intentional.
        self.dropout3 = Dropout(p=0.3)
    def __call__(self, X):
        """
        Forward pass: three Conv=>ReLU=>MaxPool blocks, flatten, three Linear
        blocks with dropout, then log-softmax over the class axis.
        Returns an array of shape (batch, classes) containing log-probabilities.
        """
        # First block: Conv => ReLU => MaxPool
        X = self.conv1(X)
        X = self.relu1(X)
        X = self.maxpool1(X)
        # Second block: Conv => ReLU => MaxPool
        X = self.conv2(X)
        X = self.relu2(X)
        X = self.maxpool2(X)
        # Third block: Conv => ReLU => MaxPool layers
        X = self.conv3(X)
        X = self.relu3(X)
        X = self.maxpool3(X)
        # Flatten all non-batch axes to fit into the fully-connected layer `linear1`
        X = mx.flatten(X, 1)
        # Fourth block: Linear => Dropout => ReLU layers
        X = self.linear1(X)
        X = self.dropout1(X)
        X = self.relu4(X)
        # Fifth block: Linear => Dropout layers
        X = self.linear2(X)
        X = self.dropout2(X)
        # Sixth block: Linear => Dropout layers
        X = self.linear3(X)
        X = self.dropout3(X)
        # Generate class predictions (log-probabilities)
        return nn.log_softmax(X, axis=1)
    def calculate_fc_input_size(self, square_dim, final_out_channels, num_pools=3):
        """
        Calculate the input feature dimension for the first fully-connected layer
        based on the input image dimensionality.
        Args:
        - square_dim (int): The width and height of the square input images.
        - final_out_channels (int): Output channel count of the last convolutional layer.
        - num_pools (int): Number of pooling layers (assumed to halve dimensions each time).
        Returns:
        - int: The number of input features for the first fully-connected layer.
        """
        # Assuming each convolutional layer is followed by a pooling layer that halves the input
        # dimensions (floor division, matching MaxPool2d with kernel 2 / stride 2)
        # and that convolutions are set up to preserve dimensions.
        final_size = square_dim // (2 ** num_pools)
        # Total flattened feature count: spatial area times channel depth.
        return int(final_size * final_size * final_out_channels)