In [2]:
import torch 
import torch.nn as nn
import torch.nn.functional as F

* in features: sepal length, sepal width, petal length, petal width
* out features: Iris Setosa, Iris Versicolour, or Iris Virginica

![](https://raw.githubusercontent.com/damiannolan/iris-neural-network/14a9df14a57ab9d350b7bc92b2903fa1f25c4f1c/img/iris_model.png)

In [None]:
class Model(nn.Module):
    """Small fully connected classifier for the Iris data.

    Architecture: in_features -> h1 -> h2 -> out_features, with a ReLU
    (rectified linear unit) after each of the two hidden layers and no
    activation on the output layer.

    Args:
        in_features: number of input features per flower (default 4).
        h1: number of neurons in the first hidden layer (default 8).
        h2: number of neurons in the second hidden layer (default 9).
        out_features: number of output classes (default 3).
    """

    def __init__(self, in_features=4, h1=8, h2=9, out_features=3):
        super().__init__()
        # "fc" = fully connected. Each layer maps the previous width to the next.
        self.fc1 = nn.Linear(in_features, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.out = nn.Linear(h2, out_features)

    def forward(self, x):
        """Run a forward pass and return the raw output-layer scores for ``x``."""
        # Hidden layers: linear transform followed by ReLU, which replaces
        # negative values with 0 and leaves positive values unchanged.
        for hidden_layer in (self.fc1, self.fc2):
            x = F.relu(hidden_layer(x))
        # The output layer is returned as-is (no activation applied here).
        return self.out(x)

The `forward` method of `Model` implements **forward propagation** through the neural network. Here's a breakdown of how it works:
1. Takes Input x:
    * x represents the input data (e.g., 4 features of Iris flowers: sepal length, width, petal length, width).
2. Passes Through Layers:
    * Step 1: 
        * x = F.relu(self.fc1(x))
        * Input x is passed through the first fully connected layer (fc1), then the ReLU activation function is applied.
        * ReLU replaces negative values with 0 and keeps positive values unchanged.
    * Step 2: 
        * x = F.relu(self.fc2(x))
        * The output from fc1 is passed through the second fully connected layer (fc2), followed by another ReLU.
    * Step 3: x = self.out(x)
        * The final layer (out) produces raw scores (logits) for the 3 Iris flower classes without activation (no softmax here!).
3. Returns Output:
    * The raw scores (logits) for each class are returned. These will later be fed into a loss function (e.g., CrossEntropyLoss, which internally applies softmax). 

### Here's a complete example with actual numbers to show how the calculations work in your Iris classifier:
#### Example Input (1 Iris flower with 4 features):

```py
 x = [5.1, 3.5, 1.4, 0.2]  # sepal_len, sepal_wid, petal_len, petal_wid
```

#### Layer 1 (fc1) Parameters:
Let's assume these random weights and biases were initialized:

**Weights (8×4 matrix):**
```py
W1 = [
    [0.1, -0.2, 0.3, -0.4],  # Neuron 1 weights
    [0.5, -0.1, 0.2, -0.3],  # Neuron 2 weights
    [-0.2, 0.3, -0.1, 0.4],  # Neuron 3 weights
    [0.4, -0.3, 0.2, -0.1],  # Neuron 4 weights
    [0.2, 0.1, -0.3, 0.4],   # Neuron 5 weights
    [-0.1, 0.4, -0.2, 0.3],  # Neuron 6 weights
    [0.3, -0.4, 0.1, -0.2],  # Neuron 7 weights
    [-0.3, 0.2, -0.4, 0.1]   # Neuron 8 weights
]
```

**Bias (8×1 vector):**
```py
b1 = [0.1, -0.1, 0.2, -0.2, 0.3, -0.3, 0.4, -0.4]
```

### Calculation for fc1:
#### 1. Matrix Multiplication (x × W1^T):
```py
# For first neuron:
(5.1×0.1) + (3.5×-0.2) + (1.4×0.3) + (0.2×-0.4) = 0.51 - 0.7 + 0.42 - 0.08 = 0.15

# Similarly for all 8 neurons:
z1 = [0.15, 2.42, -0.03, 1.25, 1.03, 0.67, 0.23, -1.37]
```

#### 2. Add Bias
```py
z1 + b1 = [0.15+0.1, 2.42-0.1, -0.03+0.2, 1.25-0.2, 1.03+0.3, 0.67-0.3, 0.23+0.4, -1.37-0.4]
        = [0.25, 2.32, 0.17, 1.05, 1.33, 0.37, 0.63, -1.77]
```

#### 3. Apply ReLU:
```py
ReLU(z1 + b1) = [max(0,0.25), max(0,2.32), max(0,0.17), 
                max(0,1.05), max(0,1.33), max(0,0.37),
                max(0,0.63), max(0,-1.77)]
             = [0.25, 2.32, 0.17, 1.05, 1.33, 0.37, 0.63, 0]
```

#### Visualization:
| Operation | Neuron 1 | Neuron 2 | Neuron 3 | Neuron 4 | Neuron 5 | Neuron 6 | Neuron 7 | Neuron 8 |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| W×x | 0.15 | 2.42 | -0.03 | 1.25 | 1.03 | 0.67 | 0.23 | -1.37 |
| + bias | 0.25 | 2.32 | 0.17 | 1.05 | 1.33 | 0.37 | 0.63 | -1.77 |
| ReLU | 0.25 | 2.32 | 0.17 | 1.05 | 1.33 | 0.37 | 0.63 | 0 |


In [None]:
# Pick a manual seed for randomization.
# The seed is set *before* the model is built so that the randomly initialised
# layer weights are the same on every run.
torch.manual_seed(41)
# Create an instance of the model using its default layer sizes
model = Model()

In [5]:
import pandas as pd 
import matplotlib.pyplot as plt 
%matplotlib inline

In [6]:
# Location of the Iris CSV file (a raw GitHub gist URL).
url = 'https://gist.githubusercontent.com/curran/a08a1080b88344b0c8a7/raw/0e7a9b0a5d22642a06d3d5b9bcbad9890c8ee534/iris.csv'
# Download and parse the CSV into a DataFrame (requires network access).
my_df = pd.read_csv(url)

In [7]:
# Preview the first rows to check the columns that were loaded.
my_df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [8]:
# Encode the species names as numeric class labels.
# The ids keep this notebook's original convention: setosa = 0,
# virginica = 1, versicolor = 2.
species_to_label = {'setosa': 0.0, 'virginica': 1.0, 'versicolor': 2.0}

# One explicit lookup + cast replaces three chained `.replace` calls, which
# relied on pandas silently downcasting the object column to float (deprecated,
# and the source of the FutureWarning this cell used to emit).
# Values that are not species names pass through the lookup unchanged, so
# re-running the cell on already-encoded data is a no-op; an unexpected string
# makes the cast fail loudly instead of leaving a mixed-type column behind.
my_df['species'] = (
    my_df['species']
    .map(lambda value: species_to_label.get(value, value))
    .astype('float64')
)
my_df

  my_df['species'] = my_df['species'].replace('versicolor',2.0)


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0.0
1,4.9,3.0,1.4,0.2,0.0
2,4.7,3.2,1.3,0.2,0.0
3,4.6,3.1,1.5,0.2,0.0
4,5.0,3.6,1.4,0.2,0.0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,1.0
146,6.3,2.5,5.0,1.9,1.0
147,6.5,3.0,5.2,2.0,1.0
148,6.2,3.4,5.4,2.3,1.0


In [9]:
# Separate the label column from the features ahead of the train/test split:
# y holds the 'species' column, X holds every other column.
y = my_df['species']
X = my_df.drop(columns=['species'])

In [10]:
# Convert the pandas objects to plain numpy arrays
X, y = X.to_numpy(), y.to_numpy()

In [11]:

from sklearn.model_selection import train_test_split

In [12]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=41,
)

In [13]:
# Convert X features to float32 tensors.
# `torch.as_tensor` with an explicit dtype replaces the legacy
# `torch.FloatTensor(...)` type constructor and is safe to re-run on values
# that are already tensors.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
X_test = torch.as_tensor(X_test, dtype=torch.float32)

In [14]:
# Convert y labels to long (int64) tensors, the dtype used here for the class
# indices passed to the loss.
# `torch.as_tensor` with an explicit dtype replaces the legacy
# `torch.LongTensor(...)` type constructor and makes the float -> integer
# conversion of the label values explicit.
y_train = torch.as_tensor(y_train, dtype=torch.long)
y_test = torch.as_tensor(y_test, dtype=torch.long)

In [15]:
# Loss function used to measure the model's error
criterion = nn.CrossEntropyLoss()

# Adam optimizer over all model parameters. lr = learning rate: if the error
# doesn't go down after a bunch of iterations (epochs), lower the learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [16]:
# Train our model.
# Epoch = one run through all the training data in our network.
epoch = 100
losses = []  # loss value recorded at every epoch, as plain Python floats
for i in range(epoch):
    # Go forward and get a prediction. Call the module itself rather than
    # `model.forward(...)`: `__call__` runs any registered hooks, `forward` alone
    # does not.
    y_pred = model(X_train)

    # Measure the loss: predicted values vs. the y_train labels
    loss = criterion(y_pred, y_train)

    # Keep track of losses. `.item()` stores a plain float instead of a
    # 0-d numpy array.
    losses.append(loss.item())

    # Print every 10 epochs
    if i % 10 == 0:
        print(f'Epoch : {i} and loss : {loss}')

    # Backpropagation: clear the old gradients, compute new ones from this
    # epoch's loss, then let the optimizer adjust the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

Epoch : 0 and loss : 1.1397020816802979
Epoch : 10 and loss : 1.0544098615646362
Epoch : 20 and loss : 0.9166715741157532
Epoch : 30 and loss : 0.6260551810264587
Epoch : 40 and loss : 0.39875510334968567
Epoch : 50 and loss : 0.24901509284973145
Epoch : 60 and loss : 0.13584764301776886
Epoch : 70 and loss : 0.07556889206171036
Epoch : 80 and loss : 0.05038198083639145
Epoch : 90 and loss : 0.03888977691531181


In [17]:
# Evaluate the model on the test data.
# Gradient tracking is switched off because no backward pass is done here.
with torch.no_grad():
    # Call the module itself (not `.forward`) so registered hooks are honoured.
    y_eval = model(X_test)
    loss = criterion(y_eval, y_test)

In [18]:
# Show the current value of `loss` (the test-set loss when this cell is run
# right after the evaluation cell).
loss

tensor(0.1286)

In [27]:
# Compare the model's prediction with the true label for every test sample
# and count how many are right.
correct = 0
with torch.no_grad():
    for i, data in enumerate(X_test):
        # Call the module itself (not `.forward`) so registered hooks are honoured.
        y_val = model(data)

        # The predicted class is the index of the largest output score;
        # compute it once and reuse it for both the printout and the check.
        predicted = y_val.argmax().item()

        # Columns: raw output scores, true label, predicted label
        print(f'{i+1}.) {str(y_val)} \t {y_test[i]} \t {predicted}')

        # Correct or not (compare plain Python numbers, not a tensor)
        if predicted == y_test[i].item():
            correct += 1

print(f'We got {correct} correct')

1.) tensor([-6.9816,  5.7408,  2.8455]) 	 1 	 1
2.) tensor([-10.1079,   9.1258,   1.3065]) 	 1 	 1
3.) tensor([-10.9584,   9.6312,   2.1591]) 	 1 	 1
4.) tensor([-2.8685,  1.0380,  5.6331]) 	 2 	 2
5.) tensor([-8.7846,  7.4753,  2.6232]) 	 1 	 1
6.) tensor([-0.5798, -1.5149,  7.0015]) 	 2 	 2
7.) tensor([-6.4517,  4.9981,  3.6210]) 	 1 	 1
8.) tensor([-2.5549,  0.6965,  5.7920]) 	 2 	 2
9.) tensor([-7.5313,  6.1537,  3.1311]) 	 1 	 1
10.) tensor([-10.7096,   9.6681,   1.3449]) 	 1 	 1
11.) tensor([-5.9053,  4.4576,  3.7346]) 	 1 	 1
12.) tensor([ 13.1833, -14.0755,   5.5305]) 	 0 	 0
13.) tensor([ 12.0059, -12.7652,   4.9037]) 	 0 	 0
14.) tensor([ 1.2265, -3.0406,  6.3975]) 	 2 	 2
15.) tensor([ 11.4834, -12.5821,   5.9413]) 	 0 	 0
16.) tensor([-5.4799,  3.9468,  4.1003]) 	 1 	 2
17.) tensor([ 11.9423, -12.8441,   5.3712]) 	 0 	 0
18.) tensor([-6.5523,  5.2595,  3.1084]) 	 2 	 1
19.) tensor([ 12.6839, -13.5421,   5.3429]) 	 0 	 0
20.) tensor([ 10.5711, -11.5527,   5.4033]) 	 0 	 0
21

In [28]:
# One hand-written sample with four measurements, matching the four feature
# columns of the data set.
new_iris = torch.tensor(
    [4.7, 3.2, 1.3, 0.2],
)

In [31]:
# Evaluate the model on the new sample (no gradients needed for inference)
with torch.no_grad():
    new_iris_scores = model(new_iris)
print(new_iris_scores)

tensor([ 12.7953, -13.5874,   5.1347])
