María Romero Huertas (Erasmus)

In [1]:
import pandas as pd

# Load the samples from the comma-separated text file. header=None stops pandas
# from treating the first data row as column names, so columns are labelled 0, 1, 2.
data = pd.read_csv('data1.txt', header=None)

# Print the whole table (it only has 13 rows) to check it was parsed as expected
print(data)

           0        1  2
0   0.218350  0.81884  1
1   0.141150  0.83535  1
2   0.370220  0.81110  1
3   0.315650  0.83101  1
4   0.364840  0.85180  1
5   0.461110  0.82518  1
6   0.552230  0.83449  1
7   0.169750  0.84049  1
8   0.491870  0.80889  1
9   0.149130  0.77104 -1
10  0.184740  0.62790 -1
11  0.088380  0.62068 -1
12  0.098166  0.79092 -1


In [2]:
# Feature matrix X: every column except the last one (the two inputs of each sample).
# to_numpy() is the accessor pandas recommends over the older .values attribute.
X = data.iloc[:, :-1].to_numpy()

# Target vector T: the last column, holding the class label (+1 or -1) of each sample
T = data.iloc[:, -1].to_numpy()

In [3]:
# Display the feature matrix X and the label vector T to verify the extraction
X, T

(array([[0.21835 , 0.81884 ],
        [0.14115 , 0.83535 ],
        [0.37022 , 0.8111  ],
        [0.31565 , 0.83101 ],
        [0.36484 , 0.8518  ],
        [0.46111 , 0.82518 ],
        [0.55223 , 0.83449 ],
        [0.16975 , 0.84049 ],
        [0.49187 , 0.80889 ],
        [0.14913 , 0.77104 ],
        [0.18474 , 0.6279  ],
        [0.08838 , 0.62068 ],
        [0.098166, 0.79092 ]]),
 array([ 1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1, -1, -1]))

Train a single perceptron with two inputs and one output

In [4]:
# Generate random initial values of w1, w2 and b

from numpy.random import randn, seed

# Fix the RNG seed so the starting parameters are identical on every run
SEED = 22
seed(SEED)

# Draw the two weights and the bias, in this order, from the seeded generator
w1, w2, b = randn(), randn(), randn()

w1, w2, b

(-0.0919499198651913, -1.4633506528116793, 1.0817916791983249)

Calculate weighted sum with randomly generated parameters

In [5]:
def calculateErrors(X, T, w1, w2, b):
    """Return the per-sample classification errors of a two-input perceptron.

    Parameters
    ----------
    X : sequence of samples; each sample is indexed at positions 0 and 1.
    T : sequence of target labels, indexed in step with X.
    w1, w2 : weights applied to the first and second input.
    b : bias added to the weighted sum.

    Returns
    -------
    list
        One entry per sample: target minus predicted label, where the
        prediction is 1 when the weighted sum is strictly positive and -1
        otherwise. The parameters are not modified.
    """
    sample_errors = []

    for idx, sample in enumerate(X):
        # Weighted sum of the two inputs plus the bias
        activation = sample[0] * w1 + sample[1] * w2 + b

        # Hard threshold: an activation of exactly 0 is classified as -1
        predicted = 1 if activation > 0 else -1

        sample_errors.append(T[idx] - predicted)

    return sample_errors
      

In [6]:
def train(X, T, w1, w2, b, ETA):
    """Run one pass of the perceptron learning rule over all samples.

    The parameters are updated after every sample, so each prediction uses
    the values already adjusted by the samples before it.

    Parameters
    ----------
    X : sequence of samples; each sample is indexed at positions 0 and 1.
    T : sequence of target labels, indexed in step with X.
    w1, w2 : current weights for the first and second input.
    b : current bias.
    ETA : learning rate that scales every update.

    Returns
    -------
    tuple
        (errors, w1, w2, b): the error recorded for each sample before its
        update was applied, followed by the updated parameters.
    """
    recorded = []

    for idx, sample in enumerate(X):
        # Weighted sum of the two inputs plus the bias
        activation = sample[0] * w1 + sample[1] * w2 + b

        # Hard threshold: an activation of exactly 0 is classified as -1
        predicted = 1 if activation > 0 else -1

        # Target minus prediction: 0 when correct, non-zero when misclassified
        delta = T[idx] - predicted
        recorded.append(delta)

        # Shared factor of all three updates; it is zero for a correct sample,
        # so the parameters then stay where they are
        step = ETA * delta
        w1 = w1 + step * sample[0]
        w2 = w2 + step * sample[1]
        b = b + step

    return recorded, w1, w2, b

Split the data into train/test sets (5 examples for the training split)

In [7]:
NUM_TRAIN_EXAMPLES = 5

# Hand-picked training rows: the data set is tiny, so the indices are chosen
# manually to make sure both classes (+1 and -1) appear in the training split
train_index = [0, 1, 2, 11, 12]
assert len(train_index) == NUM_TRAIN_EXAMPLES, "train_index must list exactly NUM_TRAIN_EXAMPLES rows"

# Every row that was not picked for training goes to the test split
test_index = [i for i in range(len(X)) if i not in train_index]

# Features of the training rows and of the remaining (test) rows
X_train = X[train_index]
X_test = X[test_index]

# Labels of the training rows and of the remaining (test) rows
T_train = T[train_index]
T_test = T[test_index]

# Sanity checks: the split covers every sample exactly once and the training
# labels contain both classes
assert len(X_train) + len(X_test) == len(X), "train/test split does not cover the data exactly once"
assert len(set(T_train)) == 2, "training split must contain both classes"

Calculate the total error for these 5 inputs 

In [8]:
# Classification errors of the randomly initialised (not yet trained) perceptron on the training split
errors = calculateErrors(X_train, T_train, w1, w2, b)

In [9]:
# Show the per-sample errors (target minus prediction): 0 = correct, +2 or -2 = misclassified
errors

[2, 2, 2, -2, 0]

In [10]:
# Total absolute error over the training split before any training
totalAE = sum(map(abs, errors))
totalAE

8

### Training

Write the training algorithm

In [11]:
ETA = 1  # learning rate: scales every weight/bias update
MAX_EPOCHS = 1000  # safety cap: the perceptron rule never reaches zero error on data that is not linearly separable

epoch = 0
while totalAE != 0 and epoch < MAX_EPOCHS:
    # One epoch: present every training sample once, updating the parameters after each
    errors, w1, w2, b = train(X_train, T_train, w1, w2, b, ETA)

    # Total absolute error accumulated over this epoch; 0 means a full pass without mistakes
    totalAE = sum(abs(e) for e in errors)
    epoch += 1

# Make a non-converged run visible instead of silently continuing with bad parameters
if totalAE != 0:
    print(f'Warning: training stopped after {MAX_EPOCHS} epochs without reaching zero error')

totalAE

0

In [12]:
# Report the outcome of training: the final total absolute error and the
# learned parameters, each rounded to two decimals for display
print('After applying training algorithm')
print('Total absolute error:', totalAE)
print(f"Updated parameters: w1: {round(w1,2)}, w2: {round(w2,2)}, b: {round(b,2)}")

After applying training algorithm
Total absolute error: 0
Updated parameters: w1: 4.78, w2: 3.05, b: -2.92


### Testing

In [13]:
import pandas as pd

# Evaluate the trained parameters on the held-out test samples
test_errors = calculateErrors(X_test, T_test, w1, w2, b)

# A prediction is correct exactly when its error is zero
prediction_results = [e == 0 for e in test_errors]

# Keep the summary under its own name: rebinding `data` here would replace the
# DataFrame loaded at the top of the notebook and break re-running earlier cells
test_summary = {
    'Prediction Result': prediction_results,
    'Prediction Error': test_errors
}
df = pd.DataFrame(test_summary)
df

Unnamed: 0,Prediction Result,Prediction Error
0,True,0
1,True,0
2,True,0
3,True,0
4,True,0
5,True,0
6,False,-2
7,True,0


In [14]:
# Sum of the absolute errors over all test samples
test_AE = sum(map(abs, test_errors))
print('Total absolute error for the test:', test_AE)

Total absolute error for the test: 2


In [15]:
# Share of test samples classified correctly, expressed as a percentage
num_correct = sum(prediction_results)
accuracy = num_correct / len(prediction_results) * 100

# Report the accuracy with two decimals
print(f'Accuracy: {accuracy:.2f}%')

Accuracy: 87.50%
