In [2]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import plot_model
import io
from IPython.display import Image, display

# XOR Problem Implementation with Multilayer Perceptron (MLP)

# 1. Generate XOR data
# Truth table (the output is 1 exactly when the two inputs differ):
#   A  B | A XOR B
#   0  0 |    0
#   0  1 |    1
#   1  0 |    1
#   1  1 |    0

# Input features: every combination of two binary inputs, in truth-table order.
X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])

# Output labels: XOR of the two feature columns, stored as integers (0/1).
y = np.logical_xor(X[:, 0], X[:, 1]).astype(int)

# 2. Create and train the MLP model using TensorFlow
# We need at least one hidden layer because XOR is not linearly separable.

# Seed Python, NumPy and TensorFlow so that weight initialisation -- and hence
# the trained model and every plot derived from it -- is the same on each
# Restart & Run All.
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Declare the input with an explicit Input object: recent Keras releases
    # warn when `input_shape=` is passed to a layer inside a Sequential model.
    tf.keras.Input(shape=(2,)),
    Dense(4, activation='relu', name='hidden_layer'),
    Dense(1, activation='sigmoid', name='output_layer')
])

# Compile the model.
# The data set has only four samples, so each epoch is a single full-batch
# gradient step. With Adam's default learning rate (1e-3), 1000 such steps are
# often too few to fit XOR; a larger step size converges far more reliably.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.05),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Display model summary
model.summary()

# Visualize the model architecture.
# plot_model needs the optional pydot package and a Graphviz install; when they
# are missing the diagram cannot be produced. Treat it as optional so that a
# missing plotting dependency does not abort the cell before training.
try:
    plot_model(model, to_file='xor_model.png', show_shapes=True, show_layer_names=True)
    display(Image('xor_model.png'))
except (ImportError, OSError) as err:
    print(f"Skipping architecture diagram: {err}")

# Train the model (silently); `history` keeps the per-epoch loss/accuracy.
history = model.fit(X, y, epochs=1000, verbose=0)

# 3. Make predictions
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 class labels,
# flattened to a 1-D array so they line up with `y`.
predictions = np.where(model.predict(X).ravel() > 0.5, 1, 0)
print("Predictions:", predictions)
# Fraction of the four XOR patterns that were classified correctly.
print("Accuracy:", (predictions == y).mean())

# 4. Visualize the decision boundary
# Sample the input plane on a regular grid that extends half a unit beyond
# the unit square holding the four XOR points.
h = 0.02  # spacing between neighbouring grid points
x_min, x_max = -0.5, 1.5
y_min, y_max = -0.5, 1.5
grid_x = np.arange(x_min, x_max, h)
grid_y = np.arange(y_min, y_max, h)
xx, yy = np.meshgrid(grid_x, grid_y)

# Classify every grid point, then fold the 0/1 labels back into the grid shape
# so they can be drawn as filled contours.
grid_points = np.column_stack([xx.ravel(), yy.ravel()])
Z = (model.predict(grid_points) > 0.5).astype(int).reshape(xx.shape)

# Draw the predicted class regions with the training examples on top.
fig, boundary_ax = plt.subplots(figsize=(8, 6))
boundary_ax.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)

# Training points, coloured by their true label.
boundary_ax.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k', s=100)
boundary_ax.set_xlim(xx.min(), xx.max())
boundary_ax.set_ylim(yy.min(), yy.max())
boundary_ax.set_xlabel('Feature 1')
boundary_ax.set_ylabel('Feature 2')
boundary_ax.set_title('XOR Problem - MLP Decision Boundary')

# 5. Visualize the network weights
# Left panel: the network drawn as a graph whose edge colour encodes the sign
# of each weight (red = negative, blue = non-negative) and whose edge width
# encodes its magnitude. Right panel: the decision boundary, for reference.
fig, (ax, boundary_ax) = plt.subplots(1, 2, figsize=(12, 6))

# Get weights from the model. Each Dense layer's get_weights() returns
# [kernel, bias]; fetch them once per layer.
hidden_weights, hidden_biases = model.layers[0].get_weights()
output_weights, output_bias = model.layers[1].get_weights()

# Derive the layout from the weight shapes instead of hard-coding "2 inputs,
# 4 hidden units", so the drawing stays correct if the architecture changes
# and nodes, labels and edges all share exactly the same coordinates.
n_inputs, n_hidden = hidden_weights.shape
input_ys = np.linspace(0, 1, n_inputs)
hidden_ys = np.linspace(0, 1, n_hidden)
output_y = 0.5

ax.set_title('Neural Network Weights')

# Draw nodes
# Input layer (x = 0)
ax.scatter(np.zeros(n_inputs), input_ys, s=100, c='blue', label='Input')
for i, y_in in enumerate(input_ys):
    ax.text(-0.15, y_in, f'X{i+1}')

# Hidden layer (x = 1)
ax.scatter(np.ones(n_hidden), hidden_ys, s=100, c='green', label='Hidden')
for j, y_hid in enumerate(hidden_ys):
    ax.text(1.05, y_hid, f'H{j+1}')

# Output layer (x = 2)
ax.scatter([2], [output_y], s=100, c='red', label='Output')
ax.text(2.05, output_y, 'Y')

# Draw connections from input to hidden with weights
for i, y_in in enumerate(input_ys):
    for j, y_hid in enumerate(hidden_ys):
        weight = hidden_weights[i, j]
        ax.plot([0, 1], [y_in, y_hid],
                c='red' if weight < 0 else 'blue',
                linewidth=abs(weight) * 3, alpha=0.6)

# Draw connections from hidden to output with weights
for j, y_hid in enumerate(hidden_ys):
    weight = output_weights[j, 0]
    ax.plot([1, 2], [y_hid, output_y],
            c='red' if weight < 0 else 'blue',
            linewidth=abs(weight) * 3, alpha=0.6)

ax.set_xlim(-0.5, 2.5)
ax.set_ylim(-0.5, 1.5)
ax.legend()
ax.axis('off')

# Plot the decision boundary again
boundary_ax.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
boundary_ax.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k', s=100)
boundary_ax.set_xlim(xx.min(), xx.max())
boundary_ax.set_ylim(yy.min(), yy.max())
boundary_ax.set_xlabel('Feature 1')
boundary_ax.set_ylabel('Feature 2')
boundary_ax.set_title('XOR Problem - Decision Boundary')

fig.tight_layout()
plt.show()

# 6. Explanation of why MLP can solve XOR
# The text below is a bare string literal rather than a comment: it does not
# affect the model or the plots. Because it is the last expression in the
# cell, Jupyter echoes it as the cell's output value.
"""
Explanation of XOR and MLP:

1. XOR Problem:
   - XOR (exclusive OR) is not linearly separable, meaning we cannot draw a single
     straight line to separate the classes (0s and 1s).
   - Points (0,0) and (1,1) belong to one class, while (0,1) and (1,0) belong to another.

2. Why MLP Works:
   - A single-layer perceptron (no hidden layers) CANNOT solve XOR.
   - MLP with at least one hidden layer CAN solve XOR by:
     a) First hidden layer transforms the input space into a new representation
     b) In this new space, the data becomes linearly separable
     c) Output layer can then draw a linear boundary in this transformed space

3. Network Architecture:
   - Input layer: 2 neurons (for the 2 input features)
   - Hidden layer: 4 neurons with ReLU activation
   - Output layer: 1 neuron with sigmoid activation (binary classification)

4. Learning Process:
   - The network learns the weights that transform the input space
   - ReLU activation introduces non-linearity, which is crucial for solving XOR
   - Backpropagation adjusts weights to minimize prediction error
"""



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/marc.reyes/Documents/lecture/.venv/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/marc.reyes/Documents/lecture/.venv/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/Users/marc.reyes/Documents/lecture/.venv/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 7

AttributeError: _ARRAY_API not found

ImportError: numpy.core._multiarray_umath failed to import

ImportError: numpy.core.umath failed to import

TypeError: Unable to convert function return value to a Python type! The signature was
	() -> handle

In [2]:
!uv add tensorflow

Resolved 85 packages in 5ms
Audited 80 packages in 0.22ms
