In [1]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

def tanh(x):
    return np.tanh(x)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

class Convolution:
    def __init__(self, input_shape, filter_size, num_filters):
        self.input_shape = input_shape
        self.num_filters = num_filters
        self.filter_size = filter_size
        
        self.output_shape = (
            num_filters,
            input_shape[0] - filter_size + 1,
            input_shape[1] - filter_size + 1
        )
        
        self.filters = np.random.randn(num_filters, filter_size, filter_size) * 0.01
        self.biases = np.zeros(self.output_shape)
        self.input_data = None
    
    def forward(self, input_data):
        self.input_data = input_data
        batch_size = input_data.shape[0]
        output = np.zeros((batch_size, *self.output_shape))
        
        for i in range(batch_size):
            for f in range(self.num_filters):
                for h in range(self.output_shape[1]):
                    for w in range(self.output_shape[2]):
                        output[i, f, h, w] = np.sum(
                            input_data[i, h:h+self.filter_size, w:w+self.filter_size] * self.filters[f]
                        ) + self.biases[f, h, w]
        
        return output
    
    def backward(self, d_output, learning_rate):
        batch_size = d_output.shape[0]
        d_input = np.zeros_like(self.input_data)
        d_filters = np.zeros_like(self.filters)
        d_biases = np.zeros_like(self.biases)
        
        for i in range(batch_size):
            for f in range(self.num_filters):
                for h in range(self.output_shape[1]):
                    for w in range(self.output_shape[2]):
                        d_input[i, h:h+self.filter_size, w:w+self.filter_size] += d_output[i, f, h, w] * self.filters[f]
                        d_filters[f] += d_output[i, f, h, w] * self.input_data[i, h:h+self.filter_size, w:w+self.filter_size]
                        d_biases[f, h, w] += d_output[i, f, h, w]
        
        self.filters -= learning_rate * d_filters
        self.biases -= learning_rate * d_biases
        
        return d_input

class Pooling:
    def __init__(self, pool_size):
        self.pool_size = pool_size
        self.input_data = None
        self.max_indices = None
        self.output = None
    
    def forward(self, input_data):
        self.input_data = input_data
        batch_size, num_channels, input_height, input_width = input_data.shape
        
        output_height = input_height // self.pool_size
        output_width = input_width // self.pool_size
        
        output = np.zeros((batch_size, num_channels, output_height, output_width))
        self.max_indices = np.zeros_like(input_data, dtype=bool)
        
        for i in range(batch_size):
            for c in range(num_channels):
                for h in range(output_height):
                    for w in range(output_width):
                        h_start = h * self.pool_size
                        w_start = w * self.pool_size
                        h_end = h_start + self.pool_size
                        w_end = w_start + self.pool_size
                        
                        pool_region = input_data[i, c, h_start:h_end, w_start:w_end]
                        max_val = np.max(pool_region)
                        max_idx = np.argmax(pool_region.flatten())
                        max_idx_h, max_idx_w = np.unravel_index(max_idx, (self.pool_size, self.pool_size))
                        
                        output[i, c, h, w] = max_val
                        self.max_indices[i, c, h_start + max_idx_h, w_start + max_idx_w] = True
        
        self.output = output
        return output
    
    def backward(self, d_output):
        batch_size, num_channels, output_height, output_width = d_output.shape
        d_input = np.zeros_like(self.input_data)
        
        for i in range(batch_size):
            for c in range(num_channels):
                for h in range(output_height):
                    for w in range(output_width):
                        h_start = h * self.pool_size
                        w_start = w * self.pool_size
                        h_end = h_start + self.pool_size
                        w_end = w_start + self.pool_size
                        
                        mask = self.max_indices[i, c, h_start:h_end, w_start:w_end]
                        d_input[i, c, h_start:h_end, w_start:w_end][mask] = d_output[i, c, h, w]
        
        return d_input

class GRU:
    def __init__(self, input_size, hidden_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        
        self.W_z = np.random.randn(input_size, hidden_size) * 0.01
        self.U_z = np.random.randn(hidden_size, hidden_size) * 0.01
        self.b_z = np.zeros((1, hidden_size))
        
        self.W_r = np.random.randn(input_size, hidden_size) * 0.01
        self.U_r = np.random.randn(hidden_size, hidden_size) * 0.01
        self.b_r = np.zeros((1, hidden_size))
        
        self.W_h = np.random.randn(input_size, hidden_size) * 0.01
        self.U_h = np.random.randn(hidden_size, hidden_size) * 0.01
        self.b_h = np.zeros((1, hidden_size))
        
        self.x = None
        self.z = None
        self.r = None
        self.h_candidate = None
        self.h_prev = None
        self.h = None
    
    def forward(self, x, h_prev=None):
        if h_prev is None:
            h_prev = np.zeros((x.shape[0], self.hidden_size))
        
        self.x = x
        self.h_prev = h_prev
        
        self.z = sigmoid(np.dot(x, self.W_z) + np.dot(h_prev, self.U_z) + self.b_z)
        self.r = sigmoid(np.dot(x, self.W_r) + np.dot(h_prev, self.U_r) + self.b_r)
        self.h_candidate = tanh(np.dot(x, self.W_h) + np.dot(self.r * h_prev, self.U_h) + self.b_h)
        self.h = (1 - self.z) * h_prev + self.z * self.h_candidate
        
        return self.h
    
    def backward(self, d_h, learning_rate, d_h_next=None):
        if d_h_next is None:
            d_h_next = np.zeros_like(d_h)
        
        d_h = d_h + d_h_next
        
        d_z = d_h * (self.h_candidate - self.h_prev)
        d_h_candidate = d_h * self.z
        d_h_prev = d_h * (1 - self.z)
        
        d_h_candidate_tanh = d_h_candidate * (1 - self.h_candidate**2)
        d_r = np.dot(d_h_candidate_tanh, self.U_h.T) * self.h_prev
        d_h_prev += np.dot(d_h_candidate_tanh, self.U_h.T) * self.r
        d_x_h = np.dot(d_h_candidate_tanh, self.W_h.T)
        
        d_r_sigmoid = d_r * self.r * (1 - self.r)
        d_x_r = np.dot(d_r_sigmoid, self.W_r.T)
        d_h_prev += np.dot(d_r_sigmoid, self.U_r.T)
        
        d_z_sigmoid = d_z * self.z * (1 - self.z)
        d_x_z = np.dot(d_z_sigmoid, self.W_z.T)
        d_h_prev += np.dot(d_z_sigmoid, self.U_z.T)
        
        d_x = d_x_h + d_x_r + d_x_z
        
        self.W_z -= learning_rate * np.dot(self.x.T, d_z_sigmoid)
        self.U_z -= learning_rate * np.dot(self.h_prev.T, d_z_sigmoid)
        self.b_z -= learning_rate * np.sum(d_z_sigmoid, axis=0, keepdims=True)
        
        self.W_r -= learning_rate * np.dot(self.x.T, d_r_sigmoid)
        self.U_r -= learning_rate * np.dot(self.h_prev.T, d_r_sigmoid)
        self.b_r -= learning_rate * np.sum(d_r_sigmoid, axis=0, keepdims=True)
        
        self.W_h -= learning_rate * np.dot(self.x.T, d_h_candidate_tanh)
        self.U_h -= learning_rate * np.dot((self.r * self.h_prev).T, d_h_candidate_tanh)
        self.b_h -= learning_rate * np.sum(d_h_candidate_tanh, axis=0, keepdims=True)
        
        return d_x, d_h_prev

class FullyConnected:
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(input_size, output_size) * 0.01
        self.biases = np.zeros((1, output_size))
        self.input_data = None
        self.output = None
    
    def forward(self, input_data):
        self.input_data = input_data
        self.output = np.dot(input_data, self.weights) + self.biases
        return self.output
    
    def backward(self, d_output, learning_rate):
        d_input = np.dot(d_output, self.weights.T)
        d_weights = np.dot(self.input_data.T, d_output)
        d_biases = np.sum(d_output, axis=0, keepdims=True)
        
        self.weights -= learning_rate * d_weights
        self.biases -= learning_rate * d_biases
        
        return d_input

class HybridCNNRNN:
    def __init__(self, input_shape, filter_size, num_filters, pool_size, hidden_size, output_size):
        self.conv = Convolution(input_shape, filter_size, num_filters)
        
        conv_output_shape = (
            num_filters,
            input_shape[0] - filter_size + 1,
            input_shape[1] - filter_size + 1
        )
        
        self.pool = Pooling(pool_size)
        pool_output_shape = (
            num_filters,
            conv_output_shape[1] // pool_size,
            conv_output_shape[2] // pool_size
        )
        
        self.flatten_size = pool_output_shape[0] * pool_output_shape[1] * pool_output_shape[2]
        self.gru = GRU(self.flatten_size, hidden_size)
        self.fc = FullyConnected(hidden_size, output_size)
    
    def forward(self, input_data):
        if input_data.ndim == 2:
            input_data = np.expand_dims(input_data, axis=0)
        
        conv_output = self.conv.forward(input_data)
        pool_output = self.pool.forward(conv_output)
        
        batch_size = pool_output.shape[0]
        flattened = pool_output.reshape(batch_size, -1)
        
        gru_output = self.gru.forward(flattened)
        fc_output = self.fc.forward(gru_output)
        
        return softmax(fc_output)
    
    def backward(self, y_true, learning_rate):
        d_output = softmax(self.fc.output) - y_true
        d_fc = self.fc.backward(d_output, learning_rate)
        d_gru, _ = self.gru.backward(d_fc, learning_rate)
        
        batch_size = d_gru.shape[0]
        d_pool = d_gru.reshape(self.pool.output.shape)
        
        d_conv = self.pool.backward(d_pool)
        _ = self.conv.backward(d_conv, learning_rate)
    
    def train(self, X, y, epochs, learning_rate, batch_size=32):
        num_samples = X.shape[0]
        
        for epoch in range(epochs):
            indices = np.random.permutation(num_samples)
            X_shuffled = X[indices]
            y_shuffled = y[indices]
            
            for i in range(0, num_samples, batch_size):
                X_batch = X_shuffled[i:i+batch_size]
                y_batch = y_shuffled[i:i+batch_size]
                
                _ = self.forward(X_batch)
                self.backward(y_batch, learning_rate)
            
            predictions = self.predict(X)
            accuracy = np.mean(np.argmax(predictions, axis=1) == np.argmax(y, axis=1))
            
            print(f"Epoch {epoch+1}/{epochs}, Accuracy: {accuracy:.4f}")
    
    def predict(self, X):
        return self.forward(X)

def preprocess_iris_data(data, target_shape=(8, 8)):
    if data.ndim > 1 and data.shape[0] > 1:
        normalized_data = (data - np.mean(data, axis=0)) / np.std(data, axis=0)
    else:
        means = np.array([5.84, 3.05, 3.76, 1.20])
        stds = np.array([0.83, 0.43, 1.76, 0.76])
        normalized_data = (data - means) / stds
    
    matrix_data = normalized_data.reshape(-1, 2, 2)
    
    upsampled_data = np.zeros((normalized_data.shape[0], target_shape[0], target_shape[1]))
    
    for i in range(normalized_data.shape[0]):
        for h in range(target_shape[0]):
            for w in range(target_shape[1]):
                h_orig = min(int(h * 2 / target_shape[0]), 1)
                w_orig = min(int(w * 2 / target_shape[1]), 1)
                upsampled_data[i, h, w] = matrix_data[i, h_orig, w_orig]
    
    return upsampled_data

def one_hot_encode(labels, num_classes):
    encoded = np.zeros((len(labels), num_classes))
    for i, label in enumerate(labels):
        encoded[i, label] = 1
    return encoded

In [2]:
def train_model():
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split
    
    iris = load_iris()
    X = iris.data
    y = iris.target
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    X_train_processed = preprocess_iris_data(X_train)
    X_test_processed = preprocess_iris_data(X_test)
    
    num_classes = len(np.unique(y))
    y_train_encoded = one_hot_encode(y_train, num_classes)
    y_test_encoded = one_hot_encode(y_test, num_classes)
    
    input_shape = (8, 8)
    filter_size = 3
    num_filters = 8
    pool_size = 2
    hidden_size = 32
    output_size = num_classes
    
    model = HybridCNNRNN(input_shape, filter_size, num_filters, pool_size, hidden_size, output_size)
    model.train(X_train_processed, y_train_encoded, epochs=20, learning_rate=0.01, batch_size=16)
    
    test_predictions = model.predict(X_test_processed)
    test_accuracy = np.mean(np.argmax(test_predictions, axis=1) == np.argmax(y_test_encoded, axis=1))
    print(f"Test Accuracy: {test_accuracy:.4f}")
    
    return model, iris.target_names

def predict_with_user_input(model, iris_classes):
    def predict_iris(sepal_length, sepal_width, petal_length, petal_width):
        input_data = np.array([[sepal_length, sepal_width, petal_length, petal_width]])
        processed_data = preprocess_iris_data(input_data)
        prediction = model.predict(processed_data)
        class_idx = np.argmax(prediction[0])
        probability = prediction[0, class_idx]
        
        return {
            "class": iris_classes[class_idx],
            "probability": float(probability),
            "raw_probabilities": prediction[0].tolist()
        }
    
    # Example usage:
    model, iris_classes = train_model()
    result = predict_iris(5.1, 3.5, 1.4, 0.2)
    print(f"Predicted class: {result['class']}")
    print(f"Probability: {result['probability']:.4f}")
    
    return predict_iris

model, iris_classes = train_model()
predictor = predict_with_user_input(model, iris_classes)
result = predictor(5.1, 3.5, 1.4, 0.2)

Epoch 1/20, Accuracy: 0.3417
Epoch 2/20, Accuracy: 0.3417
Epoch 3/20, Accuracy: 0.3417
Epoch 4/20, Accuracy: 0.3417
Epoch 5/20, Accuracy: 0.3417
Epoch 6/20, Accuracy: 0.3417
Epoch 7/20, Accuracy: 0.3417
Epoch 8/20, Accuracy: 0.3417
Epoch 9/20, Accuracy: 0.6750
Epoch 10/20, Accuracy: 0.6667
Epoch 11/20, Accuracy: 0.6750
Epoch 12/20, Accuracy: 0.8000
Epoch 13/20, Accuracy: 0.7667
Epoch 14/20, Accuracy: 0.7000
Epoch 15/20, Accuracy: 0.7333
Epoch 16/20, Accuracy: 0.7750
Epoch 17/20, Accuracy: 0.8250
Epoch 18/20, Accuracy: 0.8667
Epoch 19/20, Accuracy: 0.8667
Epoch 20/20, Accuracy: 0.8750
Test Accuracy: 0.9667
Epoch 1/20, Accuracy: 0.3333
Epoch 2/20, Accuracy: 0.3333
Epoch 3/20, Accuracy: 0.3417
Epoch 4/20, Accuracy: 0.3417
Epoch 5/20, Accuracy: 0.3417
Epoch 6/20, Accuracy: 0.3417
Epoch 7/20, Accuracy: 0.3417
Epoch 8/20, Accuracy: 0.3417
Epoch 9/20, Accuracy: 0.3750
Epoch 10/20, Accuracy: 0.6750
Epoch 11/20, Accuracy: 0.6833
Epoch 12/20, Accuracy: 0.8417
Epoch 13/20, Accuracy: 0.7000
Epoch 