# CNN
## Increase the complexity of the CNN and Ensemble methods
#### Ensemble methods: Instead of relying on a single model, train several models and combine their predictions, for example with bagging, boosting, or voting. The combined prediction is often more accurate than that of any individual model.

In [None]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import tensorflow as tf

# Read the comma-separated feature matrix and the matching label column.
# Neither file carries a header row, so pandas assigns integer column names.
df_features = pd.read_csv('traindata.txt', delimiter=',', header=None)
df_labels = pd.read_csv('trainlabels.txt', header=None)

# Hold out 30% of the samples for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_features, df_labels, test_size=0.3, random_state=42
)

# Report the dimensions of the full (unsplit) dataset
print(df_features.shape)
print(df_labels.shape)

# Data augmentation - add one randomly perturbed copy of every training sample.
# X_train and y_train are pandas DataFrames (one row per sample) at this point.
# A seeded generator keeps the noise reproducible, in line with the fixed seed
# used for the train/test split and for the shuffle below.
rng = np.random.default_rng(42)
original_features = X_train.to_numpy(dtype=float)
gaussian_noise = rng.normal(0, 0.1, size=original_features.shape)

# Build the perturbed copies in one vectorized step; each copy keeps the label
# of the sample it was derived from.
augmented_X_train = pd.DataFrame(original_features + gaussian_noise, columns=X_train.columns)
augmented_y_train = pd.DataFrame(y_train.to_numpy(), columns=y_train.columns)

# Concatenate augmented data with original data.
# Only the perturbed rows are appended: the original rows are already present
# in X_train, so also storing them in the augmented set would make every
# original sample appear twice in the training data.
# ignore_index avoids duplicated index labels in the combined frames.
X_train = pd.concat([X_train, augmented_X_train], axis=0, ignore_index=True)
y_train = pd.concat([y_train, augmented_y_train], axis=0, ignore_index=True)

# Shuffle features and labels together so the rows stay aligned
X_train, y_train = shuffle(X_train, y_train, random_state=42)

# Constants shared by the preprocessing below and the model definitions
INPUT_SHAPE = (71, 1)  # (input features per sample, channels)
NUM_CLASSES = 10  # Number of output classes (0-9)
LEARNING_RATE = 0.001  # Adjust as necessary
NUM_MODELS = 20  # Number of models in the ensemble

# Give every sample the shape INPUT_SHAPE (a trailing channel axis is added)
# so the arrays match what the first Conv1D layer is declared to accept.
X_train = X_train.to_numpy().reshape(-1, *INPUT_SHAPE)
X_test = X_test.to_numpy().reshape(-1, *INPUT_SHAPE)

# One-hot encode the integer class labels to match the softmax output layer
y_train_encoded = tf.keras.utils.to_categorical(y_train, NUM_CLASSES)
y_test_encoded = tf.keras.utils.to_categorical(y_test, NUM_CLASSES)

def _build_cnn():
    """Build and compile one CNN member of the ensemble.

    The network stacks three Conv1D + MaxPooling1D stages (32, 64 and 128
    filters), then a 128-unit dense layer with dropout, and ends in a softmax
    layer with NUM_CLASSES outputs.

    Returns:
        A compiled ``tf.keras.Sequential`` model with freshly initialized
        weights.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Conv1D(32, kernel_size=3, activation='relu', input_shape=INPUT_SHAPE),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Conv1D(64, kernel_size=3, activation='relu'),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Conv1D(128, kernel_size=3, activation='relu'),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')
    ])

    # Adam optimizer with the configured learning rate
    optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)

    # The targets are one-hot vectors and the output layer is a softmax, so
    # categorical cross-entropy is the matching loss. Mean squared error was
    # used before; it gives weak gradients for confidently wrong predictions
    # and is not the appropriate loss for multi-class classification.
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


# Define the ensemble of models: NUM_MODELS networks with an identical
# architecture, each with its own random weight initialization
ensemble_models = [_build_cnn() for _ in range(NUM_MODELS)]

# Fit every ensemble member on the full training set and report how it
# performs on the held-out test set on its own
for model_index, model in enumerate(ensemble_models):
    print(f"Model {model_index + 1} Training:")
    history = model.fit(
        X_train,
        y_train_encoded,
        epochs=30,
        batch_size=32,
        verbose=1,  # show per-epoch training progress
    )

    # Predicted class = index of the largest softmax output for each sample
    y_test_pred_prob = model.predict(X_test)
    y_test_pred = y_test_pred_prob.argmax(axis=1)

    # Print the layer-by-layer summary of this model
    model.summary()

    # Test accuracy plus the training loss recorded for the last epoch
    accuracy = accuracy_score(y_test, y_test_pred)
    final_training_loss = history.history['loss'][-1]
    print('Model Accuracy:', accuracy)
    print('Model Loss:', final_training_loss)
    print()

# Predicting the test set results using the ensemble:
# collect the hard class prediction of every model for every test sample
y_test_preds = []
for model in ensemble_models:
    y_test_pred_prob = model.predict(X_test)
    y_test_pred = np.argmax(y_test_pred_prob, axis=1)
    y_test_preds.append(y_test_pred)

# Take the majority vote from the ensemble predictions.
# Class indices are categorical, so they must be counted, not averaged:
# rounding the mean of the votes (e.g. votes 1, 1, 9 -> 4) can produce a class
# that no model predicted.
votes = np.stack(y_test_preds)  # shape: (number of models, number of test samples)
# Count, per test sample, how many models voted for each class
vote_counts = np.apply_along_axis(np.bincount, 0, votes, minlength=NUM_CLASSES)
# Most-voted class per sample; ties resolve to the lowest class index
y_test_preds_ensemble = vote_counts.argmax(axis=0)

# Calculate accuracy of the ensemble model
ensemble_accuracy = accuracy_score(y_test, y_test_preds_ensemble)
print('Ensemble Model Accuracy:', ensemble_accuracy)
