<a href="https://colab.research.google.com/github/sdd261/DeepLearning/blob/main/research(noisedata).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam


In [2]:
# Step 1: Load and Preprocess CIFAR-10 Dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 0us/step


In [3]:
# Normalize the data (to range [0, 1])
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

In [4]:
# Step 2: Introduce Noise into Labels
def introduce_noise(labels, noise_level=0.2):
    """Function to introduce noise in a certain percentage of labels."""
    num_samples = labels.shape[0]
    num_noisy = int(noise_level * num_samples)

    # Randomly choose indices to corrupt
    noisy_indices = np.random.choice(num_samples, num_noisy, replace=False)

    # Generate noisy labels
    noisy_labels = np.random.randint(0, 10, size=num_noisy)

    # Replace original labels with noisy ones
    labels[noisy_indices] = noisy_labels.reshape(-1, 1)

    return labels

# Introduce 20% noise into the training labels
y_train_noisy = introduce_noise(np.copy(y_train), noise_level=0.2)

In [5]:

# Step 3: One-Hot Encoding of Labels
# Ensure labels are in the correct shape (convert from 2D array to 1D array)
y_train_noisy_flat = np.squeeze(y_train_noisy)
y_test_flat = np.squeeze(y_test)


In [6]:
# One-Hot Encoding of Labels
y_train_noisy_onehot = to_categorical(y_train_noisy_flat, num_classes=10)
y_test_onehot = to_categorical(y_test_flat, num_classes=10)

In [7]:

# Step 4: Build the CNN Model
def create_cnn_model(input_shape=(32, 32, 3), num_classes=10):
    """Function to create a simple CNN model."""
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])
    return model


In [8]:
# Step 5: Compile the CNN Model
cnn_model = create_cnn_model()
cnn_model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
# Step 6: Train the CNN Model
cnn_model.fit(x_train, y_train_noisy_onehot, epochs=50, batch_size=64, validation_data=(x_test, y_test_onehot))

Epoch 1/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 11ms/step - accuracy: 0.2631 - loss: 2.0356 - val_accuracy: 0.5341 - val_loss: 1.3979
Epoch 2/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.4518 - loss: 1.6834 - val_accuracy: 0.6035 - val_loss: 1.2179
Epoch 3/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.5130 - loss: 1.5602 - val_accuracy: 0.6080 - val_loss: 1.2020
Epoch 4/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.5443 - loss: 1.4887 - val_accuracy: 0.6431 - val_loss: 1.1226
Epoch 5/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.5692 - loss: 1.4303 - val_accuracy: 0.6455 - val_loss: 1.0931
Epoch 6/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.5904 - loss: 1.3776 - val_accuracy: 0.6634 - val_loss: 1.0536
Epoch 7/50
[1m782/782[0m

<keras.src.callbacks.history.History at 0x7ef87cee0b20>

In [10]:
# Step 7: Evaluate the Model
test_loss, test_acc = cnn_model.evaluate(x_test, y_test_onehot, verbose=2)
print(f"Test accuracy on noisy labels: {test_acc}")


313/313 - 1s - 4ms/step - accuracy: 0.5329 - loss: 4.0330
Test accuracy on noisy labels: 0.5328999757766724


 In this experiment, 20% of the training labels were deliberately corrupted with random incorrect values. Training a model on noisy data typically leads to a drop in accuracy because the model is learning from incorrect information. Achieving 53.29% accuracy in such a scenario means the model still generalizes reasonably well despite the label noise.