<a href="https://colab.research.google.com/github/skhosanam/ml-dl-classification/blob/main/Implementation_of_Backpropagation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, MaxPooling1D, LSTM, Dense, Flatten, concatenate, Layer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
# Colab-only step: opens a file picker and saves the chosen local files into
# the runtime's working directory (fakeNews.csv and trueNews.csv, per the
# cell output below). `uploaded` holds whatever files.upload() returns.
from google.colab import files
uploaded = files.upload()

Saving fakeNews.csv to fakeNews.csv
Saving trueNews.csv to trueNews.csv


In [None]:
# Load and prepare the data
def load_and_preprocess_data(fake_path, true_path, *, num_words=10000,
                             maxlen=200, random_state=None):
    """Load the fake/true news CSVs and turn them into padded token sequences.

    Both CSV files must contain a ``Text`` column. Rows from ``fake_path`` are
    labelled 1 and rows from ``true_path`` are labelled 0; the two sets are
    concatenated and shuffled before tokenization.

    Args:
        fake_path: Path to the CSV file holding fake-news articles.
        true_path: Path to the CSV file holding true-news articles.
        num_words: Size of the tokenizer vocabulary cap (most frequent words).
        maxlen: Length every sequence is padded/truncated to.
        random_state: Optional seed for the shuffle; ``None`` keeps the
            original non-deterministic shuffling.

    Returns:
        A tuple ``(data, labels, word_index)`` where ``data`` is the padded
        integer sequence array, ``labels`` is the array of 0/1 labels in the
        same row order, and ``word_index`` is the tokenizer's word-to-index
        mapping.
    """
    # Read from the paths the caller supplied instead of hard-coded names.
    fake_news_df = pd.read_csv(fake_path)
    true_news_df = pd.read_csv(true_path)

    # Add labels
    fake_news_df['label'] = 1
    true_news_df['label'] = 0

    # Combine the datasets, then shuffle so the classes are interleaved
    combined_df = pd.concat([fake_news_df, true_news_df], ignore_index=True)
    combined_df = combined_df.sample(
        frac=1, random_state=random_state
    ).reset_index(drop=True)

    # Missing text cells are read as NaN floats, which the tokenizer cannot
    # lower-case; treat them as empty documents instead of crashing.
    texts = combined_df['Text'].fillna('').astype(str)

    # Tokenize and pad text
    tokenizer = Tokenizer(num_words=num_words)
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    data = pad_sequences(sequences, maxlen=maxlen)

    # Labels
    labels = combined_df['label'].values

    return data, labels, tokenizer.word_index

# Define a feedback layer to simulate feedback from System 2 to System 1.
#
# Dual process theory proposes that human cognition is governed by two
# distinct processing systems: a fast, intuitive system (System 1) and a
# slower, more deliberative system (System 2).
# In the context of backpropagation, one can draw a parallel between the
# network's feedforward pass (System 1), which quickly generates predictions
# from the current weights and inputs, and the backward pass (System 2), which
# makes slower, more deliberate weight adjustments based on error signals.
# By integrating these two processing systems, the network balances efficiency
# and accuracy in learning and decision-making.
#
# In this architecture the two systems are also represented structurally:
# System 1 is the CNN branch and System 2 is the LSTM branch.
# The FeedbackLayer class defines a custom layer that takes the outputs of
# these two parts of the network as input, applies a feedback mechanism that
# adjusts the output of System 1 based on the output of System 2, and returns
# the adjusted output.
# The layer is designed to strengthen the interaction between System 1 (CNN)
# and System 2 (LSTM) for improved performance on text classification tasks.

class FeedbackLayer(Layer):
    """Adjust System 1's output with a learned projection of System 2's output.

    The layer is called on a two-element list
    ``[system1_output, system2_output]``. System 2's output is multiplied by a
    trainable ``(system2_dim, system1_dim)`` matrix and the result is added
    element-wise to System 1's output, so the returned tensor has the same
    last dimension as System 1's output.

    Args:
        system1_dim: Size of the last dimension of System 1's output.
        **kwargs: Standard Keras ``Layer`` keyword arguments (e.g. ``name``).
    """

    def __init__(self, system1_dim, **kwargs):
        super().__init__(**kwargs)
        self.system1_dim = system1_dim  # Dimension of System 1 output

    def build(self, input_shape):
        """Create the projection matrix once the input shapes are known."""
        # input_shape is [system1_shape, system2_shape]; the weights map from
        # system2_dim to system1_dim.
        system2_dim = input_shape[1][-1]
        self.feedback_weights = self.add_weight(
            name="feedback_weights",
            shape=(system2_dim, self.system1_dim),
            initializer="random_normal",
            trainable=True,
        )

    def call(self, inputs):
        """Return ``system1_output + system2_output @ feedback_weights``."""
        system1_output, system2_output = inputs
        # Map system2_output to the space of system1_output
        feedback = tf.matmul(system2_output, self.feedback_weights)
        # Add feedback to system1_output
        return system1_output + feedback

    def get_config(self):
        """Include ``system1_dim`` so a saved model can rebuild this layer."""
        config = super().get_config()
        config.update({"system1_dim": self.system1_dim})
        return config

def create_model(vocab_size, embedding_dim, max_len=200):
    """Build and compile the CNN + LSTM text classifier with a feedback layer.

    Args:
        vocab_size: Number of distinct token indices the embedding must cover.
        embedding_dim: Size of each embedding vector.
        max_len: Length of the padded input sequences. Defaults to 200, the
            length used when the text is padded.

    Returns:
        A compiled Keras ``Model`` mapping a ``(batch, max_len)`` batch of
        token indices to a single sigmoid output per sample.
    """
    inputs = Input(shape=(max_len,))
    # The sequence length is already fixed by `inputs`, so the deprecated
    # `input_length` argument is not passed to Embedding.
    x = Embedding(vocab_size, embedding_dim)(inputs)

    # System 1: CNN
    # System 1 applies convolutional operations to capture local patterns in
    # the text.
    cnn = Conv1D(64, 5, activation='relu')(x)
    cnn = MaxPooling1D(5)(cnn)
    cnn = Flatten()(cnn)  # This will output a flat vector

    # System 2: LSTM
    # System 2 (LSTM) processes the text sequence to capture long-range
    # dependencies.
    lstm = LSTM(64, return_sequences=False)(x)  # This outputs a vector of size 64

    # Feedback from System 2 to System 1
    # The feedback layer adjusts the output of System 1 based on the output of
    # System 2. Its size is read from the flattened CNN output so the two
    # tensors can be added.
    feedback = FeedbackLayer(int(cnn.shape[-1]))([cnn, lstm])

    # Combine feedback-adjusted output with System 2 output
    # The concatenation layer combines the adjusted output of System 1 with
    # the output of System 2.
    combined = concatenate([feedback, lstm])

    # Fully connected layers for classification
    # Fully connected layers perform classification based on the combined
    # features.
    x = Dense(64, activation='relu')(combined)
    outputs = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=inputs, outputs=outputs)
    # Optimizer: the adam optimizer is specified here. Adam adjusts weights
    # using adaptive estimates of first-order and second-order moments.
    # Loss function: binary_crossentropy is what the gradients are calculated
    # against.
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])  # Location of loss function in the code

    return model

# Load the uploaded CSVs. Without this call `data`, `labels` and `vocab_size`
# are undefined when the cell is run in a fresh runtime.
data, labels, word_index = load_and_preprocess_data('fakeNews.csv', 'trueNews.csv')
# Tokenizer word indices start at 1 and 0 is the padding value, so the
# embedding needs one more row than there are words.
vocab_size = len(word_index) + 1

# Hold out the last 10% of the (already shuffled) rows so the final metrics
# are measured on samples the model never saw during training.
split_at = int(0.9 * len(data))
train_data, test_data = data[:split_at], data[split_at:]
train_labels, test_labels = labels[:split_at], labels[split_at:]

# Instantiate and display the model summary
model = create_model(vocab_size, 100)
model.summary()

# Training
# Keep only the weights from the epoch with the lowest validation loss.
checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss', mode='min')

# The fit method is where backpropagation is actually executed. Each epoch
# involves a forward pass (calculating predictions and loss) and a backward
# pass (computing gradients and updating weights).
# Epochs: determines how many times the network will go through the entire
# training dataset.
# Batch size: the number of samples propagated through the network before the
# optimizer updates the weights. Smaller batch sizes mean the gradient and the
# subsequent updates are less precise but faster to compute, whereas larger
# batches provide a more accurate gradient estimate at the cost of more
# computation.
history = model.fit(train_data, train_labels, epochs=10, batch_size=32, validation_split=0.2, callbacks=[checkpoint])

# Evaluation
# Restore the best checkpoint, then score it on the held-out rows.
model.load_weights('best_model.h5')
loss, accuracy = model.evaluate(test_data, test_labels)
print(f"Final Loss: {loss}, Final Accuracy: {accuracy}")


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 200)]                0         []                            
                                                                                                  
 embedding_1 (Embedding)     (None, 200, 100)             1825200   ['input_2[0][0]']             
                                                                                                  
 conv1d_1 (Conv1D)           (None, 196, 64)              32064     ['embedding_1[0][0]']         
                                                                                                  
 max_pooling1d_1 (MaxPoolin  (None, 39, 64)               0         ['conv1d_1[0][0]']            
 g1D)                                                                                         

  saving_api.save_model(


Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Final Loss: 0.0017123606521636248, Final Accuracy: 0.9996046423912048
