## 1st approach


In [1]:
import warnings
# Suppress every warning for the rest of the session. Note that this also
# hides deprecation notices emitted by the libraries used below.
warnings.filterwarnings("ignore")

# Using ResNet50V2


In [2]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.applications.resnet_v2 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import torch
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D

# Purpose of this cell: load one training shard, fit a softmax head on top of a
# frozen ImageNet ResNet50V2, then run the images through the base network to
# obtain feature maps (`features`) for the dimensionality-reduction cells below.

# Constants
num_samples = 2500  # Expected number of images (not read anywhere in this cell)
image_shape = (32, 32, 3)  # Original image shape (not read anywhere in this cell)
num_classes = 10  # Number of classes
epochs = 10  # Number of epochs
# NOTE(review): torch.load unpickles the file, so only open trusted files.
# Newer PyTorch releases may default to weights_only=True and reject this
# payload — confirm against the installed version.
data = torch.load('../dataset/dataset/part_one_dataset/train_data/1_train_data.tar.pth')

# Images come from the loaded file (key 'data'); nothing is generated here.
# presumably shaped (N, 32, 32, 3) as suggested by `image_shape` — TODO confirm
images = data['data']

# Class labels come from the loaded file (key 'targets').
# presumably integer ids in [0, num_classes) — TODO confirm
targets = data['targets']
# Convert integer labels to one-hot encoded vectors
one_hot_labels = tf.keras.utils.to_categorical(targets, num_classes=num_classes)

# Resize images to the required input size for ResNet50V2 (224, 224, 3)
resized_images = tf.image.resize(images, (224, 224))

# Preprocess images using ResNet50V2 preprocessing
processed_images = preprocess_input(resized_images)

# Load ResNet50V2 as the base model with pre-trained ImageNet weights
base_model = ResNet50V2(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

# Freeze the base model to use it as a feature extractor
base_model.trainable = False

# Add custom layers on top of the base model
inputs = Input(shape=(224, 224, 3))
x = base_model(inputs, training=False)  # Run the base model in inference mode
x = GlobalAveragePooling2D()(x)  # Pool the output feature maps into a single feature vector
outputs = Dense(num_classes, activation="softmax")(x)  # Output layer for classification

# Define the complete model
model = Model(inputs, outputs)

# Compile the model
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# Callbacks watch the *training* loss, because fit() below receives no
# validation data. EarlyStopping's patience equals the epoch budget (10), so it
# is not expected to end this run early.
callbacks = [
    EarlyStopping(monitor="loss", patience=10, restore_best_weights=True),
    ReduceLROnPlateau(monitor="loss", factor=0.1, patience=5, min_lr=1e-6)
]

# Fit the classification head (the base model is frozen above)
print("\nTraining the model:")
model.fit(
    processed_images,
    one_hot_labels,
    epochs=epochs,
    batch_size=32,
    callbacks=callbacks,
    verbose=1
)

# Wrap the base model's own input/output as a feature extractor.
# NOTE(review): base_model.trainable is False, so the fit above should not have
# changed its weights; these features are expected to be plain ImageNet
# features — confirm that is the intent.
print("\nExtracting features using the base model:")
feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)

# Use the feature extractor to extract features from the images
features = feature_extractor.predict(processed_images, batch_size=32)

# Print the shape of the extracted features
print(f"Extracted features shape: {features.shape}")  # Example: (2500, 7, 7, 2048)






Training the model:
Epoch 1/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 2s/step - accuracy: 0.5163 - loss: 1.4279 - learning_rate: 0.0010
Epoch 2/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 2s/step - accuracy: 0.8706 - loss: 0.4068 - learning_rate: 0.0010
Epoch 3/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 2s/step - accuracy: 0.9141 - loss: 0.2855 - learning_rate: 0.0010
Epoch 4/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 2s/step - accuracy: 0.9449 - loss: 0.2128 - learning_rate: 0.0010
Epoch 5/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 2s/step - accuracy: 0.9584 - loss: 0.1550 - learning_rate: 0.0010
Epoch 6/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 2s/step - accuracy: 0.9803 - loss: 0.1331 - learning_rate: 0.0010
Epoch 7/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 2s/step - accuracy: 0.9865 - loss: 0.1015 - 

## Dimensionality Reduction

### t-SNE

In [3]:
# Flatten every feature map into one row so t-SNE receives a 2-D matrix.
# Example shape transformation: (2500, 7, 7, 2048) -> (2500, 7*7*2048)
num_samples, height, width, channels = features.shape
flattened_features = features.reshape(num_samples, height * width * channels)

# Apply t-SNE to reduce dimensions to 20.
# - method="exact" is needed because the Barnes-Hut approximation only supports
#   fewer than 4 output components.
# - The iteration count is deliberately left at the library default (1000).
#   The former explicit `n_iter=1000` used a keyword that scikit-learn renamed
#   to `max_iter` and later removed; omitting it gives the same 1000 iterations
#   on both old and new releases.
print("\nApplying t-SNE to reduce dimensions to 20:")
tsne_20 = TSNE(n_components=20, random_state=42, perplexity=30, method="exact")
reduced_features_tsne = tsne_20.fit_transform(flattened_features)
print(f"t-SNE reduced features shape (20D): {reduced_features_tsne.shape}")

# Persist the embedding as CSV (one row per sample) for further use
np.savetxt("20D_tsne.csv", reduced_features_tsne, delimiter=",", fmt="%f")


Applying t-SNE to reduce dimensions to 20:
t-SNE reduced features shape (20D): (2500, 20)


### Autoencoders

In [None]:
from tensorflow.keras.layers import Flatten

# Flatten each feature map into a single row: (N, H, W, C) -> (N, H*W*C)
num_samples, height, width, channels = features.shape
flattened_features = features.reshape(num_samples, height * width * channels)

input_dim = flattened_features.shape[1]
encoding_dim = 2000  # Target reduced dimensionality

# Single-hidden-layer autoencoder on the flat feature vectors
input_layer = Input(shape=(input_dim,))
encoded = Dense(encoding_dim, activation='relu')(input_layer)
# Linear reconstruction layer: the targets are the raw feature maps, which are
# never rescaled to [0, 1] in this notebook. A sigmoid output is capped at 1 and
# therefore cannot reproduce larger activations under an MSE loss.
decoded = Dense(input_dim, activation='linear')(encoded)

# `autoencoder` is trained end to end; `encoder` shares its first layer and is
# used afterwards to produce the reduced representation.
autoencoder = Model(inputs=input_layer, outputs=decoded)
encoder = Model(inputs=input_layer, outputs=encoded)

# Compile and train (input is also the reconstruction target)
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(flattened_features, flattened_features, epochs=5, batch_size=32, shuffle=True)

# Use the encoder to reduce dimensions
reduced_features_autoencoders = encoder.predict(flattened_features)
print("Reduced features shape:", reduced_features_autoencoders.shape)

# Persist the encoded features as CSV (one row per sample)
np.savetxt("2000D_autoencoders.csv", reduced_features_autoencoders, delimiter=",", fmt="%f")

### LDA

In [5]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
# Flatten the 4-D feature maps into one row per sample for LDA
num_samples, height, width, channels = features.shape
flattened_features = features.reshape(num_samples, -1)

# Supervised projection: the class labels from the loaded shard drive the fit
labels = data['targets']

# min(10 - 1, 200) evaluates to 9 components
lda = LDA(n_components=min(10 - 1, 200))
lda.fit(flattened_features, labels)
reduced_features_lda = lda.transform(flattened_features)

# Persist the projected features as CSV (one row per sample)
np.savetxt("9D_lda.csv", reduced_features_lda, delimiter=",", fmt="%f")

## Training

In [6]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# Cross-validate a small MLP classifier on the t-SNE embedding
# (`reduced_features_tsne`) against the class ids in `labels`.

# The softmax output layer is trained against one-hot targets
num_classes = 10
y_one_hot = to_categorical(labels, num_classes=num_classes)

# Shuffled 2-fold split with a fixed seed
kf = KFold(n_splits=2, shuffle=True, random_state=42)

# Per-fold validation metrics
val_losses, val_accuracies = [], []

for train_index, val_index in kf.split(reduced_features_tsne, labels):
    # Slice features and targets for this fold
    X_train = reduced_features_tsne[train_index]
    X_val = reduced_features_tsne[val_index]
    y_train = y_one_hot[train_index]
    y_val = y_one_hot[val_index]

    # Input width follows the embedding dimensionality
    input_layer = Input(shape=(X_train.shape[1],))

    # Hidden stack: Dense(128) + Dropout(0.5), then Dense(64) + Dropout(0.2)
    x = input_layer
    for units, drop_rate in ((128, 0.5), (64, 0.2)):
        x = Dense(units, activation='relu')(x)
        x = Dropout(drop_rate)(x)

    # Softmax head over the classes
    output_layer = Dense(num_classes, activation='softmax')(x)

    # A fresh model is built and compiled for every fold
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    # Fit silently on the training split, tracking the held-out split
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=100,
        batch_size=32,
        shuffle=True,
        verbose=0,
    )

    # Final score on the held-out split
    val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)

# Mean of the per-fold metrics
avg_val_loss = np.mean(val_losses)
avg_val_accuracy = np.mean(val_accuracies)

print(f"Average Validation Loss: {avg_val_loss}")
print(f"Average Validation Accuracy: {avg_val_accuracy}")


Average Validation Loss: 1.5021636486053467
Average Validation Accuracy: 0.5924000144004822


In [7]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Cross-validate a small MLP classifier on the standardised LDA projection.
# NOTE(review): the scaler here (and the LDA projection computed in the LDA
# cell) are fitted on all samples before the split, so validation rows
# influence the preprocessing — confirm this is acceptable for the reported
# scores.
scaler = StandardScaler()
scaler.fit(reduced_features_lda)
reduced_features_lda = scaler.transform(reduced_features_lda)

# The softmax output layer is trained against one-hot targets
num_classes = 10
y_one_hot = to_categorical(labels, num_classes=num_classes)

# Shuffled 2-fold split with a fixed seed
kf = KFold(n_splits=2, shuffle=True, random_state=42)

# Per-fold validation metrics
val_losses, val_accuracies = [], []

# Widths and dropout rates of the two hidden layers
hidden_units = (128, 64)
dropout_rates = (0.1, 0.1)

for train_index, val_index in kf.split(reduced_features_lda, labels):
    # Slice features and targets for this fold
    X_train = reduced_features_lda[train_index]
    X_val = reduced_features_lda[val_index]
    y_train = y_one_hot[train_index]
    y_val = y_one_hot[val_index]

    # Input width follows the number of LDA components
    input_layer = Input(shape=(X_train.shape[1],))

    # Hidden stack: each Dense layer is followed by its Dropout layer
    x = input_layer
    for units, drop_rate in zip(hidden_units, dropout_rates):
        x = Dense(units, activation='relu')(x)
        x = Dropout(drop_rate)(x)

    # Softmax head over the classes
    output_layer = Dense(num_classes, activation='softmax')(x)

    # A fresh model is built and compiled for every fold
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    # Fit silently on the training split, tracking the held-out split
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=100,
        batch_size=32,
        shuffle=True,
        verbose=0,
    )

    # Final score on the held-out split, reported per fold
    val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)
    print(f"Val Accuracy : {val_accuracy}")

# Mean of the per-fold metrics
avg_val_loss = np.mean(val_losses)
avg_val_accuracy = np.mean(val_accuracies)

print(f"Average Validation Loss: {avg_val_loss}")
print(f"Average Validation Accuracy: {avg_val_accuracy}")


Val Accuracy : 0.8295999765396118
Val Accuracy : 0.8511999845504761
Average Validation Loss: 0.6281145215034485
Average Validation Accuracy: 0.840399980545044


In [8]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Cross-validate a wider MLP (512 -> 128) for 500 epochs on
# `reduced_features_lda`. This cell reads that array as left by earlier cells;
# the preceding training cell standardises it in place, so run order matters.

# The softmax output layer is trained against one-hot targets
num_classes = 10
y_one_hot = to_categorical(labels, num_classes=num_classes)

# Shuffled 2-fold split with a fixed seed
kf = KFold(n_splits=2, shuffle=True, random_state=42)

# Per-fold validation metrics
val_losses, val_accuracies = [], []

for train_index, val_index in kf.split(reduced_features_lda, labels):
    # Slice features and targets for this fold
    X_train = reduced_features_lda[train_index]
    X_val = reduced_features_lda[val_index]
    y_train = y_one_hot[train_index]
    y_val = y_one_hot[val_index]

    # Input width follows the number of LDA components
    input_layer = Input(shape=(X_train.shape[1],))

    # Hidden stack: Dense(512) + Dropout(0.2), then Dense(128) + Dropout(0.2)
    x = input_layer
    for units in (512, 128):
        x = Dense(units, activation='relu')(x)
        x = Dropout(0.2)(x)

    # Softmax head over the classes
    output_layer = Dense(num_classes, activation='softmax')(x)

    # A fresh model is built and compiled for every fold
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    # Fit silently on the training split, tracking the held-out split
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=500,
        batch_size=32,
        shuffle=True,
        verbose=0,
    )

    # Final score on the held-out split, reported per fold
    val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)
    val_losses.append(val_loss)
    val_accuracies.append(val_accuracy)
    print(f"Val Accuracy : {val_accuracy}")

# Mean of the per-fold metrics
avg_val_loss = np.mean(val_losses)
avg_val_accuracy = np.mean(val_accuracies)

print(f"Average Validation Loss: {avg_val_loss}")
print(f"Average Validation Accuracy: {avg_val_accuracy}")

Val Accuracy : 0.8223999738693237
Val Accuracy : 0.8375999927520752
Average Validation Loss: 2.079393148422241
Average Validation Accuracy: 0.8299999833106995
