In [None]:
# Mount Google Drive inside the Colab VM at /content/drive; the notebook's
# OUTPUT_DIR lives under this mount point, so this must run before anything is saved.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import cifar10

# --- Settings ---
BATCH_SIZE = 128   # mini-batch size passed to autoencoder.fit
EPOCHS = 30        # number of passes over the training set
LATENT_DIM = 2     # bottleneck width; 2 so the codes can be scatter-plotted directly
SEED = 42          # single seed shared by Python, NumPy and TensorFlow
OUTPUT_DIR = '/content/drive/MyDrive/Colab Notebooks/DL_2025/8/'

# Seed every random number generator up front so that weight initialisation,
# batch shuffling and the random choice of sample images are repeatable
# between runs (GPU kernels may still introduce small numeric differences).
tf.keras.utils.set_random_seed(SEED)

# Make sure the output folders exist before anything is written to them.
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(f'{OUTPUT_DIR}/reconstructions', exist_ok=True)

# --- Load data ---
# CIFAR-10 comes back as integer pixel arrays; convert both splits to float32
# and rescale them to the [0, 1] range the sigmoid decoder output lives in.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train, x_test = (
    split.astype('float32') / 255.
    for split in (x_train, x_test)
)

# --- Encoder ---
# Three stride-2 convolutions halve the spatial size each time (32 -> 16 -> 8 -> 4)
# while widening the channels, then a linear Dense layer projects the flattened
# feature map down to the LATENT_DIM-dimensional code.
encoder_inputs = layers.Input(shape=(32,32,3))
x = encoder_inputs
for n_filters in (32, 64, 128):
    x = layers.Conv2D(n_filters, 3, strides=2, padding='same', activation='relu')(x)
x = layers.Flatten()(x)
latent = layers.Dense(LATENT_DIM, name='latent2d')(x)
encoder = models.Model(encoder_inputs, latent, name='encoder')

# --- Decoder ---
# Mirror of the encoder: expand the latent code to a 4x4x128 feature map, then
# double the spatial size with each transposed convolution (4 -> 8 -> 16 -> 32).
# The final sigmoid convolution emits 3 channels in [0, 1].
latent_inputs = layers.Input(shape=(LATENT_DIM,))
x = layers.Dense(4*4*128, activation='relu')(latent_inputs)
x = layers.Reshape((4,4,128))(x)
for n_filters in (128, 64, 32):
    x = layers.Conv2DTranspose(n_filters, 3, strides=2, padding='same', activation='relu')(x)
decoder_outputs = layers.Conv2D(3, 3, padding='same', activation='sigmoid')(x)
decoder = models.Model(latent_inputs, decoder_outputs, name='decoder')

# --- Autoencoder ---
# Chain encoder and decoder into one end-to-end model; it is trained to
# reproduce its own input under a mean-squared-error loss.
ae_input = encoder_inputs
latent_code = encoder(ae_input)
ae_output = decoder(latent_code)
autoencoder = models.Model(inputs=ae_input, outputs=ae_output, name='autoencoder')
autoencoder.compile(loss='mse', optimizer='adam')

# --- Train ---
# Inputs double as targets (reconstruction task); the test split is used for
# the per-epoch validation loss.
fit_options = dict(
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    shuffle=True,
    validation_data=(x_test, x_test),
)
history = autoencoder.fit(x_train, x_train, **fit_options)

# Persist the per-epoch loss curves, one column per metric in history.history.
loss_table = pd.DataFrame(history.history)
loss_table.to_csv(f'{OUTPUT_DIR}/loss_history.csv', index=False)

# --- Save sample reconstructions ---
# Pick n test images, run them through the autoencoder, and save a 2 x n grid:
# originals on the top row, reconstructions on the bottom row.
n = 10
# replace=False guarantees n distinct images; the default samples with
# replacement and can show the same picture twice.
idxs = np.random.choice(len(x_test), n, replace=False)
recons = autoencoder.predict(x_test[idxs])
# squeeze=False keeps `axes` two-dimensional even when n is set to 1, so the
# axes[row, col] indexing below always works.
fig, axes = plt.subplots(2, n, figsize=(20,4), squeeze=False)
for i in range(n):
    axes[0,i].imshow(x_test[idxs[i]])
    axes[0,i].axis('off')
    axes[1,i].imshow(recons[i])
    axes[1,i].axis('off')
fig.savefig(f'{OUTPUT_DIR}/reconstructions/sample_recons.png')

# --- Encode to 2D features ---
# Embed every image (train followed by test) with the trained encoder and store
# the two latent coordinates next to the flattened class labels.
features = encoder.predict(np.concatenate((x_train, x_test), axis=0))
labels = np.concatenate((y_train, y_test)).ravel()
df = pd.DataFrame(features, columns=['dim1','dim2']).assign(label=labels)
df.to_csv(f'{OUTPUT_DIR}/latent_2d.csv', index=False)

# --- Plot latent space ---
# Scatter of the latent codes, coloured by class label (one tab10 colour per class).
latent_fig, latent_ax = plt.subplots(figsize=(8,6))
scatter = latent_ax.scatter(df['dim1'], df['dim2'], c=df['label'], cmap='tab10', s=5)
latent_fig.colorbar(scatter, ax=latent_ax, ticks=range(10))
latent_ax.set_title('2D latent space of CIFAR‑10')
latent_fig.savefig(f'{OUTPUT_DIR}/latent2d_plot.png')
# No plt.close() here, so the figure stays open after it has been saved.


In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.image import resize

In [None]:
# --- Gather train + test into single arrays (the resize itself happens later) ---
# Images are stacked along the sample axis; labels are flattened to 1-D.
x_all = np.concatenate((x_train, x_test), axis=0)
y_all = np.concatenate((y_train, y_test), axis=0).ravel()

In [None]:
def resize_in_batches(images, target_size=(224, 224), batch_size=100):
    """Resize a stack of images to `target_size`, `batch_size` images at a time.

    Working in batches keeps each individual `tf.image.resize` call small, and
    the results are written straight into one preallocated output array, so
    the full-size result exists only once in memory (no list of batches plus a
    stacked copy).

    Args:
        images: array-like of shape (num_images, height, width, channels).
        target_size: (new_height, new_width) passed to `tf.image.resize`.
        batch_size: number of images resized per call; must be positive.

    Returns:
        float32 NumPy array of shape (num_images, new_height, new_width, channels).
        A zero-length input yields a zero-length output.

    Raises:
        ValueError: if `batch_size` is not positive or `images` is not 4-D.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    images = np.asarray(images)
    if images.ndim != 4:
        raise ValueError(
            f"images must have shape (N, H, W, C), got shape {images.shape}"
        )

    num_images = len(images)
    new_height, new_width = target_size
    # tf.image.resize returns float32 with its default interpolation method,
    # so the output buffer is allocated as float32 up front.
    resized_images = np.empty(
        (num_images, new_height, new_width, images.shape[-1]), dtype=np.float32
    )
    for start in range(0, num_images, batch_size):
        stop = start + batch_size
        resized_images[start:stop] = tf.image.resize(images[start:stop], target_size).numpy()
    return resized_images

# Resize all images (train + test) in smaller batches
x_all_resized = resize_in_batches(x_all, target_size=(224, 224))
# x_all holds floats in [0, 1] (the pixels were divided by 255 at load time),
# but ResNet50's preprocess_input expects RGB values in [0, 255]: it only
# reorders the channels and subtracts the ImageNet channel means. Undo the
# scaling first, otherwise every image collapses to roughly "minus the mean".
# The multiplication is done in place to avoid a second full-size copy.
x_all_resized *= 255.0
x_all_resized = preprocess_input(x_all_resized)


In [None]:
# --- Load ResNet50 without top classifier ---
# ImageNet-pretrained backbone used purely as a fixed feature extractor.
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
resnet_model = Model(inputs=base_model.input, outputs=base_model.output)

# --- Extract features (shape: N, 7,7,2048) -> flatten ---
# NOTE(review): each flattened row has 7*7*2048 = 100352 values (~0.4 MB as
# float32), so this matrix is large for the full dataset — confirm it fits in RAM.
features_resnet = resnet_model.predict(x_all_resized, batch_size=64, verbose=1)
num_samples = features_resnet.shape[0]
features_resnet = features_resnet.reshape((num_samples, -1))  # (N, 100352)

In [None]:
# --- PCA Reduction to 50 dims, then t-SNE to 2D ---
# PCA first brings the very wide ResNet feature vectors down to 50 dimensions
# so that t-SNE runs on a manageable input.
print("Running PCA...")
# random_state is fixed here as well: for inputs this large scikit-learn may
# choose a randomized SVD solver, and without a seed the 50-D projection (and
# therefore the t-SNE map built on it) would differ from run to run.
pca = PCA(n_components=50, random_state=42)
resnet_pca50 = pca.fit_transform(features_resnet)

print("Running t-SNE...")
tsne = TSNE(n_components=2, random_state=42, perplexity=30, init='pca', learning_rate='auto')
resnet_tsne2d = tsne.fit_transform(resnet_pca50)

# --- Save features ---
# Same column layout as the autoencoder CSV: dim1, dim2, label.
df_resnet = pd.DataFrame(resnet_tsne2d, columns=['dim1', 'dim2'])
df_resnet['label'] = y_all
df_resnet.to_csv(f'{OUTPUT_DIR}/resnet50_tsne2d.csv', index=False)

In [None]:
# --- Plot AE vs ResNet+tSNE side by side ---
# One panel per embedding: left is the autoencoder latent space, right is the
# ResNet50 -> PCA -> t-SNE map. Both are coloured by class label.
fig, axs = plt.subplots(1, 2, figsize=(16, 6))

panels = (
    (df, 'Autoencoder 2D Latent Space'),
    (df_resnet, 'ResNet50 Features + PCA + t-SNE'),
)
for ax, (embedding, panel_title) in zip(axs, panels):
    ax.scatter(embedding['dim1'], embedding['dim2'], c=embedding['label'], cmap='tab10', s=5)
    ax.set_title(panel_title)
    ax.set_xlabel('dim1')
    ax.set_ylabel('dim2')

plt.tight_layout()
plt.savefig(f'{OUTPUT_DIR}/ae_vs_resnet_tsne.png')
plt.close()
