In [7]:
pip install scikit-image


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt

# =========================
# Mixed precision setup (only switched on when a GPU is visible)
# =========================
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    # CPU-only run: the default Keras dtype policy is left untouched.
    print("No GPU detected. Training will run on CPU.")
else:
    print("GPU detected. Enabling mixed precision training.")
    from tensorflow.keras import mixed_precision
    # Install 'mixed_float16' as the global policy for layers created afterwards.
    policy = mixed_precision.Policy('mixed_float16')
    mixed_precision.set_global_policy(policy)

# =========================
# 1. Preprocessing Functions
# =========================
def load_and_preprocess_video(video_path, target_size=(256, 256), max_frames=500):
    """Read up to ``max_frames`` frames from a video as normalized RGB arrays.

    Each frame is resized to ``target_size``, converted from BGR to RGB and
    rescaled from [0, 255] to [-1, 1].

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        target_size: ``(width, height)`` passed to ``cv2.resize``.
        max_frames: Upper bound on the number of frames read.

    Returns:
        A float32 array of shape ``(n_frames, height, width, 3)``. When the
        video cannot be opened or contains no readable frame, the array has
        ``n_frames == 0`` but keeps the trailing dimensions, so it can still be
        concatenated with the frames of other videos.
    """
    print(f"Loading video: {video_path}")
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, target_size)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Cast before scaling: dividing a uint8 frame directly would
            # promote to float64 and double the memory used per frame.
            frames.append(frame.astype(np.float32) / 127.5 - 1.0)  # Normalize to [-1, 1]
    finally:
        # Release the capture handle even if resizing/conversion raised.
        cap.release()
    print(f"Loaded {len(frames)} frames from {video_path}")
    if not frames:
        width, height = target_size
        return np.empty((0, height, width, 3), dtype=np.float32)
    return np.stack(frames)

def load_videos_from_folder_subset(folder_path, target_size=(256, 256), max_frames=500, num_videos=10):
    """Load frames from the first ``num_videos`` ``.avi`` files of a folder.

    Files are picked in sorted filename order; the extension match is
    case-insensitive. Videos that yield no frames are skipped (with a message)
    so that one unreadable file does not break the concatenation of the rest.

    Args:
        folder_path: Directory containing the video files.
        target_size: Forwarded to ``load_and_preprocess_video``.
        max_frames: Per-video frame limit, forwarded to
            ``load_and_preprocess_video``.
        num_videos: Maximum number of video files to read.

    Returns:
        All loaded frames concatenated along axis 0, or an empty 1-D array
        when no frame could be loaded at all.
    """
    all_videos = sorted(f for f in os.listdir(folder_path) if f.lower().endswith('.avi'))
    per_video = []
    for video_file in all_videos[:num_videos]:
        video_path = os.path.join(folder_path, video_file)
        video_frames = load_and_preprocess_video(video_path, target_size, max_frames)
        if video_frames.size == 0:
            # An empty result has no compatible shape to concatenate with.
            print(f"Skipping {video_path}: no frames could be read")
            continue
        per_video.append(video_frames)
    total_frames = np.concatenate(per_video, axis=0) if per_video else np.array([])
    print(f"Total frames loaded from folder {folder_path}: {len(total_frames)}")
    return total_frames


No GPU detected. Training will run on CPU.


In [3]:
# =========================
# 2. Residual Block (Simplified)
# =========================
def residual_block(x, filters, kernel_size=3):
    """Apply a two-convolution residual block to ``x``.

    The main path is Conv -> BatchNorm -> LeakyReLU -> Conv -> BatchNorm; the
    result is added to the shortcut and passed through a final LeakyReLU.

    Args:
        x: Input feature map (a Keras tensor).
        filters: Number of output channels of both convolutions.
        kernel_size: Kernel size of both convolutions.

    Returns:
        The output tensor of the block, with ``filters`` channels.
    """
    shortcut = x
    # Add() needs matching shapes. When the incoming channel count differs from
    # `filters`, project the shortcut with a 1x1 convolution; otherwise the
    # identity shortcut is kept and behaviour is unchanged.
    # NOTE(review): assumes channels-last tensors (channel count in shape[-1]).
    if shortcut.shape[-1] != filters:
        shortcut = layers.Conv2D(filters, 1, padding='same', use_bias=False)(shortcut)

    x = layers.Conv2D(filters, kernel_size, padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU(negative_slope=0.2)(x)
    x = layers.Conv2D(filters, kernel_size, padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Add()([shortcut, x])
    x = layers.LeakyReLU(negative_slope=0.2)(x)
    return x

# =========================
# 3. Simplified GAN Architectures
# =========================
class TemporalGAN:
    """Generator/discriminator pair with their optimizers and loss functions.

    The generator maps a latent vector to one frame in [-1, 1] (tanh output);
    the discriminator maps one frame to a single real/fake logit.

    Args:
        frame_shape: ``(height, width, channels)`` of the frames. Height and
            width must be positive multiples of 16 because the generator
            upsamples its base feature map four times by a factor of 2.
        latent_dim: Size of the generator's input noise vector.

    Raises:
        ValueError: If ``frame_shape`` is not a valid 3-tuple as described.
    """

    # Four stride-2 transposed convolutions => total spatial upsampling of 16.
    _UPSAMPLE_FACTOR = 16

    def __init__(self, frame_shape=(256, 256, 3), latent_dim=128):
        print("Initializing TemporalGAN...")
        # Validate before building anything so a bad shape fails fast.
        self.frame_shape = self._validate_frame_shape(frame_shape)
        self.latent_dim = latent_dim

        self.generator = self.build_generator()
        self.discriminator = self.build_discriminator()

        # Use a slightly lower learning rate for discriminator to slow its convergence
        self.gen_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
        self.disc_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.5)
        # The discriminator ends in a plain Dense(1), so losses work on logits.
        self.cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
        print("TemporalGAN initialized.")

    @staticmethod
    def _validate_frame_shape(frame_shape):
        """Return ``frame_shape`` as a tuple, or raise ValueError if unusable."""
        frame_shape = tuple(frame_shape)
        if len(frame_shape) != 3:
            raise ValueError(
                f"frame_shape must be (height, width, channels), got {frame_shape}"
            )
        height, width, channels = frame_shape
        factor = TemporalGAN._UPSAMPLE_FACTOR
        if height < factor or width < factor or height % factor or width % factor:
            raise ValueError(
                f"frame height and width must be positive multiples of {factor}, "
                f"got {height}x{width}"
            )
        if channels < 1:
            raise ValueError(f"frame_shape needs at least one channel, got {channels}")
        return frame_shape

    def build_generator(self):
        """Build the generator: latent vector -> frame of ``self.frame_shape``."""
        print("Building generator with residual blocks...")
        height, width, channels = self.frame_shape
        # Base resolution that reaches (height, width) after four 2x upsamplings
        # (16x16 for the default 256x256 frames).
        base_h = height // self._UPSAMPLE_FACTOR
        base_w = width // self._UPSAMPLE_FACTOR

        inputs = layers.Input(shape=(self.latent_dim,))
        x = layers.Dense(base_h * base_w * 256, use_bias=False)(inputs)
        x = layers.BatchNormalization()(x)
        x = layers.LeakyReLU(negative_slope=0.2)(x)
        x = layers.Reshape((base_h, base_w, 256))(x)

        # 1st 2x upsample (32x32 for default frames), then a residual block
        x = layers.Conv2DTranspose(128, (5, 5), strides=(2, 2), padding='same', use_bias=False)(x)
        x = layers.BatchNormalization()(x)
        x = layers.LeakyReLU(negative_slope=0.2)(x)
        x = residual_block(x, 128)

        # 2nd 2x upsample (64x64 for default frames), then a residual block
        x = layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False)(x)
        x = layers.BatchNormalization()(x)
        x = layers.LeakyReLU(negative_slope=0.2)(x)
        x = residual_block(x, 64)

        # 3rd 2x upsample (128x128 for default frames)
        x = layers.Conv2DTranspose(32, (5, 5), strides=(2, 2), padding='same', use_bias=False)(x)
        x = layers.BatchNormalization()(x)
        x = layers.LeakyReLU(negative_slope=0.2)(x)

        # 4th 2x upsample to the full frame size. dtype is pinned to float32 so
        # the model output stays float32 even under a mixed_float16 policy.
        outputs = layers.Conv2DTranspose(
            channels, (5, 5), strides=(2, 2), padding='same',
            activation='tanh', dtype='float32',
        )(x)

        model = models.Model(inputs, outputs)
        print("Generator built.")
        return model

    def build_discriminator(self):
        """Build the discriminator: frame of ``self.frame_shape`` -> one logit."""
        print("Building simplified discriminator...")
        model = models.Sequential([
            layers.InputLayer(shape=self.frame_shape),
            # Input noise and dropout regularize the discriminator.
            layers.GaussianNoise(0.1),
            layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same'),
            layers.LeakyReLU(negative_slope=0.2),
            layers.Dropout(0.4),
            layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'),
            layers.LeakyReLU(negative_slope=0.2),
            layers.Dropout(0.4),
            layers.Conv2D(256, (5, 5), strides=(2, 2), padding='same'),
            layers.LeakyReLU(negative_slope=0.2),
            layers.Dropout(0.4),
            layers.Flatten(),
            # Raw logit, kept in float32 even under a mixed_float16 policy.
            layers.Dense(1, dtype='float32'),
        ])
        print("Discriminator built.")
        return model

    def generator_loss(self, fake_output):
        """Cross-entropy of the fake logits against an all-ones ("real") target."""
        return self.cross_entropy(tf.ones_like(fake_output), fake_output)

    def discriminator_loss(self, real_output, fake_output):
        """Sum of the real and fake cross-entropy terms for the discriminator.

        Real logits are scored against a target of 0.9 rather than 1.0
        (one-sided label smoothing); fake logits against 0.
        """
        real_loss = self.cross_entropy(tf.ones_like(real_output) * 0.9, real_output)
        fake_loss = self.cross_entropy(tf.zeros_like(fake_output), fake_output)
        return real_loss + fake_loss

In [5]:
# =========================
# 4. Create an Optimized Data Pipeline
# =========================
def create_dataset(frames, batch_size, shuffle_buffer=None):
    """Build an endlessly repeating, shuffled and batched ``tf.data`` pipeline.

    Args:
        frames: Array-like of frames; sliced along its first axis.
        batch_size: Number of frames per batch.
        shuffle_buffer: Size of the shuffle buffer. Defaults to the number of
            frames, which gives a full shuffle. The frames passed in by this
            notebook are concatenated video by video, so a buffer smaller than
            the dataset would keep neighbouring frames together.

    Returns:
        A ``tf.data.Dataset`` that yields float32 batches forever
        (``repeat()`` is applied), with prefetching enabled.
    """
    # Cast once up front: float64 input would double the memory footprint.
    frames = np.asarray(frames, dtype=np.float32)
    if shuffle_buffer is None:
        shuffle_buffer = max(len(frames), 1)
    dataset = tf.data.Dataset.from_tensor_slices(frames)
    dataset = dataset.shuffle(shuffle_buffer).batch(batch_size).repeat().prefetch(tf.data.AUTOTUNE)
    return dataset

In [7]:
# =========================
# 6. Data Loading and Running Training
# =========================
# Dataset root on this machine; training clips are read from <root>/train/BenchPress.
root_folder = '/Users/omvishal/Desktop/B'  # <<<--- Change this to your folder path
train_videos_folder = os.path.join(root_folder, "train", "BenchPress")
print(f"Loading training videos from: {train_videos_folder}")
# At most 10 clips, each capped at 500 frames of 256x256.
train_frames = load_videos_from_folder_subset(
    train_videos_folder,
    target_size=(256, 256),
    max_frames=500,
    num_videos=10,
)
print(f"Total training frames loaded: {train_frames.shape[0]}")


Loading training videos from: /Users/omvishal/Desktop/B/train/BenchPress
Loading video: /Users/omvishal/Desktop/B/train/BenchPress/v_BenchPress_g01_c01.avi
Loaded 151 frames from /Users/omvishal/Desktop/B/train/BenchPress/v_BenchPress_g01_c01.avi
Loading video: /Users/omvishal/Desktop/B/train/BenchPress/v_BenchPress_g01_c03.avi
Loaded 104 frames from /Users/omvishal/Desktop/B/train/BenchPress/v_BenchPress_g01_c03.avi
Loading video: /Users/omvishal/Desktop/B/train/BenchPress/v_BenchPress_g01_c04.avi
Loaded 105 frames from /Users/omvishal/Desktop/B/train/BenchPress/v_BenchPress_g01_c04.avi
Loading video: /Users/omvishal/Desktop/B/train/BenchPress/v_BenchPress_g01_c05.avi
Loaded 88 frames from /Users/omvishal/Desktop/B/train/BenchPress/v_BenchPress_g01_c05.avi
Loading video: /Users/omvishal/Desktop/B/train/BenchPress/v_BenchPress_g01_c06.avi
Loaded 111 frames from /Users/omvishal/Desktop/B/train/BenchPress/v_BenchPress_g01_c06.avi
Loading video: /Users/omvishal/Desktop/B/train/BenchPress/

In [17]:
# Training hyperparameters and the output folder name for this experiment.
epochs, batch_size, latent_dim = 50, 8, 128
output_dir = "synthetic_frames_test7_50epoch"

In [21]:
# Sanity check: print the entries of the validation folder for this class.
# os.listdir returns entries in arbitrary order.
val_dir = os.path.join(root_folder, "val", "BenchPress")
print("Contents of", val_dir, "→", os.listdir(val_dir))

Contents of /Users/omvishal/Desktop/B/val/BenchPress → ['v_BenchPress_g12_c06.avi', 'v_BenchPress_g02_c07.avi', 'v_BenchPress_g03_c01.avi', 'v_BenchPress_g02_c01.avi', 'v_BenchPress_g12_c03.avi', 'v_BenchPress_g19_c03.avi', 'v_BenchPress_g09_c03.avi', 'v_BenchPress_g18_c06.avi', 'v_BenchPress_g09_c05.avi', 'v_BenchPress_g19_c06.avi', 'v_BenchPress_g09_c06.avi', 'v_BenchPress_g22_c03.avi', 'v_BenchPress_g05_c05.avi', 'v_BenchPress_g22_c05.avi', 'v_BenchPress_g15_c01.avi', 'v_BenchPress_g04_c05.avi', 'v_BenchPress_g20_c05.avi', 'v_BenchPress_g20_c07.avi', 'v_BenchPress_g17_c05.avi', 'v_BenchPress_g07_c05.avi']


In [23]:
# Validation sample: up to 16 frames from the first .avi (sorted by name)
# in the validation folder.
val_folder = os.path.join(root_folder, "val", "BenchPress")
val_frames = load_videos_from_folder_subset(val_folder, target_size=(256,256), max_frames=16, num_videos=1)
# Fail fast if nothing was loaded.
if not val_frames.size:
    raise RuntimeError(f"No frames loaded from validation folder: {val_folder}")

Loading video: /Users/omvishal/Desktop/B/val/BenchPress/v_BenchPress_g02_c01.avi
Loaded 16 frames from /Users/omvishal/Desktop/B/val/BenchPress/v_BenchPress_g02_c01.avi
Total frames loaded from folder /Users/omvishal/Desktop/B/val/BenchPress: 16


In [9]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
from skimage.metrics import peak_signal_noise_ratio, structural_similarity

# ===== GPU + Mixed Precision Setup  =====
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Memory growth can only be set before the GPUs are initialized. In a
        # notebook an earlier cell may already have touched the GPU, so report
        # the problem and continue instead of aborting the cell.
        print(f"Could not enable GPU memory growth: {err}")
    from tensorflow.keras import mixed_precision
    mixed_precision.set_global_policy('mixed_float16')
    print("GPU + mixed precision enabled.")
else:
    print("No GPU detected; running on CPU.")


# ===== 0. Validate & Load Validation Frames =====
root_folder = '/Users/omvishal/Desktop/B'
val_folder = os.path.join(root_folder, "val", "BenchPress")

print("Contents of validation folder:", os.listdir(val_folder))

# Up to 16 frames from the first clip (sorted by name) of the validation folder.
val_frames = load_videos_from_folder_subset(val_folder, target_size=(256,256), max_frames=16, num_videos=1)
if not val_frames.size:
    raise RuntimeError(f"No frames loaded from validation folder: {val_folder}")
# Cap at 16 frames so the validation set never exceeds that size.
val_frames = val_frames[:16]
print(f"Validation frames shape: {val_frames.shape}")  # should be (16,256,256,3)

# ===== Training frames: at most 10 clips, 500 frames each =====
train_videos_folder = os.path.join(root_folder, "train", "BenchPress")
train_frames = load_videos_from_folder_subset(train_videos_folder, target_size=(256,256), max_frames=500, num_videos=10)
print(f"Training frames shape: {train_frames.shape}")

No GPU detected; running on CPU.
Contents of validation folder: ['v_BenchPress_g12_c06.avi', 'v_BenchPress_g02_c07.avi', 'v_BenchPress_g03_c01.avi', 'v_BenchPress_g02_c01.avi', 'v_BenchPress_g12_c03.avi', 'v_BenchPress_g19_c03.avi', 'v_BenchPress_g09_c03.avi', 'v_BenchPress_g18_c06.avi', 'v_BenchPress_g09_c05.avi', 'v_BenchPress_g19_c06.avi', 'v_BenchPress_g09_c06.avi', 'v_BenchPress_g22_c03.avi', 'v_BenchPress_g05_c05.avi', 'v_BenchPress_g22_c05.avi', 'v_BenchPress_g15_c01.avi', 'v_BenchPress_g04_c05.avi', 'v_BenchPress_g20_c05.avi', 'v_BenchPress_g20_c07.avi', 'v_BenchPress_g17_c05.avi', 'v_BenchPress_g07_c05.avi']
Loading video: /Users/omvishal/Desktop/B/val/BenchPress/v_BenchPress_g02_c01.avi
Loaded 16 frames from /Users/omvishal/Desktop/B/val/BenchPress/v_BenchPress_g02_c01.avi
Total frames loaded from folder /Users/omvishal/Desktop/B/val/BenchPress: 16
Validation frames shape: (16, 256, 256, 3)
Loading video: /Users/omvishal/Desktop/B/train/BenchPress/v_BenchPress_g01_c01.avi
Loa

In [37]:
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from skimage.metrics import peak_signal_noise_ratio, structural_similarity

def _to_uint8(images):
    """Map images from the [-1, 1] range to uint8 pixels in [0, 255]."""
    scaled = (np.asarray(images, dtype=np.float32) + 1.0) * 127.5
    # Round and clip instead of truncating, so 254.999 becomes 255 (not 254)
    # and slightly out-of-range values cannot wrap around.
    return np.clip(np.rint(scaled), 0, 255).astype(np.uint8)


def _gan_train_step(tgan, real, batch_size, latent_dim):
    """Run two generator updates and one discriminator update on one real batch.

    Returns the generator loss of the second generator update and the
    discriminator loss, both as Python floats.
    """
    # Generator updates (x2)
    for _ in range(2):
        noise = tf.random.normal([batch_size, latent_dim])
        with tf.GradientTape() as gen_tape:
            fake = tgan.generator(noise, training=True)
            loss_g = tgan.generator_loss(tgan.discriminator(fake, training=True))
        grads = gen_tape.gradient(loss_g, tgan.generator.trainable_variables)
        tgan.gen_optimizer.apply_gradients(zip(grads, tgan.generator.trainable_variables))

    # Discriminator update (x1)
    noise = tf.random.normal([batch_size, latent_dim])
    with tf.GradientTape() as disc_tape:
        fake = tgan.generator(noise, training=True)
        real_out = tgan.discriminator(real, training=True)
        fake_out = tgan.discriminator(fake, training=True)
        loss_d = tgan.discriminator_loss(real_out, fake_out)
    grads = disc_tape.gradient(loss_d, tgan.discriminator.trainable_variables)
    tgan.disc_optimizer.apply_gradients(zip(grads, tgan.discriminator.trainable_variables))

    return float(loss_g.numpy()), float(loss_d.numpy())


def _validation_metrics(tgan, val_frames, latent_dim):
    """Return mean PSNR and SSIM between validation frames and generated frames.

    Generated frames come from fresh random noise and are paired with the
    validation frames by position only.
    """
    noise = tf.random.normal([len(val_frames), latent_dim])
    fake_uint = _to_uint8(tgan.generator(noise, training=False).numpy())
    real_uint = _to_uint8(val_frames)

    psnr = [peak_signal_noise_ratio(r, f, data_range=255)
            for r, f in zip(real_uint, fake_uint)]
    ssim = [structural_similarity(r, f, data_range=255, channel_axis=-1)
            for r, f in zip(real_uint, fake_uint)]
    return float(np.mean(psnr)), float(np.mean(ssim))


def _save_comparison_grid(tgan, val_frames, latent_dim, epoch_number, output_dir, max_columns=4):
    """Save a real (top row) vs generated (bottom row) grid; return its path.

    Returns None without writing anything when there are no validation frames.
    """
    n_cols = min(max_columns, len(val_frames))
    if n_cols == 0:
        return None

    real_disp = _to_uint8(val_frames[:n_cols])
    noise = tf.random.normal([n_cols, latent_dim])
    fake_disp = _to_uint8(tgan.generator(noise, training=False).numpy())

    # squeeze=False keeps `axes` 2-D even when only one column is drawn.
    fig, axes = plt.subplots(2, n_cols, figsize=(3 * n_cols, 6), squeeze=False)
    for i in range(n_cols):
        axes[0, i].imshow(real_disp[i])
        axes[0, i].axis('off')
        axes[1, i].imshow(fake_disp[i])
        axes[1, i].axis('off')
    fig.suptitle(f"Epoch {epoch_number}: Real (top) vs Generated (bottom)")
    fig.tight_layout()

    grid_path = os.path.join(output_dir, f"epoch{epoch_number}_comparison.png")
    fig.savefig(grid_path)
    plt.close(fig)
    return grid_path


def _plot_training_history(gen_losses, disc_losses, metric_epochs, psnr_scores, ssim_scores, output_dir):
    """Save losses, PSNR and SSIM as three panels in training_and_metrics.png."""
    fig, (ax_loss, ax_psnr, ax_ssim) = plt.subplots(1, 3, figsize=(18, 5))

    loss_epochs = range(1, len(gen_losses) + 1)
    ax_loss.plot(loss_epochs, gen_losses, label='Generator Loss')
    ax_loss.plot(loss_epochs, disc_losses, label='Discriminator Loss')
    ax_loss.set_title('Losses')
    ax_loss.set_xlabel('Epoch')
    ax_loss.legend()

    # Metrics are plotted against the epochs at which they were measured, so
    # they stay aligned with the losses when val_interval > 1.
    ax_psnr.plot(metric_epochs, psnr_scores, label='PSNR')
    ax_psnr.set_title('PSNR')
    ax_psnr.set_xlabel('Epoch')
    ax_psnr.legend()

    ax_ssim.plot(metric_epochs, ssim_scores, label='SSIM')
    ax_ssim.set_title('SSIM')
    ax_ssim.set_xlabel('Epoch')
    ax_ssim.legend()

    fig.tight_layout()
    fig.savefig(os.path.join(output_dir, "training_and_metrics.png"))
    plt.close(fig)


def train_temporal_gan_with_visuals(
    frames,
    val_frames,
    epochs=50,
    batch_size=8,
    latent_dim=128,
    output_dir="synthetic_frames_with_visuals",
    val_interval=1,
    sample_interval=10
):
    """Train a TemporalGAN on ``frames`` and write diagnostics to ``output_dir``.

    Every training step performs two generator updates and one discriminator
    update. Per epoch the mean losses are printed; every ``val_interval``
    epochs PSNR/SSIM are computed against ``val_frames``; every
    ``sample_interval`` epochs a real-vs-generated image grid is saved. A
    summary figure ``training_and_metrics.png`` is saved at the end.

    Args:
        frames: Training frames, indexed along axis 0 and scaled to [-1, 1].
            The per-frame shape is taken from this array and passed to
            ``TemporalGAN`` as ``frame_shape``.
        val_frames: Frames used for PSNR/SSIM and the comparison grids; may be
            empty or None, in which case both are skipped.
        epochs: Number of training epochs.
        batch_size: Batch size for real and generated frames.
        latent_dim: Size of the generator's noise vector.
        output_dir: Directory for saved figures (created if missing).
        val_interval: Compute PSNR/SSIM every this many epochs; 0 or None
            disables it.
        sample_interval: Save a comparison grid every this many epochs; 0 or
            None disables it.

    Returns:
        The trained ``TemporalGAN``. The recorded curves are attached to it as
        ``history``, a dict with keys ``gen_loss``, ``disc_loss``,
        ``metric_epochs``, ``psnr`` and ``ssim``.

    Raises:
        ValueError: If ``frames`` is empty or ``batch_size`` is not positive.
    """
    # Validate first, before creating directories or building models.
    if len(frames) == 0:
        raise ValueError("frames is empty; there is nothing to train on")
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
    val_frames = np.asarray([] if val_frames is None else val_frames)

    os.makedirs(output_dir, exist_ok=True)

    gen_losses, disc_losses = [], []
    metric_epochs, psnr_scores, ssim_scores = [], [], []

    dataset = create_dataset(frames, batch_size)
    tgan = TemporalGAN(frame_shape=tuple(np.shape(frames)[1:]), latent_dim=latent_dim)
    # At least one step per epoch, even with fewer frames than one full batch;
    # otherwise the epoch mean would be taken over an empty list (NaN).
    steps_per_epoch = max(1, len(frames) // batch_size)
    # The dataset repeats forever, so one iterator serves all epochs and the
    # shuffle buffer is not refilled from scratch at every epoch.
    batches = iter(dataset)

    for epoch in range(epochs):
        batch_g, batch_d = [], []
        for _ in range(steps_per_epoch):
            real = next(batches)
            loss_g, loss_d = _gan_train_step(tgan, real, batch_size, latent_dim)
            batch_g.append(loss_g)
            batch_d.append(loss_d)

        # Record average losses
        avg_g, avg_d = np.mean(batch_g), np.mean(batch_d)
        gen_losses.append(avg_g)
        disc_losses.append(avg_d)
        print(f"Epoch {epoch+1}/{epochs} – gen_loss={avg_g:.4f}, disc_loss={avg_d:.4f}")

        # PSNR/SSIM logging
        if val_interval and val_frames.size and (epoch+1) % val_interval == 0:
            ep_psnr, ep_ssim = _validation_metrics(tgan, val_frames, latent_dim)
            metric_epochs.append(epoch + 1)
            psnr_scores.append(ep_psnr)
            ssim_scores.append(ep_ssim)
            print(f" → val PSNR={ep_psnr:.2f}, SSIM={ep_ssim:.4f}")
        else:
            print(" → Skipping PSNR/SSIM this epoch.")

        # Real vs. generated frame grids
        if sample_interval and (epoch+1) % sample_interval == 0:
            grid_path = _save_comparison_grid(tgan, val_frames, latent_dim, epoch + 1, output_dir)
            if grid_path is not None:
                print(f" → Saved comparison grid: {grid_path}")

    # Final plot: losses + metrics
    _plot_training_history(gen_losses, disc_losses, metric_epochs, psnr_scores, ssim_scores, output_dir)

    # Keep the curves available to the caller without changing the return value.
    tgan.history = {
        "gen_loss": gen_losses,
        "disc_loss": disc_losses,
        "metric_epochs": metric_epochs,
        "psnr": psnr_scores,
        "ssim": ssim_scores,
    }
    return tgan

In [39]:
# Train for 50 epochs with validation metrics every epoch and a comparison
# grid every 10 epochs; figures are written to ./exp_with_visuals.
trained_gan = train_temporal_gan_with_visuals(
    train_frames,
    val_frames,
    epochs=50, batch_size=8, latent_dim=128,
    output_dir="exp_with_visuals",
    val_interval=1, sample_interval=10,
)


Initializing TemporalGAN...
Building generator with residual blocks...
Generator built.
Building simplified discriminator...
Discriminator built.
TemporalGAN initialized.
Epoch 1/50 – gen_loss=1.1724, disc_loss=1.1147
 → val PSNR=9.34, SSIM=0.0999
Epoch 2/50 – gen_loss=1.0756, disc_loss=1.2155
 → val PSNR=9.04, SSIM=0.1166
Epoch 3/50 – gen_loss=1.0808, disc_loss=1.1682
 → val PSNR=8.48, SSIM=0.1053
Epoch 4/50 – gen_loss=1.1456, disc_loss=1.1332
 → val PSNR=8.99, SSIM=0.0978
Epoch 5/50 – gen_loss=1.2530, disc_loss=1.0772
 → val PSNR=8.41, SSIM=0.0885
Epoch 6/50 – gen_loss=1.4029, disc_loss=1.0167
 → val PSNR=9.37, SSIM=0.1281
Epoch 7/50 – gen_loss=1.4912, disc_loss=0.9961
 → val PSNR=9.31, SSIM=0.1398
Epoch 8/50 – gen_loss=1.5512, disc_loss=0.9391
 → val PSNR=9.42, SSIM=0.1415
Epoch 9/50 – gen_loss=1.6183, disc_loss=0.9245
 → val PSNR=9.24, SSIM=0.1364
Epoch 10/50 – gen_loss=1.7699, disc_loss=0.9025
 → val PSNR=8.64, SSIM=0.1214
 → Saved comparison grid: exp_with_visuals/epoch10_compari

KeyboardInterrupt: 