In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file in it.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = [os.path.join(folder, name) for name in names]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.utils import save_image
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import numpy as np
from tqdm import tqdm
import glob


class VideoStyleTransfer:
    """Frame-by-frame neural style transfer for videos.

    The pipeline (see ``process_video``) extracts frames with OpenCV, runs an
    image-optimisation style transfer on every frame using feature maps from a
    frozen VGG-19, and re-encodes the stylised frames into a video file.

    Caveats:
        * ``process_all_frames`` skips frames whose output file already
          exists, so re-running with a different style or video into the same
          folders reuses old results. Clear the folders for a fresh run.
        * ``extract_frames_from_video`` does not remove frames left in the
          output folder by a previous, longer extraction.
    """

    # Codecs tried by create_video_from_frames, as (FOURCC, container extension).
    _CODEC_CONFIGS = (
        ('XVID', '.avi'),
        ('MJPG', '.avi'),
        ('mp4v', '.mp4'),
    )

    # Frame rate used when neither the caller nor the source video provides one.
    _FALLBACK_FPS = 30.0

    def __init__(self, device=None):
        """Load a frozen VGG-19 feature extractor.

        Args:
            device: Anything accepted by ``torch.device``. When ``None``,
                CUDA is used if available, otherwise the CPU.
        """
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device)
        print(f"Using device: {self.device}")

        try:
            weights = models.VGG19_Weights.DEFAULT
            vgg = models.vgg19(weights=weights)
        except (AttributeError, TypeError):
            # Older torchvision: the weights enum does not exist
            # (AttributeError) or ``weights=`` is not accepted (TypeError).
            vgg = models.vgg19(pretrained=True)

        # Only the convolutional stack is used, and it is never trained.
        self.vgg = vgg.features.to(self.device).eval()
        for param in self.vgg.parameters():
            param.requires_grad = False

        self.imagenet_mean = [0.485, 0.456, 0.406]
        self.imagenet_std = [0.229, 0.224, 0.225]

        # Broadcastable (1, 3, 1, 1) copies used by denormalize().
        self.mean = torch.tensor(self.imagenet_mean).view(1, 3, 1, 1).to(self.device)
        self.std = torch.tensor(self.imagenet_std).view(1, 3, 1, 1).to(self.device)

        # Indices into ``self.vgg`` whose outputs are tapped.
        # NOTE(review): in torchvision's VGG-19 layout these should be conv4_2
        # (content) and conv1_1..conv5_1 (style) -- confirm if the backbone changes.
        self.content_idx = 21
        self.style_indices = [0, 5, 10, 19, 28]

    def _transform(self):
        """Return the PIL -> normalised tensor preprocessing pipeline."""
        return transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(self.imagenet_mean, self.imagenet_std),
        ])

    def load_image(self, path):
        """Load an image as a normalised ``(1, 3, H, W)`` tensor on ``self.device``.

        Args:
            path: Path of the image file.

        Returns:
            Tuple ``(tensor, size)`` where ``size`` is PIL's ``(width, height)``.
        """
        # The context manager closes the file handle; convert() hands back an
        # independent in-memory image that stays valid afterwards.
        with Image.open(path) as handle:
            img = handle.convert("RGB")
        tensor = self._transform()(img).unsqueeze(0).to(self.device)
        return tensor, img.size

    def denormalize(self, tensor):
        """Undo the ImageNet normalisation and clamp to ``[0, 1]``.

        Returns a detached CPU tensor; the input is left untouched.
        """
        restored = tensor.detach() * self.std + self.mean
        return torch.clamp(restored, 0, 1).cpu()

    def get_feature_maps(self, x):
        """Run ``x`` through ``self.vgg`` and collect the tapped activations.

        Returns:
            List of ``(layer_index, activation)`` pairs, in layer order, for
            the content layer and every style layer.
        """
        # NOTE(review): torchvision's VGG uses in-place ReLU, so a tensor
        # captured at a conv index is likely overwritten with its post-ReLU
        # values by the following layer. Targets and current features are
        # extracted the same way, so the losses stay consistent -- confirm
        # before changing which layers are tapped or truncating this loop.
        features = []
        for i, layer in enumerate(self.vgg):
            x = layer(x)
            if i == self.content_idx or i in self.style_indices:
                features.append((i, x))
        return features

    def gram_matrix(self, x):
        """Return the per-sample Gram matrix of a ``(B, C, H, W)`` feature map.

        The result has shape ``(B, C, C)`` and is divided by ``H * W``.
        """
        batch_size, channels, height, width = x.size()
        # reshape (not view) so non-contiguous feature maps are accepted too.
        features = x.reshape(batch_size, channels, height * width)
        gram = torch.bmm(features, features.transpose(1, 2))
        return gram / (height * width)

    def total_variation(self, img):
        """Sum of the mean absolute horizontal and vertical neighbour differences."""
        horizontal_diff = torch.abs(img[:, :, :, :-1] - img[:, :, :, 1:])
        vertical_diff = torch.abs(img[:, :, :-1, :] - img[:, :, 1:, :])
        return horizontal_diff.mean() + vertical_diff.mean()

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will contain ``path``, if it has one."""
        parent = os.path.dirname(path)
        # A bare file name has no parent component; os.makedirs("") would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)

    @staticmethod
    def _list_frames(folder):
        """Return the sorted ``*.jpg`` files in ``folder``, else the sorted ``*.png`` files."""
        for pattern in ("*.jpg", "*.png"):
            found = sorted(glob.glob(os.path.join(folder, pattern)))
            if found:
                return found
        return []

    @classmethod
    def _codec_candidates(cls, output_video_path):
        """Return ``(fourcc, path)`` candidates, requested container first.

        The extension of ``output_video_path`` is swapped for each codec's
        container extension. Codecs whose container matches the requested
        extension are tried before the others; relative order is otherwise kept.
        """
        # splitext only looks at the final path component, so a dot in a
        # directory name is never mistaken for the extension separator.
        base, requested_ext = os.path.splitext(output_video_path)
        requested_ext = requested_ext.lower()
        # sorted() is stable: False (matching) sorts before True (non-matching).
        ordered = sorted(cls._CODEC_CONFIGS, key=lambda cfg: cfg[1] != requested_ext)
        return [(codec, base + ext) for codec, ext in ordered]

    @classmethod
    def _resolve_fps(cls, requested_fps, source_fps, frame_skip):
        """Choose the output frame rate.

        An explicit ``requested_fps`` wins. Otherwise the source rate divided
        by ``frame_skip`` is used, falling back to ``_FALLBACK_FPS`` when the
        source rate is missing, non-positive or NaN.
        """
        if requested_fps is not None:
            return requested_fps
        if not (source_fps and source_fps > 0):
            print(f"Source FPS unavailable ({source_fps}); assuming {cls._FALLBACK_FPS}")
            source_fps = cls._FALLBACK_FPS
        return source_fps / frame_skip

    def extract_frames_from_video(self, video_path, output_folder, frame_skip=1):
        """Write every ``frame_skip``-th frame of a video as a JPEG.

        Frames are read sequentially until the decoder reports the end of the
        stream, which avoids a seek per frame and does not depend on the
        container's reported frame count. Files are named
        ``frame_000000.jpg``, ``frame_000001.jpg``, ... in extraction order.

        Args:
            video_path: Path of the input video.
            output_folder: Folder to write frames to (created if missing).
            frame_skip: Keep one frame out of this many; must be >= 1.

        Returns:
            Tuple ``(saved_count, fps, (width, height))`` as reported by OpenCV.

        Raises:
            ValueError: If ``frame_skip`` < 1 or the video cannot be opened.
            OSError: If a frame cannot be written to disk.
        """
        if frame_skip < 1:
            raise ValueError(f"frame_skip must be >= 1, got {frame_skip}")

        print(f"Extracting frames from: {video_path}")

        os.makedirs(output_folder, exist_ok=True)

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                raise ValueError(f"Unable to open video: {video_path}")

            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            print(f"Video details: {total_frames} frames, {width}x{height}, {fps:.2f} FPS")
            print(f"Frame skip: {frame_skip}")

            saved_count = 0
            frame_idx = 0
            # The reported count is only used for the progress bar.
            progress_total = total_frames if total_frames > 0 else None

            with tqdm(total=progress_total) as progress:
                # grab() advances the stream; retrieve() is only called for
                # the frames that are actually kept.
                while cap.grab():
                    if frame_idx % frame_skip == 0:
                        ret, frame = cap.retrieve()
                        if not ret:
                            break

                        frame_path = os.path.join(output_folder, f"frame_{saved_count:06d}.jpg")
                        if not cv2.imwrite(frame_path, frame):
                            raise OSError(f"Failed to write frame: {frame_path}")
                        saved_count += 1

                    frame_idx += 1
                    progress.update(1)
        finally:
            # Release the capture even when opening or writing fails.
            cap.release()

        print(f"Extracted {saved_count} frames to {output_folder}")
        return saved_count, fps, (width, height)

    def style_transfer_single_frame(
        self,
        content_path,
        style_path,
        output_path,
        content_weight=1e5,
        style_weight=3e4,
        tv_weight=1.0,
        optimizer_type="lbfgs",
        num_steps_lbfgs=100,
        lr_adam=10.0,
    ):
        """Stylise one image by optimising its pixels and save the result.

        The optimised image starts as a copy of the content image and is
        updated to minimise a weighted sum of content, style and
        total-variation losses.

        Args:
            content_path: Image providing the content.
            style_path: Image providing the style.
            output_path: Where the stylised image is written.
            content_weight: Weight of the content (feature MSE) loss.
            style_weight: Weight of the style (Gram-matrix) loss.
            tv_weight: Weight of the total-variation loss.
            optimizer_type: ``"adam"`` for Adam; any other value uses L-BFGS.
            num_steps_lbfgs: Iteration budget. Despite the name it is used
                for both optimisers.
            lr_adam: Learning rate, used only with Adam.

        Returns:
            The optimised, still-normalised image tensor (``requires_grad=True``).
        """
        content_img, content_size = self.load_image(content_path)
        style_img, _ = self.load_image(style_path)

        print(f"Processing frame: {content_size[0]}x{content_size[1]}")

        optimizing_img = content_img.clone().requires_grad_(True)

        # Targets are constants of the optimisation; no graph is needed.
        with torch.no_grad():
            content_feats = self.get_feature_maps(content_img)
            style_feats = self.get_feature_maps(style_img)

        target_content = None
        for idx, feat in content_feats:
            if idx == self.content_idx:
                target_content = feat.squeeze(0)
                break

        target_style_grams = [
            self.gram_matrix(feat)
            for idx, feat in style_feats
            if idx in self.style_indices
        ]

        def compute_losses(img):
            """Total weighted loss of ``img`` against the fixed targets."""
            current_content = None
            current_grams = []
            for idx, feat in self.get_feature_maps(img):
                if idx == self.content_idx:
                    current_content = feat.squeeze(0)
                if idx in self.style_indices:
                    current_grams.append(self.gram_matrix(feat))

            content_loss = nn.functional.mse_loss(
                current_content, target_content, reduction="mean"
            )

            style_loss = 0.0
            for target_gram, current_gram in zip(target_style_grams, current_grams):
                style_loss += nn.functional.mse_loss(
                    current_gram, target_gram, reduction="sum"
                )
            style_loss /= len(target_style_grams)

            tv_loss = self.total_variation(img)

            return (
                content_weight * content_loss
                + style_weight * style_loss
                + tv_weight * tv_loss
            )

        max_iter = num_steps_lbfgs

        if optimizer_type == "adam":
            optimizer = optim.Adam([optimizing_img], lr=lr_adam)
            for step in range(max_iter):
                optimizer.zero_grad()
                total_loss = compute_losses(optimizing_img)
                total_loss.backward()
                optimizer.step()

                if step % 20 == 0:
                    print(f"Step {step}/{max_iter}, Loss: {total_loss.item():.4f}")
        else:
            optimizer = optim.LBFGS([optimizing_img], max_iter=max_iter)
            evaluations = 0

            def closure():
                nonlocal evaluations
                optimizer.zero_grad()
                total_loss = compute_losses(optimizing_img)
                total_loss.backward()
                if evaluations % 50 == 0:
                    print(f"Step {evaluations}, Loss: {total_loss.item():.4f}")
                evaluations += 1
                return total_loss

            # A single step() call lets L-BFGS run its whole iteration budget,
            # invoking the closure once per function evaluation.
            optimizer.step(closure)

        self._ensure_parent_dir(output_path)
        final_img = self.denormalize(optimizing_img)
        save_image(final_img, output_path)

        print(f"Saved: {os.path.basename(output_path)}")
        return optimizing_img

    def process_all_frames(
        self,
        frames_folder,
        style_path,
        output_folder,
        content_weight=1e5,
        style_weight=3e4,
        tv_weight=1.0,
        optimizer_type="lbfgs",
        num_steps_lbfgs=50,
        lr_adam=10.0,
        frame_skip=1
    ):
        """Stylise every ``frame_skip``-th image found in ``frames_folder``.

        Output files keep the input file names. Frames whose output already
        exists are skipped and counted as successful, so an interrupted run
        can be resumed. A failure on one frame is reported and the run goes on.

        Args:
            frames_folder: Folder with ``*.jpg`` (or, failing that, ``*.png``) frames.
            style_path: Style image.
            output_folder: Folder for stylised frames (created if missing).
            content_weight, style_weight, tv_weight, optimizer_type,
            num_steps_lbfgs, lr_adam: Forwarded to
                ``style_transfer_single_frame``.
            frame_skip: Process one frame out of this many; must be >= 1.

        Returns:
            Number of frames that were stylised or already present.

        Raises:
            ValueError: If ``frame_skip`` < 1 or no frames are found.
        """
        if frame_skip < 1:
            raise ValueError(f"frame_skip must be >= 1, got {frame_skip}")

        print(f"Processing frames from: {frames_folder}")

        os.makedirs(output_folder, exist_ok=True)

        frame_files = self._list_frames(frames_folder)
        if not frame_files:
            raise ValueError(f"No image files found in {frames_folder}")

        frame_files = frame_files[::frame_skip]

        print(f"Found {len(frame_files)} frames to process")

        successful_frames = 0

        for i, frame_file in enumerate(tqdm(frame_files, desc="Processing frames")):
            frame_name = os.path.basename(frame_file)
            output_path = os.path.join(output_folder, frame_name)

            if os.path.exists(output_path):
                print(f"Skipping already processed: {frame_name}")
                successful_frames += 1
                continue

            try:
                print(f"Processing frame {i+1}/{len(frame_files)}: {frame_name}")
                self.style_transfer_single_frame(
                    content_path=frame_file,
                    style_path=style_path,
                    output_path=output_path,
                    content_weight=content_weight,
                    style_weight=style_weight,
                    tv_weight=tv_weight,
                    optimizer_type=optimizer_type,
                    num_steps_lbfgs=num_steps_lbfgs,
                    lr_adam=lr_adam,
                )
                successful_frames += 1

            except Exception as e:
                # Deliberate best effort: one bad frame must not abort a long run.
                print(f"Error processing {frame_file}: {e}")
                continue

        print(f"Processed {successful_frames}/{len(frame_files)} frames to {output_folder}")
        return successful_frames

    def create_video_from_frames(
        self,
        frames_folder,
        output_video_path,
        fps=30,
        frame_size=None
    ):
        """Encode the images in ``frames_folder`` into a video.

        Several codecs are tried in turn, starting with those whose container
        matches the extension of ``output_video_path``. The file actually
        written may therefore have a different extension than requested;
        always use the returned path.

        Args:
            frames_folder: Folder with ``*.jpg`` (or, failing that, ``*.png``) frames.
            output_video_path: Desired output path.
            fps: Output frame rate.
            frame_size: ``(width, height)`` of the video. Defaults to the
                size of the first frame; other frames are resized to fit.

        Returns:
            Path of the video file that was written.

        Raises:
            ValueError: If no frames are found or the first one is unreadable.
            RuntimeError: If no video writer could be opened.
        """
        print(f"Creating video from frames in: {frames_folder}")

        frame_files = self._list_frames(frames_folder)
        if not frame_files:
            raise ValueError(f"No image files found in {frames_folder}")

        print(f"Found {len(frame_files)} frames for video creation")

        first_frame = cv2.imread(frame_files[0])
        if first_frame is None:
            raise ValueError(f"Unable to read first frame: {frame_files[0]}")

        if frame_size is None:
            frame_size = (first_frame.shape[1], first_frame.shape[0])
        # Normalise to a tuple of ints so the per-frame size comparison below
        # also works when the caller passes a list.
        frame_size = (int(frame_size[0]), int(frame_size[1]))

        print(f"Video resolution: {frame_size[0]}x{frame_size[1]} at {fps} FPS")

        video_writer = None
        final_output_path = output_video_path

        for codec, candidate_path in self._codec_candidates(output_video_path):
            try:
                fourcc = cv2.VideoWriter_fourcc(*codec)
                candidate = cv2.VideoWriter(candidate_path, fourcc, fps, frame_size)
            except Exception as e:
                print(f"Failed with {codec}: {e}")
                continue

            if candidate.isOpened():
                print(f"Using codec: {codec}")
                video_writer = candidate
                final_output_path = candidate_path
                break
            candidate.release()

        if video_writer is None:
            # Last resort: FOURCC 0 on the path exactly as requested.
            video_writer = cv2.VideoWriter(final_output_path, 0, fps, frame_size)
            if not video_writer.isOpened():
                video_writer.release()
                raise RuntimeError("Unable to create video writer")

        written_frames = 0
        try:
            for frame_file in tqdm(frame_files, desc="Creating video"):
                frame = cv2.imread(frame_file)
                if frame is None:
                    print(f"Unable to read frame: {frame_file}")
                    continue

                if (frame.shape[1], frame.shape[0]) != frame_size:
                    frame = cv2.resize(frame, frame_size)

                video_writer.write(frame)
                written_frames += 1
        finally:
            # Always finalise the container, even if a frame fails mid-way.
            video_writer.release()

        print(f"Video created: {final_output_path}")
        print(f"Resolution: {frame_size[0]}x{frame_size[1]}")
        print(f"FPS: {fps}")
        print(f"Total frames: {written_frames}")

        return final_output_path

    def process_video(
        self,
        video_path,
        style_path,
        output_video_name="video-final-output",
        content_weight=1e5,
        style_weight=3e4,
        tv_weight=1.0,
        optimizer_type="lbfgs",
        num_steps_lbfgs=50,
        lr_adam=10.0,
        frame_skip=1,
        fps=None,
        work_dir="/kaggle/working"
    ):
        """Run the full pipeline: extract frames, stylise them, encode a video.

        Args:
            video_path: Input video.
            style_path: Style image.
            output_video_name: Base name (no extension) of the output video.
            content_weight, style_weight, tv_weight, optimizer_type,
            num_steps_lbfgs, lr_adam: Forwarded to the per-frame style transfer.
            frame_skip: Keep one frame out of this many during extraction.
            fps: Output frame rate. When ``None`` the source rate divided by
                ``frame_skip`` is used (30 if the source rate is unknown).
            work_dir: Folder that receives the ``video-frames`` and
                ``video-frame-outputs`` sub-folders and the final video.

        Returns:
            Path of the video file that was written.

        Raises:
            ValueError: If the video cannot be opened or yields no frames.
        """
        frames_folder = os.path.join(work_dir, "video-frames")
        output_frames_folder = os.path.join(work_dir, "video-frame-outputs")
        output_video_path = os.path.join(work_dir, f"{output_video_name}.mp4")

        print("Starting video style transfer pipeline")
        print(f"Input video: {video_path}")
        print(f"Style image: {style_path}")
        print(f"Output video: {output_video_path}")
        print(f"Frame skip: {frame_skip}")

        print("Step 1: Extracting frames from video")
        frame_count, original_fps, original_size = self.extract_frames_from_video(
            video_path=video_path,
            output_folder=frames_folder,
            frame_skip=frame_skip
        )

        if frame_count == 0:
            # Stop here rather than stylising frames left over from an earlier run.
            raise ValueError(f"No frames could be extracted from {video_path}")

        print(f"Original video size: {original_size[0]}x{original_size[1]}")

        if fps is None:
            fps = self._resolve_fps(None, original_fps, frame_skip)
            print(f"Using FPS: {fps:.2f}")

        print("Step 2: Applying style transfer to frames")
        processed_count = self.process_all_frames(
            frames_folder=frames_folder,
            style_path=style_path,
            output_folder=output_frames_folder,
            content_weight=content_weight,
            style_weight=style_weight,
            tv_weight=tv_weight,
            optimizer_type=optimizer_type,
            num_steps_lbfgs=num_steps_lbfgs,
            lr_adam=lr_adam,
            # Skipping was already applied during extraction.
            frame_skip=1
        )

        print("Step 3: Creating video from processed frames")
        final_video_path = self.create_video_from_frames(
            frames_folder=output_frames_folder,
            output_video_path=output_video_path,
            fps=fps,
            frame_size=original_size
        )

        print("Pipeline completed")
        print(f"Extracted frames: {frame_count}")
        print(f"Processed frames: {processed_count}")
        print(f"Original resolution: {original_size[0]}x{original_size[1]}")
        print(f"Final video: {final_video_path}")

        return final_video_path


if __name__ == "__main__":
    # Driver cell: stylise one Kaggle input video, preview the first stylised
    # frames, then report how much disk space the frame folders use.
    import traceback

    processor = VideoStyleTransfer()

    VIDEO_PATH = "/kaggle/input/finalvideo/V1.mp4"
    STYLE_PATH = "/kaggle/input/finalvideo/sea-landscape-with-digital-art-style (1).jpg"
    # Folders that process_video() writes to by default.
    FRAMES_FOLDER = "/kaggle/working/video-frames"
    OUTPUT_FRAMES_FOLDER = "/kaggle/working/video-frame-outputs"
    PREVIEW_COUNT = 3

    def _folder_size_mb(folder):
        """Total size in MiB of the regular files directly inside ``folder``.

        Returns 0.0 when the folder does not exist, so the summary still
        works after a pipeline run that failed before creating it.
        """
        if not os.path.isdir(folder):
            return 0.0
        with os.scandir(folder) as entries:
            total_bytes = sum(entry.stat().st_size for entry in entries if entry.is_file())
        return total_bytes / (1024 * 1024)

    print("Starting Video Style Transfer")
    print("=" * 50)

    try:
        final_video = processor.process_video(
            video_path=VIDEO_PATH,
            style_path=STYLE_PATH,
            output_video_name="video-final-output",
            content_weight=1e5,
            style_weight=3e4,
            tv_weight=1.0,
            optimizer_type="lbfgs",
            num_steps_lbfgs=300,
            frame_skip=1,
            fps=30
        )

        print(f"Success: {final_video}")

        preview_files = sorted(glob.glob(os.path.join(OUTPUT_FRAMES_FOLDER, "*.jpg")))[:PREVIEW_COUNT]

        if preview_files:
            # squeeze=False always yields a 2-D axes array, even for one frame.
            fig, axes = plt.subplots(1, len(preview_files), figsize=(15, 5), squeeze=False)

            for i, (ax, frame_file) in enumerate(zip(axes[0], preview_files)):
                img = Image.open(frame_file)
                ax.imshow(img)
                ax.set_title(f"Frame {i+1} - {img.size[0]}x{img.size[1]}")
                ax.axis('off')

            plt.tight_layout()
            plt.show()

    except Exception as e:
        print(f"Error: {e}")
        traceback.print_exc()

    print("Disk usage summary:")
    frames_size = _folder_size_mb(FRAMES_FOLDER)
    output_size = _folder_size_mb(OUTPUT_FRAMES_FOLDER)
    print(f"Input frames: {frames_size:.1f} MB")
    print(f"Output frames: {output_size:.1f} MB")

Using device: cuda


Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth
100%|██████████| 548M/548M [00:02<00:00, 211MB/s]  


Starting Video Style Transfer
Starting video style transfer pipeline
Input video: /kaggle/input/finalvideo/V1.mp4
Style image: /kaggle/input/finalvideo/sea-landscape-with-digital-art-style (1).jpg
Output video: /kaggle/working/video-final-output.mp4
Frame skip: 1
Step 1: Extracting frames from video
Extracting frames from: /kaggle/input/finalvideo/V1.mp4
Video details: 73 frames, 720x1280, 30.10 FPS
Frame skip: 1


100%|██████████| 73/73 [00:03<00:00, 22.65it/s]


Extracted 73 frames to /kaggle/working/video-frames
Original video size: 720x1280
Step 2: Applying style transfer to frames
Processing frames from: /kaggle/working/video-frames
Found 73 frames to process


Processing frames:   0%|          | 0/73 [00:00<?, ?it/s]

Processing frame 1/73: frame_000000.jpg
Processing frame: 720x1280
Step 0, Loss: 2953971712.0000
Step 50, Loss: 35346464.0000
Step 100, Loss: 13362424.0000
Step 150, Loss: 8298064.5000
Step 200, Loss: 6155570.5000
Step 250, Loss: 4971092.5000


Processing frames:   1%|▏         | 1/73 [01:59<2:23:25, 119.53s/it]

Saved: frame_000000.jpg
Processing frame 2/73: frame_000001.jpg
Processing frame: 720x1280
Step 0, Loss: 2934895104.0000
Step 50, Loss: 36913260.0000
