<a href="https://colab.research.google.com/github/lukkychan/resolution_changer/blob/main/new3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!mkdir -p frames/720p
!mkdir -p frames/360p
!mkdir scaled
!mkdir predicted
!mkdir dataset
!mkdir -p dataset/720p
!mkdir -p dataset/360p
!mkdir checkpoints

In [3]:
import cv2

# Specify the path to the video file
video_path = "/content/drive/MyDrive/model/1080p.mp4"

# Specify the output folder to save the frames
output_folder = "/content/frames/360p/"

# Open the video file
video = cv2.VideoCapture(video_path)

# Get the total number of frames in the video
total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
print(total_frames)

# Calculate the frame interval to evenly sample the frames
frame_interval = max(total_frames//125, 1)

# Initialize a counter to keep track of the extracted frames
frame_count_1 = 0

# Loop through the frames and extract the desired number of frames
while frame_count_1 <= 125 :
    # Read the current frame
    ret, frame = video.read()

    # Check if the frame was successfully read
    if not ret:
        break

    # Save the frame as an image file
    frame_path = f"{output_folder}{frame_count_1}.jpg"
    cv2.imwrite(frame_path, frame)

    # Increment the frame count
    frame_count_1 += 1

    # Move to the next frame based on the frame interval
    video.set(cv2.CAP_PROP_POS_FRAMES, frame_count_1 * frame_interval)

# Release the video capture object
video.release()
print(frame_count_1)
print("done.")


2987
126
done.


In [4]:
import cv2

# Specify the path to the video file
video_path = "/content/drive/MyDrive/model/720p.mp4"

# Specify the output folder to save the frames
output_folder = "/content/frames/720p/"

# Open the video file
video = cv2.VideoCapture(video_path)

# Get the total number of frames in the video
total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
print(total_frames)

# Calculate the frame interval to evenly sample the frames
frame_interval = max(total_frames// 125, 1)

# Initialize a counter to keep track of the extracted frames
frame_count_2 = 0

# Loop through the frames and extract the desired number of frames
while frame_count_2 <= 125 :
    # Read the current frame
    ret, frame = video.read()

    # Check if the frame was successfully read
    if not ret:
        break

    # Save the frame as an image file
    frame_path = f"{output_folder}{frame_count_2}.jpg"
    cv2.imwrite(frame_path, frame)

    # Increment the frame count
    frame_count_2 += 1

    # Move to the next frame based on the frame interval
    video.set(cv2.CAP_PROP_POS_FRAMES, frame_count_2 * frame_interval)

# Release the video capture object
video.release()
print(frame_count_2)
print("done.")


2986
126
done.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
import cv2
from skimage.metrics import structural_similarity as ssim

# Check if a GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the original 1080p image
original_image = cv2.imread('/content/frames/360p/8.jpg')

# Downscale the image to 720p using OpenCV's resize function
downscaled_image = cv2.resize(original_image, (1280, 720))  # Assuming the original aspect ratio is 16:9

# Convert the images to tensors and move them to the GPU
downscaled_image = transforms.ToTensor()(downscaled_image).unsqueeze(0).to(device)
original_image = transforms.ToTensor()(original_image).unsqueeze(0).to(device)

# Create a CNN model for upscaling
class UpscaleModel(nn.Module):
    def __init__(self):
        super(UpscaleModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.up1 = nn.Upsample(scale_factor=1.875, mode='bilinear', align_corners=False)
        self.conv2 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.up2 = nn.Upsample(scale_factor=1.5, mode='bilinear', align_corners=False)
        self.conv3 = nn.Conv2d(32, 3, kernel_size=3, padding=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.conv1(x)
        x = self.up1(x)
        x = self.conv2(x)
        x = self.up2(x)
        x = self.conv3(x)
        x = self.sigmoid(x)
        return x

# Create the model and move it to the GPU
model = UpscaleModel().to(device)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Create a DataLoader for the training data
train_dataset = TensorDataset(downscaled_image, original_image)
train_loader = DataLoader(train_dataset, batch_size=1)

# Train the model
model.train()
for epoch in range(10):
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}: Loss = {running_loss/len(train_loader):.4f}")

# Use the trained model to upscale the 720p image back to its original resolution
model.eval()
with torch.no_grad():
    upscaled_image = model(downscaled_image).clamp(0, 1)

# Convert the upscaled image to numpy array and move it to the CPU
upscaled_image = upscaled_image.squeeze(0).cpu().numpy()

# Upscale the image to its original resolution using OpenCV's resize function
upscaled_image = cv2.resize(upscaled_image, (1920, 1080))

# Convert the upscaled image to a tensor and move it to the GPU
upscaled_image = transforms.ToTensor()(upscaled_image).unsqueeze(0).to(device)

# Calculate SSIM
ssim_score = ssim(original_image.squeeze(0).cpu().numpy().transpose(1, 2, 0), upscaled_image.squeeze(0).cpu().numpy().transpose(1, 2, 0), multichannel=True)

# Calculate PSNR
mse = torch.mean((original_image - upscaled_image)**2)
psnr = 10 * torch.log10(255.0**2 / mse)

print(f"SSIM: {ssim_score:.4f}")
print(f"PSNR: {psnr.item():.2f} dB")


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: ignored

In [5]:
import gc
gc.collect()

0

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
import cv2

# Check if a GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the original 1080p image
original_image = cv2.imread('/content/frames/360p/8.jpg')

# Downscale the image to 720p using OpenCV's resize function
downscaled_image = cv2.resize(original_image, (1920, 1080))  # Assuming the original aspect ratio is 16:9

# Convert the images to tensors and move them to the GPU
downscaled_image = transforms.ToTensor()(downscaled_image).unsqueeze(0).to(device)
original_image = transforms.ToTensor()(original_image).unsqueeze(0).to(device)

# Create a CNN model for upscaling
class UpscaleModel(nn.Module):
    def __init__(self):
        super(UpscaleModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.up1 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
        self.conv2 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.up2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
        self.conv3 = nn.Conv2d(32, 3, kernel_size=3, padding=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.conv1(x)
        x = self.up1(x)
        x = self.conv2(x)
        x = self.up2(x)
        x = self.conv3(x)
        x = self.sigmoid(x)
        return x

# Create the model and move it to the GPU
model = UpscaleModel().to(device)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Create a DataLoader for the training data
train_dataset = TensorDataset(downscaled_image, original_image)
train_loader = DataLoader(train_dataset, batch_size=1)

# Train the model
model.train()
for epoch in range(10):
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}: Loss = {running_loss/len(train_loader):.4f}")

# Use the trained model to upscale the 720p image back to 1080p
model.eval()
with torch.no_grad():
    upscaled_image = model(downscaled_image)

# Convert the upscaled image tensor to a numpy array and remove the batch dimension
upscaled_image = upscaled_image.squeeze(0).cpu().numpy()

# Convert the image back to the range of 0-255 and change the channel order to BGR for OpenCV
upscaled_image = (upscaled_image * 255).clip(0, 255).transpose(1, 2, 0).astype('uint8')

# Save the upscaled image
cv2.imwrite('/content/upscaled.jpg', upscaled_image)


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: ignored

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
import cv2
import torchvision

# Check if a GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the original 1080p image
original_image = cv2.imread('/content/frames/360p/8.jpg')

# Calculate the scale factor
original_width = original_image.shape[1]
target_width = 1920
scale_factor = target_width / original_width

# Downscale the image using OpenCV's resize function
downscaled_image = cv2.resize(original_image, (int(original_width * scale_factor), int(original_image.shape[0] * scale_factor)))

# Convert the images to tensors and move them to the GPU
downscaled_image = transforms.ToTensor()(downscaled_image).unsqueeze(0).to(device)
original_image = transforms.ToTensor()(original_image).unsqueeze(0).to(device)

# Create a CNN model for upscaling
class UpscaleModel(nn.Module):
    def __init__(self):
        super(UpscaleModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(64, 3, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))
        x = self.relu(self.conv4(x))
        x = self.relu(self.conv5(x))
        x = self.conv6(x)
        x = self.sigmoid(x)
        return x

# Create the model and move it to the GPU
model = UpscaleModel().to(device)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Create a DataLoader for the training data
train_dataset = TensorDataset(downscaled_image, original_image)
train_loader = DataLoader(train_dataset, batch_size=1)

# Train the model
model.train()
for epoch in range(10):
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}: Loss = {running_loss/len(train_loader):.4f}")

# Save the trained model
torch.save(model.state_dict(), 'upscale_model.pth')

import gc
gc.collect()


2nd

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
import cv2
import torchvision
import time

start_time = time.time()

# Check if a GPU is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

image_paths = []
for i in range(0,10):
  image_paths.append(f"/content/frames/360p/{i}.jpg")


# Create a CNN model for upscaling
class UpscaleModel(nn.Module):
    def __init__(self):
        super(UpscaleModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(64, 3, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))
        x = self.relu(self.conv4(x))
        x = self.relu(self.conv5(x))
        x = self.conv6(x)
        x = self.sigmoid(x)
        return x

checkpoints = []

for image_path in image_paths:
  # Load the original 1080p image
  original_image = cv2.imread(image_path)

  # Calculate the scale factor
  original_width = original_image.shape[1]
  target_width = 1920
  scale_factor = target_width / original_width

  # Downscale the image using OpenCV's resize function
  downscaled_image = cv2.resize(original_image, (int(original_width * scale_factor), int(original_image.shape[0] * scale_factor)))

  # Convert the images to tensors and move them to the GPU
  downscaled_image = transforms.ToTensor()(downscaled_image).unsqueeze(0).to(device)
  original_image = transforms.ToTensor()(original_image).unsqueeze(0).to(device)
  # Create the model and move it to the GPU
  model = UpscaleModel().to(device)

  # Define the loss function and optimizer
  criterion = nn.MSELoss()
  optimizer = optim.Adam(model.parameters(), lr=0.001)

  # Create a DataLoader for the training data
  train_dataset = TensorDataset(downscaled_image, original_image)
  train_loader = DataLoader(train_dataset, batch_size=2)

  # Train the model
  model.train()
  best_loss = float('inf')  # Initialize best_loss
  for epoch in range(10):
      running_loss = 0.0
      for inputs, labels in train_loader:
          inputs, labels = inputs.to(device), labels.to(device)

          optimizer.zero_grad()

          outputs = model(inputs)
          loss = criterion(outputs, labels)
          loss.backward()
          optimizer.step()

          running_loss += loss.item()
    # Check if the current loss is the best so far
      if running_loss < best_loss:
        best_loss = running_loss
        # Save the current model checkpoint
        checkpoints.append(model.state_dict())

    # Check if the model is overfitting
      if running_loss > 1.2 * best_loss:
        print("Model is overfitting. Stopping training.")
        break
      elif (running_loss/len(train_loader)) == 0.0000 :
        break
      else:
        print(f"Epoch {epoch+1}: Loss = {running_loss/len(train_loader):.4f}")

# Save the trained model
torch.save(model.state_dict(), 'upscale_model.pth')
end_time = time.time()
execution_time = end_time - start_time
print("Execution time:", execution_time/60)

import gc
gc.collect()


Epoch 1: Loss = 0.2474
Epoch 2: Loss = 0.2330
Epoch 3: Loss = 0.1757
Epoch 4: Loss = 0.0599
Epoch 5: Loss = 0.0010
Epoch 6: Loss = 0.0000


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
import cv2
from skimage.metrics import structural_similarity as ssim


model = UpscaleModel()
model.load_state_dict(torch.load('upscale_model.pth'))
# Use the trained model to upscale the downscaled image
model.eval()
with torch.no_grad():
    upscaled_image = model(downscaled_image)

# Convert the upscaled image to a numpy array and move it to the CPU
upscaled_image = upscaled_image.squeeze(0).cpu().numpy()

# Upscale the image using OpenCV's resize function
upscaled_image = cv2.resize(upscaled_image.transpose(1, 2, 0), (original_width, original_image.shape[2]))

# Convert the upscaled image to a tensor and move it to the GPU
upscaled_image = transforms.ToTensor()(upscaled_image).unsqueeze(0).to(device)

# Calculate the SSIM and PSNR
ssim_score = ssim(original_image.squeeze(0).permute(1, 2, 0).cpu().numpy(), upscaled_image.squeeze(0).permute(1, 2, 0).cpu().numpy(), multichannel=True)
mse = torch.mean((original_image - upscaled_image)**2)
psnr = 10 * torch.log10(1.0 / mse)

print(f"SSIM: {ssim_score:.4f}")
print(f"PSNR: {psnr.item():.2f} dB")

  ssim_score = ssim(original_image.squeeze(0).permute(1, 2, 0).cpu().numpy(), upscaled_image.squeeze(0).permute(1, 2, 0).cpu().numpy(), multichannel=True)


SSIM: 0.9713
PSNR: 49.25 dB


In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import cv2

# Load the trained model
model = UpscaleModel()
model.load_state_dict(torch.load('upscale_model.pth'))
model.eval()

# Load the downscaled image
downscaled_image = cv2.imread('/content/frames/360p/110.jpg')

# Convert the image to a tensor and normalize it
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
input_image = transform(downscaled_image).unsqueeze(0)

# Upscale the image using the trained model
with torch.no_grad():
    upscaled_image = model(input_image)

# Convert the upscaled image tensor to a numpy array
upscaled_image = upscaled_image.squeeze(0).permute(1, 2, 0).numpy()

# Convert the upscaled image back to the original range (0-255)
upscaled_image = (upscaled_image * 255).astype('uint8')

# Display or save the upscaled image
#cv2.imshow('Upscaled Image', upscaled_image)
cv2.imwrite("output.jpg", upscaled_image)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [None]:
import numpy as np
from PIL import Image

# Load the original and predicted images
original_image = Image.open('/content/frames/360p/8.jpg')
predicted_image = Image.open('/content/output.jpg')

# Convert images to numpy arrays
original_array = np.array(original_image)
predicted_array = np.array(predicted_image.convert('RGB'))  # Convert predicted image to RGB mode

# Calculate the pixel-wise difference
diff_array = np.abs(original_array - predicted_array)

# Calculate the number of mismatched pixels
num_mismatched_pixels = np.sum(diff_array > 0)

# Calculate the total number of pixels
total_pixels = original_array.size

# Calculate the accuracy as the percentage of matching pixels
accuracy = ((total_pixels - num_mismatched_pixels) / total_pixels) * 100

print(f"Accuracy: {accuracy:.2f}%")

Accuracy: 22.73%
