In [1]:
import cv2
import numpy as np

def LoadVideoFrames(path):
    videoCapture = cv2.VideoCapture(path)

    if not videoCapture.isOpened():
        raise IOError(f"Cannot open video file: {path}")
    
    frameList = []

    while True:
        bSuccess, frame = videoCapture.read()

        if not bSuccess:
            break

        frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frameList.append(frameRGB)

    videoCapture.release()

    result = np.array(frameList)

    return result

frames = LoadVideoFrames("./Data/Baseball.mp4")
print(frames.shape)

(1798, 1080, 1920, 3)


In [2]:
def ShowFrames(frames):
    for frame in frames:
        frameBGR = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        
        cv2.imshow("Video Frame", frameBGR)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break;

    cv2.destroyAllWindows()

In [20]:
ShowFrames(frames)

In [4]:
import torch
import torch.nn.functional as F

device = "cuda" if torch.cuda.is_available else "cpu"
print(f"Using '{device}' device")

Using 'cuda' device


In [5]:
def DownscaleFrames(frames):
    SPLIT_COUNT = 32

    count = int(np.ceil(frames.shape[0] / SPLIT_COUNT))

    # (프레임수, 높이, 너비, 채널) -> (프레임수, 채널, 높이, 너비)
    frameTensor = torch.from_numpy(frames[0:count]).permute(0, 3, 1, 2).float() / 255.0

    if torch.cuda.is_available:
        frameTensor = frameTensor.to("cuda")

    downscaledTensor = F.interpolate(frameTensor, size = (360, 640), mode = "area")
    downscaledFrames = (downscaledTensor * 255).byte().permute(0, 2, 3, 1).cpu().numpy()

    result = downscaledFrames

    for i in range(1, SPLIT_COUNT):
        begin = i * count
        end = begin + count

        if end > frames.shape[0]:
            end = frames.shape[0]

        frameTensor = torch.from_numpy(frames[begin:end]).permute(0, 3, 1, 2).float() / 255.0

        if torch.cuda.is_available:
            frameTensor = frameTensor.to("cuda")

        downscaledTensor = F.interpolate(frameTensor, size = (360, 640), mode = "area")
        downscaledFrames = (downscaledTensor * 255).byte().permute(0, 2, 3, 1).cpu().numpy()

        result = np.concatenate((result, downscaledFrames), axis = 0)

    return result

downscaledFrames = DownscaleFrames(frames)
print(downscaledFrames.shape)

(1798, 360, 640, 3)


In [21]:
ShowFrames(downscaledFrames)

In [1]:
import torch.nn as nn

class ESPCN(nn.Module):
    def __init__(self, scaleFactor):
        super(ESPCN, self).__init__()
        self.layers = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size = 5, padding = 2), 
            nn.Tanh(), 
            nn.Conv2d(64, 32, kernel_size = 3, padding = 1), 
            nn.Tanh(), 
            nn.Conv2d(32, scaleFactor ** 2, kernel_size = 3, padding = 1), 
            nn.PixelShuffle(scaleFactor)
        )

    def forward(self, x):
        return self.layers(x)

In [None]:
def TrainESPCN(frames360, frames1080, scaleFactor = 3, numEpochs = 10, batchSize = 4):
    model = ESPCN(scaleFactor).cuda()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters())

    frameCount = len(frames1080)

    model.train()
    for epoch in range(numEpochs):
        totalLoss = 0.0

        for i in range(0, frameCount, batchSize):
            end = i + batchSize
            if end > frameCount:
                end = frameCount

            inputBatch = frames360[i:end]
            targetBatch = frames1080[i:end]

            inputYFrames = []
            targetYFrames = []
            for j in range(len(inputBatch)):
                inputYCrCb = cv2.cvtColor(inputBatch[j], cv2.COLOR_RGB2YCrCb)
                targetYCrCb = cv2.cvtColor(targetBatch[j], cv2.COLOR_RGB2YCrCb)

                inputY = inputYCrCb[:, :, 0]
                targetY = targetYCrCb[:, :, 0]

                inputYFrames.append(inputY)
                targetYFrames.append(targetY)

            inputYFrames = np.array(inputYFrames)
            targetYFrames = np.array(targetYFrames)

            inputTensor = (torch.from_numpy(inputYFrames).float().unsqueeze(1) / 255.0).cuda()
            targetTensor = (torch.from_numpy(targetYFrames).float().unsqueeze(1) / 255.0).cuda()

            output = model(inputTensor)
            loss = criterion(output, targetTensor)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            totalLoss += loss.item()

            del inputTensor, targetTensor, output
            torch.cuda.empty_cache()

        print(f"Epoch: {epoch}, Loss: {totalLoss}")
    
    return model

trainedModel = TrainESPCN(downscaledFrames, frames, batchSize = 16)

Epoch: 0, Loss: 1.2759526844602078
Epoch: 1, Loss: 0.24084335431689397
Epoch: 2, Loss: 0.15981913701398298
Epoch: 3, Loss: 0.12406829014071263
Epoch: 4, Loss: 0.10814596191630699
Epoch: 5, Loss: 0.0987334126548376
Epoch: 6, Loss: 0.09314719200483523
Epoch: 7, Loss: 0.08902086643502116
Epoch: 8, Loss: 0.08506300006411038
Epoch: 9, Loss: 0.08096390042919666


In [11]:
torch.save(trainedModel.state_dict(), "ESPCN.pth")

In [8]:
def LoadModel(path, scaleFactor):
    model = ESPCN(scaleFactor)
    model.load_state_dict(torch.load(path, map_location = "cuda"))
    model = model.cuda()
    model.eval()

    return model

In [9]:
def GetSRFrames(model, frames, scaleFactor):
    upscaledFrames = []

    for frame in frames:
        yCrCb = cv2.cvtColor(frame, cv2.COLOR_RGB2YCrCb)
        y = yCrCb[:, :, 0]
        cr = yCrCb[:, :, 1]
        cb = yCrCb[:, :, 2]

        yTensor = torch.from_numpy(y).float().unsqueeze(0).unsqueeze(0) / 255.0
        yTensor = yTensor.cuda()

        with torch.no_grad():
            outputY = model(yTensor).clamp(0.0, 1.0)

        outputY = outputY.squeeze().cpu().numpy() * 255.0
        outputY = outputY.astype(np.uint8)

        height, width = outputY.shape
        crUp = cv2.resize(cr, (width, height), interpolation = cv2.INTER_CUBIC)
        cbUp = cv2.resize(cb, (width, height), interpolation = cv2.INTER_CUBIC)

        yCrCbUp = cv2.merge([outputY, crUp, cbUp])
        rgbUpscaled = cv2.cvtColor(yCrCbUp, cv2.COLOR_YCrCb2RGB)

        upscaledFrames.append(rgbUpscaled)

    return np.array(upscaledFrames)

In [10]:
model = LoadModel("ESPCN.pth", 3)
# superResolutionFrames = GetSRFrames(model, downscaledFrames, 3)
# print(superResolutionFrames)

  model.load_state_dict(torch.load(path, map_location = "cuda"))


In [11]:
for i in range(len(frames)):
    original = frames[i]
    sr = superResolutionFrames[i]

    # RGB → BGR (OpenCV는 BGR 사용)
    originalBgr = cv2.cvtColor(original, cv2.COLOR_RGB2BGR)
    srBgr = cv2.cvtColor(sr, cv2.COLOR_RGB2BGR)
    # 창에 출력
    cv2.imshow("Original Frame", originalBgr)
    cv2.imshow("sr Frame", srBgr)

    if cv2.waitKey(33) & 0xFF == ord('q'):
        break

cv2.destroyAllWindows()

In [13]:
# 영상 저장
height, width = frames[0].shape[:2]
size = (width, height)

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer = cv2.VideoWriter("./Data/srVideo.mp4", fourcc, 60, size)

for frame in superResolutionFrames:
    bgrFrame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    writer.write(bgrFrame)

writer.release()
print("완료")

완료


In [22]:
def ShowSRFrames(model, frames, scaleFactor):
    for frame in frames:
        yCrCb = cv2.cvtColor(frame, cv2.COLOR_RGB2YCrCb)
        y = yCrCb[:, :, 0]
        cr = yCrCb[:, :, 1]
        cb = yCrCb[:, :, 2]

        yTensor = torch.from_numpy(y).float().unsqueeze(0).unsqueeze(0) / 255.0
        yTensor = yTensor.cuda()

        with torch.no_grad():
            outputY = model(yTensor).clamp(0.0, 1.0)

        outputY = outputY.squeeze().cpu().numpy() * 255.0
        outputY = outputY.astype(np.uint8)

        height, width = outputY.shape
        crUp = cv2.resize(cr, (width, height), interpolation = cv2.INTER_CUBIC)
        cbUp = cv2.resize(cb, (width, height), interpolation = cv2.INTER_CUBIC)

        yCrCbUp = cv2.merge([outputY, crUp, cbUp])
        rgbUpscaled = cv2.cvtColor(yCrCbUp, cv2.COLOR_YCrCb2BGR)
###################################################################################
        cv2.imshow("SuperResolution", rgbUpscaled)
        
        if cv2.waitKey(33) & 0xFF == ord('q'):
            break;

    cv2.destroyAllWindows()

ShowSRFrames(model, downscaledFrames, 3)

In [3]:
import cv2
import numpy as np
import torch
import torch.nn.functional as F

In [3]:

device = "cuda" if torch.cuda.is_available else "cpu"
print(f"Using '{device}' device")

FRAMES_PER_BATCH = 24

model = ESPCN(3).cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

model.train()
for epoch in range(10):

    capture360 = cv2.VideoCapture("./Data/Baseball360p.mp4")
    capture1080 = cv2.VideoCapture("./Data/Baseball1080p.mp4")

    # capture360 = cv2.VideoCapture("./Data/clip360p.mp4")
    # capture1080 = cv2.VideoCapture("./Data/clip1080p.mp4")

    totalLoss = 0
    bEnd = False
    while True:
        frames360 = []
        frames1080 = []

        for i in range(FRAMES_PER_BATCH):
            bSuccess360, frame360 = capture360.read()
            bSuccess1080, frame1080 = capture1080.read()
            
            bEnd = not bSuccess360 or not bSuccess1080
            if bEnd:
                break

            frames360.append(frame360)
            frames1080.append(frame1080)

            del frame360
            del frame1080

        len360 = len(frames360)
        len1080 = len(frames1080)
        if bEnd and (len360 == 0 or len1080 == 0):
            del frames360
            del frames1080
            break

        inputFrames = np.array(frames360)
        targetFrames = np.array(frames1080)

        del frames360
        del frames1080

        inputY = []
        targetY = []

        max = np.max([len360, len1080])
        for i in range(max):
            inputYCrCb = cv2.cvtColor(inputFrames[i], cv2.COLOR_BGR2YCrCb)
            targetYCrCb = cv2.cvtColor(targetFrames[i], cv2.COLOR_BGR2YCrCb)

            inputY.append(inputYCrCb[:, :, 0])
            targetY.append(targetYCrCb[:, :, 0])

            del inputYCrCb
            del targetYCrCb

        del inputFrames
        del targetFrames

        inputYNumpy = np.array(inputY)
        targetYNumpy = np.array(targetY)

        del inputY
        del targetY

        inputTensor = (torch.from_numpy(inputYNumpy).float().unsqueeze(1) / 255.0).cuda()
        targetTensor = (torch.from_numpy(targetYNumpy).float().unsqueeze(1) / 255.0).cuda()

        del inputYNumpy
        del targetYNumpy

        output = model(inputTensor)
        loss = criterion(output, targetTensor)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        totalLoss += loss.item()

        del inputTensor
        del targetTensor
        del output
        torch.cuda.empty_cache()
    
    print(f"Epoch: {epoch}, Loss: {totalLoss}")

    capture1080.release()
    capture360.release()

torch.save(model.state_dict(), "ESPCN.pth")

Using 'cuda' device


KeyboardInterrupt: 

In [4]:
model = ESPCN(3)
model.load_state_dict(torch.load("ESPCN.pth", map_location = "cuda"))
model = model.cuda()
model.eval()

capture360 = cv2.VideoCapture("./Data/TestBaseball360p.mp4")
capture1080 = cv2.VideoCapture("./Data/TestBaseball1080p.mp4")

cv2.namedWindow("SuperResolution", cv2.WINDOW_NORMAL)
while True:
    bSuccess360, frame360 = capture360.read()
    bSuccess1080, frame1080 = capture1080.read()
    
    bEnd = not bSuccess360 or not bSuccess1080
    if bEnd:
        break

    yCrCb = cv2.cvtColor(frame360, cv2.COLOR_BGR2YCrCb)
    y = yCrCb[:, :, 0]
    cr = yCrCb[:, :, 1]
    cb = yCrCb[:, :, 2]
    
    yTensor = torch.from_numpy(y).float().unsqueeze(0).unsqueeze(0) / 255.0
    yTensor = yTensor.cuda()
    
    with torch.no_grad():
        outputY = model(yTensor).clamp(0.0, 1.0)
        
    outputY = outputY.squeeze().cpu().numpy() * 255.0
    outputY = outputY.astype(np.uint8)

    height, width = outputY.shape
    crTensor = torch.from_numpy(cr).unsqueeze(0).unsqueeze(0) / 255.0
    crTensor = crTensor.cuda()

    resizedCr = F.interpolate(crTensor, size = (height, width), mode = "bicubic", align_corners = False)

    crUp = (resizedCr.squeeze().clamp(0.0, 1.0) * 255).byte().cpu().numpy()

    cbTensor = torch.from_numpy(cb).unsqueeze(0).unsqueeze(0) / 255.0
    cbTensor = cbTensor.cuda()

    resizedCb = F.interpolate(cbTensor, size = (height, width), mode = "bicubic", align_corners = False)

    cbUp = (resizedCb.squeeze().clamp(0.0, 1.0) * 255).byte().cpu().numpy()

    yCrCbUp = cv2.merge([outputY, crUp, cbUp])
    rgbUpscaled = cv2.cvtColor(yCrCbUp, cv2.COLOR_YCrCb2BGR)

    combined = np.hstack((frame1080, rgbUpscaled))

    cv2.imshow("SuperResolution", combined)
        
    if cv2.waitKey(33) & 0xFF == ord('q'):
        break;

cv2.destroyAllWindows()

capture1080.release()
capture360.release()

  model.load_state_dict(torch.load("ESPCN.pth", map_location = "cuda"))


In [3]:
%pip install onnx

Collecting onnx
  Downloading onnx-1.17.0-cp312-cp312-win_amd64.whl.metadata (16 kB)
Downloading onnx-1.17.0-cp312-cp312-win_amd64.whl (14.5 MB)
   ---------------------------------------- 0.0/14.5 MB ? eta -:--:--
   -------------------------------------- - 13.9/14.5 MB 79.3 MB/s eta 0:00:01
   ---------------------------------------- 14.5/14.5 MB 65.2 MB/s eta 0:00:00
Installing collected packages: onnx
Successfully installed onnx-1.17.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import torch

scale = 3
model = ESPCN(scale)
model.load_state_dict(torch.load("ESPCN.pth", map_location="cpu"))
model.eval()

dummy_input = torch.randn(1, 1, 360, 640)

# 4. ONNX로 변환
torch.onnx.export(
    model,
    dummy_input,
    "ESPCN.onnx",
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
    opset_version=11  # ONNX 변환 최소 버전은 11 이상을 권장
)

print("ONNX 변환 완료: ESPCN.onnx")

  model.load_state_dict(torch.load("ESPCN.pth", map_location="cpu"))


ONNX 변환 완료: ESPCN.onnx
