In [None]:
# Refresh the apt package index before installing anything.
!apt-get update
# Native toolchain: compiler, CMake and git.
!apt-get install -y build-essential cmake git
# OpenCV C++ development package (headers/libraries for the C++ build).
!apt-get install -y libopencv-dev
# OpenCV Python bindings (not needed by the C++/CUDA program itself).
!pip install opencv-python

In [None]:
%%writefile median_gpu.cu
#include <chrono>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <filesystem>
#include <iostream>
#include <string>
#include <vector>

#include <cuda_runtime.h>
#include <device_launch_parameters.h>

#include <opencv2/opencv.hpp>

namespace fs = std::filesystem;
using namespace cv;
using namespace std;

// Wraps a CUDA runtime call and reports failures with file/line context.
// The do { } while (0) wrapper makes the macro expand to a single statement,
// so `if (c) CUDA_CHECK(x); else ...` parses as intended.
#define CUDA_CHECK(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

/**
 * Prints a diagnostic to stderr when a CUDA runtime call did not succeed.
 *
 * @param code  Status returned by the CUDA runtime call.
 * @param file  Source file of the call site (supplied by CUDA_CHECK).
 * @param line  Source line of the call site (supplied by CUDA_CHECK).
 * @param abort When true (default), terminate the process after reporting.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) {
    if (code != cudaSuccess) {
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        // Exit statuses are truncated to 8 bits, so passing the raw CUDA
        // error code could be reported as a different (even zero) status.
        if (abort) exit(EXIT_FAILURE);
    }
}

/**
 * Median filter over an interleaved 8-bit image; each thread produces one
 * output pixel and processes its channels one after another.
 *
 * Launch layout: 2D grid of 2D blocks covering at least width x height
 * threads; threads outside the image return immediately.
 *
 * @param d_input  Device pointer to the source pixels, indexed as
 *                 (y * width + x) * channels + c.
 * @param d_output Device pointer to the destination pixels, same layout.
 * @param width    Image width in pixels.
 * @param height   Image height in pixels.
 * @param channels Number of interleaved channels per pixel.
 * @param ksize    Requested window side. The effective window is
 *                 (2 * (ksize / 2) + 1) squared, and the radius is clamped to
 *                 [0, 2] so the per-thread buffer (25 samples) can never be
 *                 overrun. Odd sizes 1, 3 and 5 behave exactly as requested.
 */
__global__ void medianFilterKernel(unsigned char* d_input, unsigned char* d_output, int width, int height, int channels, int ksize) {
    constexpr int kMaxHalfK = 2;
    constexpr int kMaxWindow = (2 * kMaxHalfK + 1) * (2 * kMaxHalfK + 1);

    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= width || y >= height) return;

    // Clamp the radius so that the number of gathered samples always fits the
    // buffer, and derive the sample count from the radius actually used so the
    // gather loop and the selection below agree (they did not for even ksize).
    const int halfK = max(0, min(ksize / 2, kMaxHalfK));
    const int side = 2 * halfK + 1;
    const int windowSize = side * side;
    const int mid = windowSize / 2;

    unsigned char window[kMaxWindow];

    for (int c = 0; c < channels; ++c) {
        // Gather the neighbourhood; coordinates outside the image are clamped
        // to the nearest edge pixel.
        int idx = 0;
        for (int ky = -halfK; ky <= halfK; ++ky) {
            const int py = min(max(y + ky, 0), height - 1);
            for (int kx = -halfK; kx <= halfK; ++kx) {
                const int px = min(max(x + kx, 0), width - 1);
                window[idx++] = d_input[(py * width + px) * channels + c];
            }
        }

        // Exchange sort, stopped once position `mid` is final: after pass i,
        // window[i] holds the i-th smallest sample, so later passes cannot
        // change the median.
        for (int i = 0; i <= mid; ++i) {
            for (int j = i + 1; j < windowSize; ++j) {
                if (window[i] > window[j]) {
                    const unsigned char temp = window[i];
                    window[i] = window[j];
                    window[j] = temp;
                }
            }
        }

        d_output[(y * width + x) * channels + c] = window[mid];
    }
}

/**
 * Runs medianFilterKernel on an 8-bit image and returns the filtered copy.
 *
 * @param image Source image with 8-bit channels. An empty image yields an
 *              empty result without touching the GPU.
 * @param ksize Window side; must be in [1, 5] because the kernel's per-thread
 *              window buffer holds 25 samples.
 * @return New Mat with the same size and type as `image`.
 * @throws cv::Exception (via CV_Assert) for non-8-bit input or an
 *         out-of-range ksize. CUDA failures are reported through CUDA_CHECK.
 */
Mat medianFilterGPU(const Mat& image, int ksize) {
    // A zero-sized grid is an invalid launch configuration, so bail out early.
    if (image.empty()) return Mat();

    CV_Assert(image.depth() == CV_8U);
    CV_Assert(ksize >= 1 && ksize <= 5);

    // The flat copies below assume tightly packed rows; a ROI/sub-matrix view
    // has row padding and must be compacted first.
    const Mat src = image.isContinuous() ? image : image.clone();

    const int width = src.cols;
    const int height = src.rows;
    const int channels = src.channels();

    // Promote to size_t before multiplying to avoid int overflow.
    const size_t imageSize = static_cast<size_t>(width) * height * channels * sizeof(unsigned char);

    unsigned char *d_input = nullptr, *d_output = nullptr;
    CUDA_CHECK(cudaMalloc(&d_input, imageSize));
    CUDA_CHECK(cudaMalloc(&d_output, imageSize));

    CUDA_CHECK(cudaMemcpy(d_input, src.data, imageSize, cudaMemcpyHostToDevice));

    const dim3 blockSize(16, 16);
    const dim3 gridSize((width + blockSize.x - 1) / blockSize.x, (height + blockSize.y - 1) / blockSize.y);

    medianFilterKernel<<<gridSize, blockSize>>>(d_input, d_output, width, height, channels, ksize);
    // Launch-configuration errors are only visible through cudaGetLastError();
    // faults during execution surface at the synchronizing call.
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());

    Mat result(src.size(), src.type());
    CUDA_CHECK(cudaMemcpy(result.data, d_output, imageSize, cudaMemcpyDeviceToHost));

    CUDA_CHECK(cudaFree(d_input));
    CUDA_CHECK(cudaFree(d_output));

    return result;
}

double psnr(const Mat& I1, const Mat& I2) {
    CV_Assert(I1.size() == I2.size() && I1.type() == I2.type());

    Mat s1;
    absdiff(I1, I2, s1);
    s1.convertTo(s1, CV_32F);

    s1 = s1.mul(s1);
    Scalar s = sum(s1);
    double sse = s.val[0] + s.val[1] + s.val[2];

    if (sse <= 1e-10) return 0;

    double mse = sse / (I1.channels() * I1.total());
    double psnr = 10.0 * log10((255 * 255) / mse);
    return psnr;
}

// Reports whether `str` terminates with `suffix` (exact, case-sensitive
// comparison). An empty suffix matches every string.
bool endsWith(const string& str, const string& suffix) {
    if (suffix.size() > str.size()) {
        return false;
    }
    const size_t tailStart = str.size() - suffix.size();
    return str.compare(tailStart, string::npos, suffix) == 0;
}

void processImage(const string& imagePath, const string& outputPath, int ksize, double& totalInferenceTime, double& totalPSNR, int& imageCount) {
    Mat image = imread(imagePath, IMREAD_COLOR);
    if (image.empty()) {
        cerr << "Error loading image: " << imagePath << endl;
        return;
    }

    if (image.rows > 720 || image.cols > 1280) {
        cerr << "Image size exceeds the allowed dimensions of 1280x720p" << endl;
        return;
    }

    auto start = chrono::high_resolution_clock::now();
    Mat result = medianFilterGPU(image, ksize);
    auto end = chrono::high_resolution_clock::now();
    double inference_time = chrono::duration<double, milli>(end - start).count();

    imwrite(outputPath, result);

    double psnrValue = psnr(image, result);

    totalInferenceTime += inference_time;
    totalPSNR += psnrValue;
    ++imageCount;
}

void printGPUInfo() {
    cout << "GPU Metrics:" << endl;
    system("nvidia-smi --query-gpu=memory.used,utilization.gpu --format=csv,nounits,noheader | awk -F', ' '{print \"GPU Memory Usage (MB): \" $1 \" MB\"; print \"GPU Load (%): \" $2}'");
}

void processDataset(const string& rootFolder, int ksize) {
    vector<string> noisyFolders = {"noisy5", "noisy15", "noisy35"};
    vector<string> denoisedFolders = {"denoised5", "denoised15", "denoised35"};

    double totalInferenceTime = 0;
    double totalPSNR = 0;
    int imageCount = 0;

    auto startTotalTime = chrono::high_resolution_clock::now();

    for (size_t i = 0; i < noisyFolders.size(); ++i) {
        string inputFolder = rootFolder + "/" + noisyFolders[i];
        string outputFolder = rootFolder + "/" + denoisedFolders[i];

        if (!fs::exists(outputFolder)) {
            fs::create_directory(outputFolder);
        }

        for (const auto& entry : fs::directory_iterator(inputFolder)) {
            string imagePath = entry.path().string();
            if (endsWith(imagePath, ".png") || endsWith(imagePath, ".jpg") || endsWith(imagePath, ".jpeg")) {
                string outputPath = outputFolder + "/" + entry.path().filename().string();
                processImage(imagePath, outputPath, ksize, totalInferenceTime, totalPSNR, imageCount);
            }
        }
    }

    auto endTotalTime = chrono::high_resolution_clock::now();
    double totalTimeTaken = chrono::duration<double, milli>(endTotalTime - startTotalTime).count();

    if (imageCount > 0) {
        double avgInferenceTime = totalInferenceTime / imageCount;
        double avgPSNR = totalPSNR / imageCount;

        cout << "Average Inference Time (ms): " << avgInferenceTime << endl;
        cout << "Average PSNR (dB): " << avgPSNR << endl;

        printGPUInfo();
    } else {
        cout << "No images processed." << endl;
    }
}

int main() {
    string rootFolder = "/content/drive/MyDrive/dataset";
    int ksize = 5;

    processDataset(rootFolder, ksize);

    return 0;
}


In [None]:
# Build the CUDA source written by the %%writefile cell above. nvcc is required
# for the __global__ kernel and <<<...>>> launch syntax; C++17 is needed for
# std::filesystem.
!nvcc -std=c++17 -o image_denoising median_gpu.cu `pkg-config --cflags --libs opencv4`
!./image_denoising