In [None]:
%%writefile proj.cu

In [None]:
// Libraries: console I/O, stb_image for loading/saving images, and the CUDA runtime API
#include <iostream>
#include </content/stb_image.h>
#include </content/stb_image_write.h>
#include <cuda_runtime.h>


using namespace std;

In [None]:
// 3x3 Laplacian stencil (4-neighbour form), stored in constant memory.
// Every thread reads the same coefficient on each loop iteration.
__constant__ int lapfilter[3][3] = {
    {0, 1, 0},
    {1, -4, 1},
    {0, 1, 0}
};

// Applies the 3x3 Laplacian filter to a single-channel, row-major 8-bit image.
//
// Launch layout: a 2D grid of 2D blocks in which x maps to columns and y maps
// to rows; the grid must cover at least width x height threads. Threads that
// fall outside the image exit without touching memory.
//
// Parameters:
//   inputimg - device pointer to width*height input bytes (read only)
//   o        - device pointer to width*height output bytes; must not overlap
//              inputimg, because neighbouring threads read the input pixels
//              that surround the one being written
//   width    - image width in pixels
//   height   - image height in pixels
//
// Interior pixels receive the filter response clamped to [0, 255]. Pixels on
// the one-pixel image border have no complete 3x3 neighbourhood and are set
// to 0, so every byte of the output buffer is defined after the kernel runs.
__global__ void applyfilter(const unsigned char* inputimg, unsigned char* o, int width, int height)
{
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    const int col = blockIdx.x * blockDim.x + threadIdx.x;

    // The grid is rounded up to whole blocks, so some threads lie off-image.
    if (row >= height || col >= width) {
        return;
    }

    // size_t arithmetic keeps the flat index from overflowing on huge images.
    const size_t stride = static_cast<size_t>(width);
    const size_t outIdx = static_cast<size_t>(row) * stride + col;

    // Border pixels: write a defined value instead of leaving whatever
    // happened to be in the freshly allocated output buffer.
    if (row == 0 || row == height - 1 || col == 0 || col == width - 1) {
        o[outIdx] = 0;
        return;
    }

    int sum = 0;
    for (int k = 0; k < 3; k++) {
        const size_t rowBase = static_cast<size_t>(row - 1 + k) * stride;
        for (int l = 0; l < 3; l++) {
            sum += lapfilter[k][l] * inputimg[rowBase + (col - 1 + l)];
        }
    }

    // Saturate to the 8-bit range before narrowing.
    o[outIdx] = static_cast<unsigned char>(min(max(sum, 0), 255));
}

In [None]:
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Loads /content/testimg.jpg as a single-channel image, runs the applyfilter
// kernel on the GPU and writes the result to /content/testimgfiltered.jpg.
// Returns 0 on success and -1 if loading, any CUDA step, or saving fails.
int main() {
    int width = 0, height = 0, channels = 0;
    // The final argument (1) asks stb_image for one channel per pixel.
    unsigned char* inputimg = stbi_load("/content/testimg.jpg", &width, &height, &channels, 1);
    if (!inputimg)
    {
        std::cout << "Failed to load image." << std::endl;
        return -1;
    }

    // Compute the buffer size in size_t so width*height cannot overflow int.
    const size_t numBytes = static_cast<size_t>(width) * static_cast<size_t>(height) * sizeof(unsigned char);

    // Host buffer that receives the filtered image.
    unsigned char* outputimg = new unsigned char[numBytes];

    // Device buffers start as nullptr so the cleanup below is safe on every path.
    unsigned char* deviceInputImg = nullptr;
    unsigned char* deviceFilteredImg = nullptr;

    // Allocate device memory and upload the input; && stops at the first failure.
    bool ok = cudaOk(cudaMalloc(&deviceInputImg, numBytes), "cudaMalloc input")
           && cudaOk(cudaMalloc(&deviceFilteredImg, numBytes), "cudaMalloc output")
           && cudaOk(cudaMemcpy(deviceInputImg, inputimg, numBytes, cudaMemcpyHostToDevice),
                     "cudaMemcpy host to device");

    if (ok) {
        // 32x8 threads per block: 32 threads along x touch consecutive bytes of a row.
        // The grid is rounded up so it covers the whole image.
        const dim3 block(32, 8);
        const dim3 grid((width + block.x - 1) / block.x, (height + block.y - 1) / block.y);

        applyfilter<<<grid, block>>>(deviceInputImg, deviceFilteredImg, width, height);

        // cudaGetLastError reports launch-configuration errors; the blocking
        // copy that follows waits for the kernel and reports execution errors.
        ok = cudaOk(cudaGetLastError(), "applyfilter launch")
          && cudaOk(cudaMemcpy(outputimg, deviceFilteredImg, numBytes, cudaMemcpyDeviceToHost),
                    "cudaMemcpy device to host");
    }

    if (ok) {
        // stbi_write_jpg returns 0 on failure.
        ok = stbi_write_jpg("/content/testimgfiltered.jpg", width, height, 1, outputimg, 100) != 0;
    }

    if (ok)
      std::cout << "Filter applied successfully." << std::endl;
    else
      std::cout << "Failed to apply filter." << std::endl;

    // free device mem (cudaFree accepts nullptr)
    cudaFree(deviceInputImg);
    cudaFree(deviceFilteredImg);

    // free host mem: the stb buffer must go back through stb's own free
    // routine, not delete[]; only outputimg was allocated with new[].
    stbi_image_free(inputimg);
    delete[] outputimg;

    return ok ? 0 : -1;
}

In [None]:
!nvcc proj.cu /content/stb_image.c /content/stb_image_write.c -o proj
!./proj