In [1]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0


In [2]:
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git

Collecting git+https://github.com/andreinechaev/nvcc4jupyter.git
  Cloning https://github.com/andreinechaev/nvcc4jupyter.git to /tmp/pip-req-build-a0iuno1_
  Running command git clone --filter=blob:none --quiet https://github.com/andreinechaev/nvcc4jupyter.git /tmp/pip-req-build-a0iuno1_
  Resolved https://github.com/andreinechaev/nvcc4jupyter.git to commit 5741c522547756ac4bb7a16df32106a15efb8a57
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Building wheels for collected packages: nvcc4jupyter
  Building wheel for nvcc4jupyter (pyproject.toml) ... done
  Created wheel for nvcc4jupyter: filename=nvcc4jupyter-1.2.1-py3-none-any.whl size=10741 sha256=1c43b610d84440f376c57bb0b3d20f87e2433fdb6f06eb98d5fd7c81c461a971
  Stored in directory: /tmp/pip-ephem-wheel-cache-swespr8u/wheels/a8/b9/18/23f8ef71ceb0f63297dd1903aedd067e6243a68ea756d6feea
Successfully bu

In [3]:
%load_ext nvcc4jupyter

Detected platform "Colab". Running its setup...
Source files will be saved in "/tmp/tmp6o10h7vl".


In [4]:
%%cuda
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

#define N 256 // Node table size: 256 is the number of distinct 8-bit byte values (7-bit ASCII alone would need only 128)

// One entry of the Huffman node table.
//
// Every member has a default initialiser so that a default-constructed Node
// (including each element of a plain `Node arr[N];`) holds well-defined
// values. This matters because the table is copied byte-for-byte to the
// device: without the initialisers the child pointers would carry
// indeterminate host stack contents.
struct Node {
    char data = 0;          // symbol this node stands for
    int frequency = 0;      // occurrence count / weight associated with `data`
    Node* left = nullptr;   // left child; null when there is none
    Node* right = nullptr;  // right child; null when there is none
};

// CUDA kernel: look up each input symbol in the node table.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per input character;
// threads whose global index is >= size do nothing. No shared memory is used.
//
// Parameters (all pointers are device pointers):
//   nodes  - node table with at least N entries; only `data` and `frequency`
//            are read.
//   input  - `size` symbols to look up.
//   output - `size` ints; output[i] receives the `frequency` of the first
//            node whose `data` equals input[i], or -1 when no node matches.
//            (Placeholder: the frequency stands in for real code bits.)
//   size   - number of symbols in `input` / slots in `output`.
//
// The -1 sentinel guarantees every output slot below `size` is written, so
// the host never reads uninitialised device memory for an unknown symbol.
__global__ void huffmanEncode(const Node* __restrict__ nodes,
                              const char* __restrict__ input,
                              int* __restrict__ output, int size) {
    const int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i >= size) {
        return;  // tail threads of the last block have no element
    }

    const char symbol = input[i];
    int encoded = -1;  // "symbol not present in table"

    // Linear scan; stop at the first matching node.
    for (int j = 0; j < N; ++j) {
        if (nodes[j].data == symbol) {
            encoded = nodes[j].frequency;  // Replace with actual encoding logic
            break;
        }
    }

    output[i] = encoded;
}

// Write the low eight bits of n to stdout as a run of 0/1 digits, most
// significant bit first. No separator or newline is emitted.
void printBinary(int n) {
    int bit = 8;
    while (bit-- > 0) {
        const int digit = (n >> bit) & 1;
        printf("%d", digit);
    }
}

// Evaluate a CUDA runtime call and terminate with a file/line diagnostic if
// it did not return cudaSuccess. Used for every call whose failure leaves the
// program with nothing sensible to do.
#define HUFFMAN_CUDA_CHECK(call)                                              \
    do {                                                                      \
        cudaError_t huffman_err_ = (call);                                    \
        if (huffman_err_ != cudaSuccess) {                                    \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(huffman_err_));                        \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

// Demo driver: builds a dummy node table, runs huffmanEncode over a fixed
// string, prints the per-character result and some run statistics.
// Returns 0 on success and a non-zero status if any step fails.
int main() {
    // Input data. sizeof includes the terminating NUL, which is not encoded.
    const char input[] = "hello world";
    const int size = (int)(sizeof(input) - 1);
    const size_t inputBytes = (size_t)size * sizeof(char);
    const size_t outputBytes = (size_t)size * sizeof(int);

    // Host data structures. The table is value-initialised so the child
    // pointers copied to the device are null rather than stack garbage.
    Node h_nodes[N] = {};
    for (int i = 0; i < N; ++i) {
        h_nodes[i].data = (char)i;         // one entry per byte value
        h_nodes[i].frequency = i % 5 + 1;  // Assigning some arbitrary frequencies
    }

    // Allocate at least one element so a NULL return always means failure.
    int* h_output = (int*)calloc(size > 0 ? (size_t)size : 1, sizeof(int));
    if (h_output == NULL) {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", outputBytes);
        return EXIT_FAILURE;
    }

    // Device data structures
    Node* d_nodes = NULL;
    char* d_input = NULL;
    int* d_output = NULL;
    HUFFMAN_CUDA_CHECK(cudaMalloc((void**)&d_nodes, N * sizeof(Node)));
    HUFFMAN_CUDA_CHECK(cudaMalloc((void**)&d_input, inputBytes));
    HUFFMAN_CUDA_CHECK(cudaMalloc((void**)&d_output, outputBytes));

    // Copy host data structures to device
    HUFFMAN_CUDA_CHECK(cudaMemcpy(d_nodes, h_nodes, N * sizeof(Node),
                                  cudaMemcpyHostToDevice));
    HUFFMAN_CUDA_CHECK(cudaMemcpy(d_input, input, inputBytes,
                                  cudaMemcpyHostToDevice));

    // Grid and block size: integer ceil-division, one thread per character.
    const int threadsPerBlock = 256;
    const int blockCount = (size + threadsPerBlock - 1) / threadsPerBlock;
    dim3 dimBlock(threadsPerBlock, 1, 1);

    // Create CUDA events for timing
    cudaEvent_t start, stop;
    HUFFMAN_CUDA_CHECK(cudaEventCreate(&start));
    HUFFMAN_CUDA_CHECK(cudaEventCreate(&stop));

    HUFFMAN_CUDA_CHECK(cudaEventRecord(start));

    // A grid of zero blocks is an invalid launch configuration, so an empty
    // input skips the launch altogether.
    if (blockCount > 0) {
        dim3 dimGrid(blockCount, 1, 1);
        huffmanEncode<<<dimGrid, dimBlock>>>(d_nodes, d_input, d_output, size);
    }

    // Query the launch status immediately, before any other runtime call can
    // change what cudaGetLastError() reports.
    cudaError_t kernelErr = cudaGetLastError();

    HUFFMAN_CUDA_CHECK(cudaEventRecord(stop));

    // Waiting on the stop event also surfaces faults raised while the kernel
    // was executing.
    const cudaError_t syncErr = cudaEventSynchronize(stop);
    if (kernelErr == cudaSuccess) {
        kernelErr = syncErr;
    }

    if (kernelErr != cudaSuccess) {
        // The output buffer holds nothing meaningful; report and bail out.
        printf("Error Kernel 1 :: %s\n", cudaGetErrorString(kernelErr));
        // Best-effort cleanup: results are ignored on purpose because the
        // runtime may already be in an error state.
        (void)cudaEventDestroy(start);
        (void)cudaEventDestroy(stop);
        (void)cudaFree(d_nodes);
        (void)cudaFree(d_input);
        (void)cudaFree(d_output);
        free(h_output);
        return EXIT_FAILURE;
    }

    // Copy result from device to host
    HUFFMAN_CUDA_CHECK(cudaMemcpy(h_output, d_output, outputBytes,
                                  cudaMemcpyDeviceToHost));

    // Print encoded bits and character frequencies
    printf("Character: Integer (Binary)\n");
    for (int i = 0; i < size; ++i) {
        printf("%c: %d (", input[i], h_output[i]);
        printBinary(h_output[i]);
        printf(")\n");
    }

    // Free device memory
    HUFFMAN_CUDA_CHECK(cudaFree(d_nodes));
    HUFFMAN_CUDA_CHECK(cudaFree(d_input));
    HUFFMAN_CUDA_CHECK(cudaFree(d_output));

    // Free host memory
    free(h_output);

    // Print additional information (memory is sampled after the frees above).
    size_t freeMem = 0, totalMem = 0;
    HUFFMAN_CUDA_CHECK(cudaMemGetInfo(&freeMem, &totalMem));
    // size_t values are printed with %zu; %ld / %d do not match the type.
    printf("\nFree Memory :: %zu\n\n", freeMem);

    printf("Input File Size :: %d\n", size);
    printf("Output Size :: %zu\n", outputBytes);
    printf("Number of Kernels :: 1\n");
    printf("Integer Overflow flag :: 0\n\n");

    printf("Free Mem: %zu\n", freeMem);

    // Reaching this point means both the launch and the execution succeeded.
    printf("Error Kernel 1 :: no error\n");

    // Calculate and print the time taken
    float milliseconds = 0;
    HUFFMAN_CUDA_CHECK(cudaEventElapsedTime(&milliseconds, start, stop));
    printf("Time taken: %.3f ms\n", milliseconds);

    // Destroy CUDA events
    HUFFMAN_CUDA_CHECK(cudaEventDestroy(start));
    HUFFMAN_CUDA_CHECK(cudaEventDestroy(stop));

    return 0;
}



Character: Integer (Binary)
h: 5 (00000101)
e: 2 (00000010)
l: 4 (00000100)
l: 4 (00000100)
o: 2 (00000010)
 : 3 (00000011)
w: 5 (00000101)
o: 2 (00000010)
r: 5 (00000101)
l: 4 (00000100)
d: 1 (00000001)

Free Memory :: 15727656960

Input File Size :: 11
Output Size :: 44
Number of Kernels :: 1
Integer Overflow flag :: 0

Free Mem: 15727656960
Error Kernel 1 :: no error
Time taken: 123.122 ms

