In [1]:
%%writefile matrix_mul.cu
#include <iostream>
#include <cuda_runtime.h>
using namespace std;

// CUDA kernel for matrix multiplication: C = A * B.
//
// A, B and C are device pointers to N x N int matrices stored row-major
// (element (r, c) lives at index r * N + c).
//
// Launch layout: 2D blocks in a 2D grid, with x mapped to columns and y mapped
// to rows. Each thread produces one element of C. The grid may cover more than
// N x N threads; threads that fall outside the matrix return without touching
// memory. A single-block launch such as <<<1, dim3(N, N)>>> remains valid
// because blockIdx is then (0, 0). No shared memory is used.
__global__ void matMul(int* A, int* B, int* C, int N) {
    // Global coordinates, so the kernel is correct for multi-block grids too.
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard the grid tail: the launch rarely divides N evenly.
    if (row >= N || col >= N) return;

    // Flat offsets are computed in size_t so row * N cannot overflow int.
    const size_t n = static_cast<size_t>(N);
    const size_t rowBase = static_cast<size_t>(row) * n;

    int sum = 0;
    for (size_t i = 0; i < n; i++)
        sum += A[rowBase + i] * B[i * n + col];
    C[rowBase + col] = sum;
}

// Write the N x N row-major matrix `m` to stdout: every element is followed by
// a space, a newline is emitted after each run of N elements, and one extra
// blank line closes the matrix.
void print(const int* m, int N) {
    const int total = N * N;
    int idx = 0;
    while (idx < total) {
        std::cout << m[idx] << " ";
        ++idx;
        // idx now counts the elements written so far; a multiple of N ends a row.
        if (idx % N == 0) {
            std::cout << "\n";
        }
    }
    std::cout << "\n";
}

// Reports a failed CUDA runtime call on stderr; returns true when `err` is cudaSuccess.
static bool cudaOk(cudaError_t err, const char* what) {
    if (err == cudaSuccess) return true;
    cerr << "CUDA error in " << what << ": " << cudaGetErrorString(err) << "\n";
    return false;
}

// Reads `count` integers from stdin into `dst`; returns false on malformed or truncated input.
static bool readMatrix(int* dst, int count) {
    for (int i = 0; i < count; i++)
        if (!(cin >> dst[i])) return false;
    return true;
}

// Reads two N x N integer matrices from stdin, multiplies them on the GPU with
// matMul, and prints both inputs and the product.
//
// Returns 0 on success and 1 on invalid input or on any CUDA failure, after
// printing a diagnostic to stderr.
int main() {
    int N = 0;
    cout << "Enter size of NxN matrix (e.g., 2 for 2x2): ";
    if (!(cin >> N) || N <= 0) {
        cerr << "Matrix size must be a positive integer.\n";
        return 1;
    }

    // The kernel is launched as ONE block of N x N threads, so N * N must fit
    // in the device's per-block thread limit. Without this check an oversized
    // launch is rejected and the result buffer would be printed uninitialised.
    int device = 0;
    int maxThreads = 0;
    if (!cudaOk(cudaGetDevice(&device), "cudaGetDevice") ||
        !cudaOk(cudaDeviceGetAttribute(&maxThreads, cudaDevAttrMaxThreadsPerBlock, device),
                "cudaDeviceGetAttribute"))
        return 1;
    if (static_cast<long long>(N) * N > maxThreads) {
        cerr << "N = " << N << " needs " << static_cast<long long>(N) * N
             << " threads in one block, but the device allows at most "
             << maxThreads << ".\n";
        return 1;
    }

    const int size = N * N;
    const size_t bytes = static_cast<size_t>(size) * sizeof(int);

    int* A = new int[size];
    int* B = new int[size];
    int* C = new int[size];
    // Null-initialised so the unconditional cudaFree calls below are safe on
    // every path (cudaFree on a null pointer is a no-op).
    int *dA = nullptr, *dB = nullptr, *dC = nullptr;

    // User input for matrix A
    cout << "Enter " << size << " elements for matrix A:\n";
    bool ok = readMatrix(A, size);

    // User input for matrix B
    if (ok) {
        cout << "Enter " << size << " elements for matrix B:\n";
        ok = readMatrix(B, size);
    }
    if (!ok) cerr << "Invalid or missing matrix element.\n";

    // Allocate device memory
    ok = ok && cudaOk(cudaMalloc(&dA, bytes), "cudaMalloc(dA)")
            && cudaOk(cudaMalloc(&dB, bytes), "cudaMalloc(dB)")
            && cudaOk(cudaMalloc(&dC, bytes), "cudaMalloc(dC)");

    // Copy data to device
    ok = ok && cudaOk(cudaMemcpy(dA, A, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(A -> dA)")
            && cudaOk(cudaMemcpy(dB, B, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(B -> dB)");

    // Launch kernel
    if (ok) {
        matMul<<<1, dim3(N, N)>>>(dA, dB, dC, N);
        // A launch returns no status of its own; configuration errors show up here.
        ok = cudaOk(cudaGetLastError(), "matMul launch");
    }

    // Copy result back. This blocking copy also waits for the kernel, so
    // faults raised during kernel execution are reported by this call.
    ok = ok && cudaOk(cudaMemcpy(C, dC, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(dC -> C)");

    // Display results
    if (ok) {
        cout << "Matrix A:\n"; print(A, N);
        cout << "Matrix B:\n"; print(B, N);
        cout << "Matrix A x B:\n"; print(C, N);
    }

    // Free memory
    cudaFree(dA); cudaFree(dB); cudaFree(dC);
    delete[] A; delete[] B; delete[] C;

    return ok ? 0 : 1;
}


Writing matrix_mul.cu


In [3]:
!nvcc -arch=sm_75 matrix_mul.cu -o matrix_mul
!./matrix_mul


Enter size of NxN matrix (e.g., 2 for 2x2): 3
Enter 9 elements for matrix A:
4 6 3 2 1 8 9 9 2
Enter 9 elements for matrix B:
9 7 8 2 3 6 1 4 2
Matrix A:
4 6 3 
2 1 8 
9 9 2 

Matrix B:
9 7 8 
2 3 6 
1 4 2 

Matrix A x B:
51 58 74 
28 49 38 
101 98 130 

