In [19]:
%%writefile mm.c
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

/*
 * Fills a size x size matrix, stored row-major in `matrix`, with
 * pseudo-random integers in the range [0, 8] drawn from rand().
 * Values are generated in row-major order, one rand() call per element.
 */
void initializeMatrix(int *matrix, int size)
{
  for (int row = 0; row < size; ++row)
  {
    int *row_start = matrix + row * size;

    for (int col = 0; col < size; ++col)
      row_start[col] = rand() % 9;
  }
}

/*
 * Prints a size x size row-major matrix to stdout: one row per line,
 * every element followed by a tab, and a blank line after the last row.
 */
void printMatrix(int *matrix, int size)
{
  const int *cell = matrix;  /* walks the elements in row-major order */
  int row = 0;

  while (row < size)
  {
    int col = 0;
    while (col < size)
    {
      printf("%d\t", *cell);
      ++cell;
      ++col;
    }
    printf("\n");
    ++row;
  }
  printf("\n");
}

/*
 * Benchmarks a naive size x size integer matrix multiplication, first
 * sequentially and then with the outer loop work-shared across an OpenMP
 * team, and prints the average wall-clock time of each version, the speedup
 * and the number of threads that executed the parallel version.
 *
 * Usage: ./mm <size>
 *
 * Returns 0 on success, 1 on a missing/invalid argument or allocation failure.
 */
int main (int argc, char **argv)
{
  if (argc < 2)
  {
    fprintf(stderr, "Usage: %s <matrix size>\n", argc > 0 ? argv[0] : "mm");
    return 1;
  }

  int size = atoi(argv[1]);
  if (size <= 0)
  {
    fprintf(stderr, "Matrix size must be a positive integer, got '%s'\n", argv[1]);
    return 1;
  }

  /* Do the index arithmetic in size_t so size * size cannot overflow int. */
  const size_t n = (size_t) size;

  int *A = malloc(n * n * sizeof *A);
  int *B = malloc(n * n * sizeof *B);
  int *C = malloc(n * n * sizeof *C);
  if (A == NULL || B == NULL || C == NULL)
  {
    fprintf(stderr, "Could not allocate three %d x %d matrices\n", size, size);
    free(A);
    free(B);
    free(C);
    return 1;
  }

  initializeMatrix(A, size);
  initializeMatrix(B, size);

  int num_threads = 1;  /* overwritten with the real team size below */
  int num_tests = 1;
  double t1, t2;

  /* Sequential version */
  double sft_total = 0;
  for (int g = 0; g < num_tests; g++)
  {
    t1 = omp_get_wtime();
    for (int i = 0; i < size; i++)
    {
      const size_t row = (size_t) i * n;
      for (int j = 0; j < size; j++)
      {
        /* Accumulate locally and assign, so the result never depends on
           whatever C held before (uninitialized memory or a previous run). */
        int sum = 0;
        for (int k = 0; k < size; k++)
          sum += A[row + k] * B[(size_t) k * n + j];
        C[row + j] = sum;
      }
    }
    t2 = omp_get_wtime();

    sft_total += t2 - t1;  /* sequential time of this repetition */
  }

  /* Parallel version */
  double pft_total = 0;
  for (int g = 0; g < num_tests; g++)
  {
    t1 = omp_get_wtime();
    #pragma omp parallel
    {
      /* Exactly one thread records the team size; nowait lets the others
         start on the loop immediately. */
      #pragma omp single nowait
      num_threads = omp_get_num_threads();

      /* Rows are independent, so each thread writes a disjoint part of C.
         Variables declared inside the loop are private to each thread. */
      #pragma omp for
      for (int i = 0; i < size; i++)
      {
        const size_t row = (size_t) i * n;
        for (int j = 0; j < size; j++)
        {
          int sum = 0;
          for (int k = 0; k < size; k++)
            sum += A[row + k] * B[(size_t) k * n + j];
          C[row + j] = sum;
        }
      }
    }
    t2 = omp_get_wtime();

    pft_total += t2 - t1;  /* parallel time of this repetition */
  }

  double sft_media = sft_total / num_tests;
  double pft_media = pft_total / num_tests;

  /* Guard against a zero parallel time on very small inputs. */
  double speedup = pft_media > 0.0 ? sft_media / pft_media : 0.0;

  printf("Size: %d\tSFT: %f\t PFT: %f\t SpeedUp: %f\t Num Threads: %d\n",size, sft_media, pft_media, speedup, num_threads);

  /* Uncomment to inspect the operands and the result. */
  //printMatrix(A,size);
  //printMatrix(B,size);
  //printMatrix(C,size);

  free(A);
  free(B);
  free(C);

  return 0;
}

Writing mm.c


### Run the Code

In [20]:
# Compile mm.c into the executable ./mm; -fopenmp enables the OpenMP pragmas and runtime.
!gcc mm.c -o mm -fopenmp

### Performance Analysis

In [21]:
%%writefile script.sh
#!/bin/bash
# Runs the ./mm benchmark for matrix sizes 100, 200, ..., 1000.
#
# Usage: bash script.sh [num_threads]
#
# The C-style for ((...)) loop below is a bash extension, so the script
# declares bash (not plain sh) as its interpreter.

# Only override OMP_NUM_THREADS when a thread count was actually given, so
# that running the script without arguments does not export an empty value.
if [ -n "$1" ]; then
  export OMP_NUM_THREADS="$1"
fi

for ((i = 100; i <= 1000; i += 100))
do
  ./mm "$i"
done

Writing script.sh


In [22]:
# Run the benchmark script; its argument is passed to ./mm as OMP_NUM_THREADS.
# Uncomment one of the lines below to repeat the benchmark with another thread count.
!bash script.sh 2

# !bash script.sh 4

# !bash script.sh 8

# !bash script.sh 16

# !bash script.sh 32

# !bash script.sh 64

Size: 100	SFT: 0.003595	 PFT: 0.001748	 SpeedUp: 2.056225	 Num Threads: 2
Size: 200	SFT: 0.025669	 PFT: 0.013121	 SpeedUp: 1.956328	 Num Threads: 2


Size: 300	SFT: 0.089360	 PFT: 0.045774	 SpeedUp: 1.952195	 Num Threads: 2
Size: 400	SFT: 0.194864	 PFT: 0.106193	 SpeedUp: 1.834997	 Num Threads: 2
Size: 500	SFT: 0.389782	 PFT: 0.215213	 SpeedUp: 1.811144	 Num Threads: 2
Size: 600	SFT: 0.687574	 PFT: 0.364803	 SpeedUp: 1.884785	 Num Threads: 2
Size: 700	SFT: 1.081559	 PFT: 0.565799	 SpeedUp: 1.911560	 Num Threads: 2
Size: 800	SFT: 1.638770	 PFT: 0.868888	 SpeedUp: 1.886053	 Num Threads: 2
Size: 900	SFT: 2.369792	 PFT: 1.207214	 SpeedUp: 1.963025	 Num Threads: 2
Size: 1000	SFT: 3.082959	 PFT: 1.617664	 SpeedUp: 1.905810	 Num Threads: 2


The optimal number of threads is 64

## `Asynchronous Task`

Asynchronous programming is a set of techniques for running expensive operations concurrently with the rest of the program. It is often used in programs with a graphical user interface, where it is unacceptable for the interface to freeze while a costly operation is performed. Asynchronous operations are also essential for parallel applications that need to run multiple tasks simultaneously. The following program, `asyncTaskOpenMP.c`, shows a task being carried out asynchronously: each OpenMP thread scales its own block of matrix columns by a different constant. Before studying the code, compile and run it as follows:

In [23]:
%%writefile asyncTaskOpenMP.c
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

#define SIZE_MATRIX 10

/*
 * Fills the top-left n x n part of a SIZE_MATRIX x SIZE_MATRIX matrix with 5,
 * prints it, lets a team of OpenMP threads scale consecutive blocks of
 * `block_size` columns by 10, 20, 30, 40 and 50 respectively, and prints the
 * result.
 *
 * Usage: ./asyncTaskOpenMP <n> <block_size>
 *   n          - number of rows/columns to use, 0 <= n <= SIZE_MATRIX
 *   block_size - number of columns per block, >= 0
 *
 * Returns 0 on success, 1 on missing or out-of-range arguments.
 */
int main(int argc, char **argv)
{
  if (argc < 3)
  {
    fprintf(stderr, "Usage: %s <n> <block_size>\n", argc > 0 ? argv[0] : "asyncTaskOpenMP");
    return 1;
  }

  int n = atoi(argv[1]);
  int block_size = atoi(argv[2]);

  /* The matrix lives on the stack with a fixed size: reject anything that
     would index outside of it. */
  if (n < 0 || n > SIZE_MATRIX)
  {
    fprintf(stderr, "n must be between 0 and %d, got %d\n", SIZE_MATRIX, n);
    return 1;
  }
  if (block_size < 0)
  {
    fprintf(stderr, "block_size must not be negative, got %d\n", block_size);
    return 1;
  }
  /* A block wider than the matrix covers all n columns anyway; clamping
     also keeps block * block_size from overflowing. */
  if (block_size > n)
    block_size = n;

  /* One scale factor per column block; block b is multiplied by factors[b]. */
  const int factors[] = {10, 20, 30, 40, 50};
  const int num_blocks = (int) (sizeof factors / sizeof factors[0]);

  int matrix[SIZE_MATRIX][SIZE_MATRIX];
  int i, j;

  for(i = 0; i < n; i++)
  {
    for(j = 0; j < n; j++)
    {
      matrix[i][j] = 5;
      printf("%d\t", matrix[i][j]);
    }
    printf("\n");
  }

  printf("\n\n");

  /* Ask for one thread per block; the runtime may still deliver fewer. */
  omp_set_num_threads(num_blocks);

  #pragma omp parallel
  {
    int id = omp_get_thread_num();
    int team = omp_get_num_threads();

    /* Thread `id` handles blocks id, id + team, ... so every block is
       processed exactly once even if fewer threads than blocks are running. */
    for(int block = id; block < num_blocks; block += team)
    {
      int first = block * block_size;
      int last = first + block_size;

      /* Only the first n columns were initialized and are printed. */
      if(last > n)
        last = n;

      for(int row = 0; row < n; row++)
        for(int column = first; column < last; column++)
          matrix[row][column] *= factors[block];
    }
  }

  for(i = 0; i < n; i++)
  {
    for(j = 0; j < n; j++)
      printf("%d\t", matrix[i][j]);
    printf("\n");
  }

  return 0;
}

Writing asyncTaskOpenMP.c


### Run the Code

In [24]:
# Compile asyncTaskOpenMP.c into ./asyncTaskOpenMP; -fopenmp enables the OpenMP pragmas and runtime.
!gcc asyncTaskOpenMP.c -o asyncTaskOpenMP -fopenmp

In [25]:
# Arguments: n = 10 (rows/columns used) and block_size = 2 (columns handled by each thread).
!./asyncTaskOpenMP 10 2

5	5	5	5	5	5	5	5	5	5	
5	5	5	5	5	5	5	5	5	5	
5	5	5	5	5	5	5	5	5	5	
5	5	5	5	5	5	5	5	5	5	
5	5	5	5	5	5	5	5	5	5	
5	5	5	5	5	5	5	5	5	5	
5	5	5	5	5	5	5	5	5	5	
5	5	5	5	5	5	5	5	5	5	
5	5	5	5	5	5	5	5	5	5	
5	5	5	5	5	5	5	5	5	5	


50	50	100	100	150	150	200	200	250	250	
50	50	100	100	150	150	200	200	250	250	
50	50	100	100	150	150	200	200	250	250	
50	50	100	100	150	150	200	200	250	250	
50	50	100	100	150	150	200	200	250	250	
50	50	100	100	150	150	200	200	250	250	
50	50	100	100	150	150	200	200	250	250	
50	50	100	100	150	150	200	200	250	250	
50	50	100	100	150	150	200	200	250	250	
50	50	100	100	150	150	200	200	250	250	


## References

M. Boratto. Hands-On Supercomputing with Parallel Computing. Available: https://github.com/muriloboratto/Hands-On-Supercomputing-with-Parallel-Computing. 2022.

B. Chapman, G. Jost and R. Pas. Using OpenMP: Portable Shared Memory Parallel Programming. The MIT Press, 2007, USA.