# Exercise 2: Calculate pi with combined constructs

Our goal is to use:
* reduction clause
* combined parallel worksharing constructs: parallel for

This is a continuation of the previous exercise, in which we computed pi. Remove the *critical* directive and your additional partial-sum variable, then add a *reduction* clause and compile.

In [None]:
#pragma cling load("libomp.so")
#include <stdio.h>
#include <time.h>
#include <sys/time.h>
#include <omp.h>

/* Integrand of pi = integral [0..1] 4/(1+x**2) dx.
   The argument is parenthesized so that compound expressions such as
   f(a+b) expand correctly. */
#define f(A) (4.0/(1.0+(A)*(A)))

int num_threads = 4;
omp_set_num_threads(num_threads);

//declarations
const int n = 10000000;   // number of integration intervals
int i;
double w, x, sum, pi;     // w: interval width, x: interval midpoint, sum: global sum
clock_t t1, t2;
struct timeval tv1, tv2;
double wt1, wt2;

double sum0; //partial sum

/* Print the team size once: 'single' lets exactly one thread execute the printf. */
#   pragma omp parallel
{ 
#     pragma omp single 
  printf("OpenMP-parallel with %1d threads\n", omp_get_num_threads());
} /* end omp parallel (the region ends with an implicit barrier) */

/* Start the three timers. The timezone argument of gettimeofday is obsolete,
   so NULL is passed. */
gettimeofday(&tv1, NULL);
wt1 = omp_get_wtime();
t1 = clock();

/* calculate pi = integral [0..1] 4/(1+x**2) dx */
w = 1.0/n;
sum = 0.0;
#pragma omp parallel private(x,sum0), shared(w,sum)
{
    /* Each thread accumulates into its own private partial sum. */
    sum0 = 0.0;
    /* nowait: no barrier after the loop, threads go straight on to the
       critical section below. */
    #pragma omp for nowait
    for (i = 1; i <= n; i++)
    {
        x = w*((double)i-0.5);   /* midpoint of interval i */
        sum0 = sum0+f(x);
    }
    /* Only one thread at a time may update the shared sum. */
    # pragma omp critical
    {
        sum = sum+sum0;
    }
} /*end omp parallel*/ 
pi = w*sum;

t2 = clock();
wt2 = omp_get_wtime();
gettimeofday(&tv2, NULL);
printf( "computed pi = %24.16g\n", pi );
/* clock() counts in units of 1/CLOCKS_PER_SEC seconds. */
printf( "CPU time (clock)                = %12.4g sec\n", (double)(t2-t1)/CLOCKS_PER_SEC );
printf( "wall clock time (omp_get_wtime) = %12.4g sec\n", wt2-wt1 );
printf( "wall clock time (gettimeofday)  = %12.4g sec\n", (tv2.tv_sec-tv1.tv_sec) + (tv2.tv_usec-tv1.tv_usec)*1e-6 );

After successful execution, you may compare your result with the provided solution:

In [None]:
#pragma cling load("libomp.so")
#include <stdio.h>
#include <time.h>
#include <sys/time.h>
#include <omp.h>

/* Integrand of pi = integral [0..1] 4/(1+x**2) dx.
   The argument is parenthesized so that compound expressions such as
   f(a+b) expand correctly. */
#define f(A) (4.0/(1.0+(A)*(A)))

int num_threads = 4;
omp_set_num_threads(num_threads);

//declarations
const int n = 10000000;   // number of integration intervals
int i;
double w, x, sum, pi;     // w: interval width, x: interval midpoint, sum: reduction target
clock_t t1, t2;
struct timeval tv1, tv2;
double wt1, wt2;

/* Print the team size once: 'single' lets exactly one thread execute the printf. */
#   pragma omp parallel
{ 
#     pragma omp single 
  printf("OpenMP-parallel with %1d threads\n", omp_get_num_threads());
} /* end omp parallel (the region ends with an implicit barrier) */

/* Start the three timers. The timezone argument of gettimeofday is obsolete,
   so NULL is passed. */
gettimeofday(&tv1, NULL);
wt1 = omp_get_wtime();
t1 = clock();

/* calculate pi = integral [0..1] 4/(1+x**2) dx */
w = 1.0/n;
sum = 0.0;
#pragma omp parallel private(x), shared(w,sum)
{
    /* reduction(+:sum): every thread adds into its own private copy of sum
       (initialised to 0); the copies are added to the shared sum at the end
       of the loop. No manual partial sum or critical section is needed. */
    #pragma omp for reduction(+:sum)
    for (i = 1; i <= n; i++)
    {
        x = w*((double)i-0.5);   /* midpoint of interval i */
        sum = sum+f(x);
    }
} /*end omp parallel*/ 
pi = w*sum;

t2 = clock();
wt2 = omp_get_wtime();
gettimeofday(&tv2, NULL);
printf( "computed pi = %24.16g\n", pi );
/* clock() counts in units of 1/CLOCKS_PER_SEC seconds. */
printf( "CPU time (clock)                = %12.4g sec\n", (double)(t2-t1)/CLOCKS_PER_SEC );
printf( "wall clock time (omp_get_wtime) = %12.4g sec\n", wt2-wt1 );
printf( "wall clock time (gettimeofday)  = %12.4g sec\n", (tv2.tv_sec-tv1.tv_sec) + (tv2.tv_usec-tv1.tv_usec)*1e-6 );

Is the value of pi correct? **Great!** Now change the parallel region so that it uses the combined construct *parallel for*, then compile.

After successful execution, you may compare your result with the provided solution:

In [None]:
#pragma cling load("libomp.so")
#include <stdio.h>
#include <time.h>
#include <sys/time.h>
#include <omp.h>

/* Integrand of pi = integral [0..1] 4/(1+x**2) dx.
   The argument is parenthesized so that compound expressions such as
   f(a+b) expand correctly. */
#define f(A) (4.0/(1.0+(A)*(A)))

int num_threads = 4;
omp_set_num_threads(num_threads);

//declarations
const int n = 10000000;   // number of integration intervals
int i;
double w, x, sum, pi;     // w: interval width, x: interval midpoint, sum: reduction target
clock_t t1, t2;
struct timeval tv1, tv2;
double wt1, wt2;

/* Print the team size once: 'single' lets exactly one thread execute the printf. */
#   pragma omp parallel
{ 
#     pragma omp single 
  printf("OpenMP-parallel with %1d threads\n", omp_get_num_threads());
} /* end omp parallel (the region ends with an implicit barrier) */

/* Start the three timers. The timezone argument of gettimeofday is obsolete,
   so NULL is passed. */
gettimeofday(&tv1, NULL);
wt1 = omp_get_wtime();
t1 = clock();

/* calculate pi = integral [0..1] 4/(1+x**2) dx */
w = 1.0/n;
sum = 0.0;
/* Combined construct: creates the thread team and distributes the loop
   iterations in one directive. Each thread adds into a private copy of sum;
   the copies are added to the shared sum at the end of the loop. */
#pragma omp parallel for private(x) shared(w) reduction(+:sum)
for (i = 1; i <= n; i++)
{
    x = w*((double)i-0.5);   /* midpoint of interval i */
    sum = sum+f(x);
}
/* end omp parallel for */
pi = w*sum;

t2 = clock();
wt2 = omp_get_wtime();
gettimeofday(&tv2, NULL);
printf( "computed pi = %24.16g\n", pi );
/* clock() counts in units of 1/CLOCKS_PER_SEC seconds. */
printf( "CPU time (clock)                = %12.4g sec\n", (double)(t2-t1)/CLOCKS_PER_SEC );
printf( "wall clock time (omp_get_wtime) = %12.4g sec\n", wt2-wt1 );
printf( "wall clock time (gettimeofday)  = %12.4g sec\n", (tv2.tv_sec-tv1.tv_sec) + (tv2.tv_usec-tv1.tv_usec)*1e-6 );