Q1

In [14]:
%%writefile q1.cu
#include <stdio.h>

__global__ void add(int *a, int *b, int *c)
{
  int i = blockIdx.x*blockDim.x + threadIdx.x;
  c[i] = a[i] + b[i];
}

int main()
{
  int N = 1024;
  int a[1024];
  int b[1024];
  int c[1024];

  for (int i = 0; i < N; i++)
  {
    a[i] = i*1.0f;
    b[i] = i*2.0f;
  }
  int *d_a, *d_b, *d_c;
  int size = N*sizeof(int);
  cudaMalloc((void **) &d_a, size);
  cudaMalloc((void **) &d_b, size);
  cudaMalloc((void **) &d_c, size);

  cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice);
  cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice);

  add<<<32, 32>>>(d_a, d_b, d_c);

  cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost);
  for (int i = 0; i < 5; i++)
  {
    printf("C[%d] = %d\n", i, c[i]);
  }
  cudaFree(d_a);
  cudaFree(d_b);
  cudaFree(d_c);
  return 0;
}

Overwriting q1.cu


In [15]:
!nvcc -arch=sm_75 q1.cu -o q1

In [16]:
!./q1

C[0] = 0
C[1] = 3
C[2] = 6
C[3] = 9
C[4] = 12


Q2

In [28]:
%%writefile q2.cu
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/transform.h>

int main()
{
  int N = 1024;
  thrust::device_vector<int> A(N);
  thrust::device_vector<int> B(N);
  thrust::device_vector<int> C(N);

  for (int i = 0; i < N; i++)
  {
    A[i] = i*1.0f;
    B[i] = i*2.0f;
  }
  thrust::transform(A.begin(), A.end(), B.begin(), C.begin(), thrust::plus<int>());

  for (int i = 0; i < 5; i++)
  {
    std::cout << C[i] << std::endl;
  }

}


Overwriting q2.cu


In [25]:
!nvcc -arch=sm_75 q2.cu -o q2

In [26]:
!./q2

0
3
6
9
12


In [41]:
%%writefile q3.cu
#include <iostream>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/reduce.h>
#include <thrust/inner_product.h>
#include <chrono>

int main()
{
  int N = 1024;
  thrust::device_vector<int> A(N);
  thrust::device_vector<int> B(N);
  thrust::device_vector<int> C(N);

  for (int i = 0; i < N; i++)
  {
    A[i] = i*1.0f;
    B[i] = i*2.0f;
  }
  auto start = std::chrono::high_resolution_clock::now();
  int result = thrust::inner_product(
    A.begin(), A.end(),
    B.begin(),
    0
);
  auto end = std::chrono::high_resolution_clock::now();
std::cout << result << std::endl;
std::cout << "Time Taken : " << end-start << std::endl;


}


Overwriting q3.cu


In [42]:
!nvcc -arch=sm_75 q3.cu -o q3

[01m[0m[01mq3.cu(28)[0m: [01;31merror[0m: no operator "[01m<<[0m" matches these operands
            operand types are: std::basic_ostream<char, std::char_traits<char>> << std::chrono::duration<int64_t, std::nano>
  std::cout << "Time Taken : " << end-start << std::endl;
                               ^
[01m[0m[01m/usr/include/c++/11/ostream(283)[0m: [01;36mnote[0m #3326-D: function [01m"std::basic_ostream<_CharT, _Traits>::operator<<(std::basic_ostream<_CharT, _Traits>::__streambuf_type *) [with _CharT=char, _Traits=std::char_traits<char>]"[0m does not match because argument #1 does not match parameter
        operator<<(__streambuf_type* __sb);
        ^
[01m[0m[01m/usr/include/c++/11/ostream(250)[0m: [01;36mnote[0m #3326-D: function [01m"std::basic_ostream<_CharT, _Traits>::operator<<(std::nullptr_t) [with _CharT=char, _Traits=std::char_traits<char>]"[0m does not match because argument #1 does not match parameter
        operator<<(nullptr_t)
        ^
[01m

In [40]:
!./q3

714779648
