# Kokkos SpMV on GPU (Reproducibility Notebook)

This notebook demonstrates running the **GPU-ready SpMV kernel** (sparse matrix-vector multiplication, implemented with Kokkos) on a Google Colab GPU instance (Tesla T4).

### Instructions:
1. Ensure Runtime is set to **GPU** (Runtime > Change runtime type > T4 GPU).
2. Run all cells below, in order, to clone and build Kokkos, compile the SpMV example, and execute it.

In [None]:
# 1. Check GPU Availability
# Runs the `nvidia-smi` command-line tool in a shell and shows its output in
# the cell. If the command is missing or fails, the runtime presumably has no
# GPU attached -- switch the runtime type before running the remaining cells.
!nvidia-smi

In [None]:
# 2. Install Dependencies & Clone Kokkos
import os
!apt-get install -y cmake build-essential
if not os.path.exists("kokkos"):
    !git clone https://github.com/kokkos/kokkos.git
    print("Kokkos Cloned successfully.")

In [None]:
# 3. Write Source Code (Module 06: TeamPolicy)
# The C++ program lives in a regular (non-raw) Python string, so every C++
# newline escape is written with a doubled backslash here and reaches the
# file as a single backslash followed by 'n'.
#
# The program multiplies a small 4x4 CSR matrix by a vector of ones using one
# Kokkos team per row, prints the result, checks it against a sequential host
# reference, and exits with a non-zero status if the two disagree.
source_code = """
#include <Kokkos_Core.hpp>
#include <cmath>
#include <cstdio>
#include <typeinfo>
#include <vector>

int main(int argc, char* argv[]) {
  int status = 0;  // process exit code; set to 1 if verification fails
  Kokkos::initialize(argc, argv);
  {
    // Host Data (CSR format: row offsets, column indices, non-zero values)
    std::vector<int> h_row_map = {0, 2, 3, 6, 7};
    std::vector<int> h_col_idx = {0, 3, 1, 0, 2, 3, 3};
    std::vector<double> h_values = {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0};

    // Sizes are derived from the host arrays so the views can never disagree with them.
    const int num_rows = static_cast<int>(h_row_map.size()) - 1;
    const int num_nnz  = static_cast<int>(h_values.size());

    // Define Execution & Memory Space
    typedef Kokkos::DefaultExecutionSpace::memory_space MemSpace;
    printf("Execution Space: %s\\n", typeid(Kokkos::DefaultExecutionSpace).name());

    // Device Views
    Kokkos::View<int*, MemSpace>      row_map("row_map", num_rows + 1);
    Kokkos::View<int*, MemSpace>      col_idx("col_idx", num_nnz);
    Kokkos::View<double*, MemSpace>   values("values", num_nnz);
    Kokkos::View<double*, MemSpace>   x("x", num_rows);
    Kokkos::View<double*, MemSpace>   y("y", num_rows);

    // Mirror Views (Auto Type)
    auto h_row_map_v = Kokkos::create_mirror_view(row_map);
    auto h_col_idx_v = Kokkos::create_mirror_view(col_idx);
    auto h_values_v  = Kokkos::create_mirror_view(values);
    auto h_x_v       = Kokkos::create_mirror_view(x);

    // Fill Data (x is all ones, so y(i) is the sum of row i)
    for(int i=0; i<num_rows+1; i++) h_row_map_v(i) = h_row_map[i];
    for(int k=0; k<num_nnz; k++)    h_col_idx_v(k) = h_col_idx[k];
    for(int k=0; k<num_nnz; k++)    h_values_v(k)  = h_values[k];
    for(int i=0; i<num_rows; i++)   h_x_v(i)       = 1.0;

    // Deep Copy Host -> Device
    Kokkos::deep_copy(row_map, h_row_map_v);
    Kokkos::deep_copy(col_idx, h_col_idx_v);
    Kokkos::deep_copy(values, h_values_v);
    Kokkos::deep_copy(x, h_x_v);

    printf("Computing SpMV on Device (TeamPolicy)...\\n");

    // TeamPolicy Kernel: one team per matrix row
    typedef Kokkos::TeamPolicy<> policy_t;
    typedef policy_t::member_type member_t;

    Kokkos::parallel_for("SpMV", policy_t(num_rows, Kokkos::AUTO), KOKKOS_LAMBDA(const member_t& team_member) {
        int row = team_member.league_rank();
        double row_sum = 0.0;
        int row_start = row_map(row);
        int row_end   = row_map(row+1);
        int row_len   = row_end - row_start;

        // The team's threads split the non-zeros of this row and reduce into row_sum.
        Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team_member, row_len),
          [=] (const int k_offset, double& lsum) {
            int k = row_start + k_offset;
            lsum += values(k) * x(col_idx(k));
        }, row_sum);

        team_member.team_barrier();
        // Only one thread per team stores the row result.
        if (team_member.team_rank() == 0) y(row) = row_sum;
    });

    Kokkos::fence();

    // Verify Results
    auto h_y = Kokkos::create_mirror_view(y);
    Kokkos::deep_copy(h_y, y);

    printf("Result Y: [ ");
    for(int i=0; i<num_rows; i++) printf("%.1f ", h_y(i));
    printf("]\\n");

    // Compare each row against a sequential CSR product computed on the host.
    for(int i=0; i<num_rows; i++) {
      double expected = 0.0;
      for(int k=h_row_map[i]; k<h_row_map[i+1]; k++) {
        expected += h_values[k] * h_x_v(h_col_idx[k]);
      }
      if (std::fabs(h_y(i) - expected) > 1e-9) {
        printf("MISMATCH in row %d: got %.1f, expected %.1f\\n", i, h_y(i), expected);
        status = 1;
      }
    }
    printf("Verification: %s\\n", status == 0 ? "PASSED" : "FAILED");
  }
  Kokkos::finalize();
  return status;
}
"""
with open("spmv_gpu.cpp", "w") as f:
    f.write(source_code)
print("Source Code Written: spmv_gpu.cpp")

In [None]:
# 4. Compile with NVCC Wrapper
print("=== COMPILING ===")
import os
cwd = os.getcwd()
nvcc_wrapper_path = os.path.join(cwd, "kokkos/bin/nvcc_wrapper")
!chmod +x {nvcc_wrapper_path}

# Build Kokkos Library
!mkdir -p build && cd build && \
 cmake ../kokkos \
 -DKokkos_ENABLE_CUDA=ON \
 -DKokkos_ENABLE_CUDA_LAMBDA=ON \
 -DCMAKE_CXX_COMPILER={nvcc_wrapper_path} \
 -DCMAKE_BUILD_TYPE=Release && \
 make -j2

# Build App
with open("CMakeLists.txt", "w") as f:
    f.write("""
cmake_minimum_required(VERSION 3.16)
project(SpMV_GPU_Test CXX)
find_package(Kokkos REQUIRED PATHS build)
add_executable(spmv_gpu_exe spmv_gpu.cpp)
target_link_libraries(spmv_gpu_exe Kokkos::kokkos)
""")

print("=== LINKING ===")
!cmake . -DCMAKE_CXX_COMPILER={nvcc_wrapper_path} -DKokkos_DIR=/content/build
!make spmv_gpu_exe

In [None]:
# 5. Execute
# Runs the `spmv_gpu_exe` binary from the current working directory (the
# target built by the compile cell) and shows its output in the cell.
!./spmv_gpu_exe