Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/nomic-ai/llama.cpp into nomic-mpt
Browse files (browse the repository at this point in the history)
  • Loading branch information
cebtenzzre committed Oct 19, 2023
2 parents 34a3fae + ffe96e1 commit a8ed8c8
Show file tree
Hide file tree
Showing 9 changed files with 46 additions and 675 deletions.
11 changes: 1 addition & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ if (LLAMA_KOMPUTE)

if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt")
message(STATUS "Kompute found")
set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level")
add_subdirectory(kompute)

# Compile our shaders
Expand All @@ -479,12 +480,7 @@ if (LLAMA_KOMPUTE)
kompute/op_norm.comp
kompute/op_rmsnorm.comp
kompute/op_diagmask.comp
kompute/op_mul_mat_mat_f16.comp
kompute/op_mul_mat_mat_f32.comp
kompute/op_mul_mat_mat_q4_0.comp
kompute/op_mul_mat_mat_q4_1.comp
kompute/op_mul_mat_mat_q8_0.comp
kompute/op_mul_mat_mat_q6_k.comp
kompute/op_mul_mat_f16.comp
kompute/op_mul_mat_q8_0.comp
kompute/op_mul_mat_q4_0.comp
Expand Down Expand Up @@ -515,12 +511,7 @@ if (LLAMA_KOMPUTE)
shaderop_norm.h
shaderop_rmsnorm.h
shaderop_diagmask.h
shaderop_mul_mat_mat_f16.h
shaderop_mul_mat_mat_f32.h
shaderop_mul_mat_mat_q4_0.h
shaderop_mul_mat_mat_q4_1.h
shaderop_mul_mat_mat_q8_0.h
shaderop_mul_mat_mat_q6_k.h
shaderop_mul_mat_f16.h
shaderop_mul_mat_q8_0.h
shaderop_mul_mat_q4_0.h
Expand Down
333 changes: 30 additions & 303 deletions ggml-vulkan.cpp

Large diffs are not rendered by default.

56 changes: 0 additions & 56 deletions kompute/op_mul_mat_mat_f16.comp

This file was deleted.

21 changes: 14 additions & 7 deletions kompute/op_mul_mat_mat_f32.comp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
#extension GL_KHR_shader_subgroup_arithmetic : require
#extension GL_EXT_debug_printf : enable

// layout(local_size_x = 8) in;
// device subgroup size
layout (local_size_x_id = 0) in;

layout(binding = 0) readonly buffer tensorInA { float inA[]; };
layout(binding = 1) readonly buffer tensorInB { float inB[]; };
Expand All @@ -40,14 +41,20 @@ pcs;


void main() {
uvec3 gid = gl_GlobalInvocationID;
uvec3 gid = gl_WorkGroupID;

const uint x = (gid.x*pcs.nb01 + gid.z/(pcs.ne12/pcs.ne02)*pcs.nb02) / 4 + pcs.inAOff; // Based from inA
const uint y = (gid.y*pcs.nb11 + gid.z/(pcs.ne02/pcs.ne12)*pcs.nb12) / 4 + pcs.inBOff; // based from inB
uint bc_ab = pcs.ne12 > pcs.ne02 ? gid.z / (pcs.ne12 / pcs.ne02) : gid.z;
uint bc_ba = pcs.ne02 > pcs.ne12 ? gid.z / (pcs.ne02 / pcs.ne12) : gid.z;

const uint x = (gid.x*pcs.nb01 + bc_ab*pcs.nb02) / 4 + pcs.inAOff; // Based from inA
const uint y = (gid.y*pcs.nb11 + bc_ba*pcs.nb12) / 4 + pcs.inBOff; // based from inB
float sum = 0.0f;
for (uint i = 0; i < pcs.ne00; i ++) {
for (uint i = gl_SubgroupInvocationID.x; i < pcs.ne00; i += gl_SubgroupSize) {
sum += float(inA[x+i]) * float(inB[y+i]);
}

out_[gid.z*(pcs.nb2/4) + gid.y*(pcs.nb1/4) + gid.x + pcs.outOff] = sum;
}
const float all_sum = subgroupAdd(sum);
if (subgroupElect()) {
out_[gid.z*(pcs.nb2/4) + gid.y*(pcs.nb1/4) + gid.x + pcs.outOff] = all_sum;
}
}
71 changes: 0 additions & 71 deletions kompute/op_mul_mat_mat_q4_0.comp

This file was deleted.

73 changes: 0 additions & 73 deletions kompute/op_mul_mat_mat_q4_1.comp

This file was deleted.

88 changes: 0 additions & 88 deletions kompute/op_mul_mat_mat_q6_k.comp

This file was deleted.

Loading

0 comments on commit a8ed8c8

Please sign in to comment.