Skip to content

Commit

Permalink
add weight property to exllamav2 QuantLinear (#80)
Browse files Browse the repository at this point in the history
  • Loading branch information
flozi00 committed Nov 29, 2023
1 parent cb96f12 commit 360ad4c
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion server/lorax_server/utils/gptq/exllamav2.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ def temp_fwd_size(self, max_input_len, max_batch_size):
def scratch_space_fixed(self, max_input_len=2048, max_batch_size=8):
    """Return the combined fixed scratch-space requirement.

    The result is ``self.temp_dq_size()`` plus
    ``self.temp_fwd_size(max_input_len, max_batch_size)``.

    Args:
        max_input_len: Forwarded unchanged to ``temp_fwd_size``.
        max_batch_size: Forwarded unchanged to ``temp_fwd_size``.

    Returns:
        The sum of the two sizes.
        NOTE(review): presumably a byte count -- confirm against the helpers.
    """
    # The dequantization size is evaluated first, as in the one-line form.
    dq_size = self.temp_dq_size()
    fwd_size = self.temp_fwd_size(max_input_len, max_batch_size)
    return dq_size + fwd_size

@property
def weight(self) -> torch.Tensor:
    """Expose ``self.qweight`` under the ``weight`` attribute name.

    Read-only alias: the very same object stored in ``qweight`` is returned,
    with no copy or conversion.
    """
    packed = self.qweight
    return packed


class ExLlamaV2DeviceTensors:

Expand All @@ -158,4 +162,4 @@ def get_scratch_slice(self, size_bytes):
size_bytes = ((size_bytes + 127) // 128) * 128
size_half = size_bytes // 2
scratch_slice = self.scratch.narrow(0, 0, size_half)
return scratch_slice
return scratch_slice

0 comments on commit 360ad4c

Please sign in to comment.