Fix ExLlamaV2 loaders using unnecessary "bits" metadata
oobabooga committed Mar 31, 2024
1 parent 624faa1 commit db5f6cd
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion modules/models_settings.py
@@ -89,7 +89,8 @@ def get_model_metadata(model):
             if metadata['rope_scaling']['type'] == 'linear':
                 model_settings['compress_pos_emb'] = metadata['rope_scaling']['factor']
 
-        if 'quantization_config' in metadata:
+        # Read GPTQ metadata for old GPTQ loaders
+        if 'quantization_config' in metadata and metadata['quantization_config'].get('quant_method', '') != 'exl2':
             if 'bits' in metadata['quantization_config']:
                 model_settings['wbits'] = metadata['quantization_config']['bits']
             if 'group_size' in metadata['quantization_config']:
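
For reference, a minimal sketch of the behavior after this commit. The read_gptq_metadata helper and the sample metadata dicts are hypothetical illustrations; only the guarded if condition is taken from the diff above, and the groupsize assignment is assumed since the diff truncates before it.

# Sketch of the guarded metadata read, not the project's exact code.
# EXL2 quantization also writes a 'quantization_config' block with a
# 'bits' field into config.json, but that value is not used by the
# ExLlamaV2 loaders, so models whose quant_method is 'exl2' are skipped.

def read_gptq_metadata(metadata):
    # Hypothetical helper; the real logic lives in get_model_metadata()
    # in modules/models_settings.py.
    model_settings = {}
    # Read GPTQ metadata for old GPTQ loaders
    if 'quantization_config' in metadata and metadata['quantization_config'].get('quant_method', '') != 'exl2':
        if 'bits' in metadata['quantization_config']:
            model_settings['wbits'] = metadata['quantization_config']['bits']
        if 'group_size' in metadata['quantization_config']:
            # Assumed mapping; the diff cuts off before this assignment
            model_settings['groupsize'] = metadata['quantization_config']['group_size']
    return model_settings

# A GPTQ model's metadata still populates wbits/groupsize:
gptq = {'quantization_config': {'quant_method': 'gptq', 'bits': 4, 'group_size': 128}}
assert read_gptq_metadata(gptq) == {'wbits': 4, 'groupsize': 128}

# An EXL2 model's 'bits' entry is ignored:
exl2 = {'quantization_config': {'quant_method': 'exl2', 'bits': 4.25}}
assert read_gptq_metadata(exl2) == {}

Note that .get('quant_method', '') falls back to an empty string, so configs that lack a quant_method key still compare unequal to 'exl2' and take the old GPTQ path unchanged.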
