prep for mlx-swift 0.29.1 #411
Changes from all commits:
```diff
@@ -142,24 +142,28 @@ class SwitchLinear: Module, Quantizable {
         return result
     }

-    func toQuantized(groupSize: Int = 64, bits: Int = 4) -> Module {
-        QuantizedSwitchLinear(self, groupSize: groupSize, bits: bits)
+    func toQuantized(groupSize: Int = 64, bits: Int = 4, mode: QuantizationMode) -> Module {
+        QuantizedSwitchLinear(self, groupSize: groupSize, bits: bits, mode: mode)
     }
 }

 class QuantizedSwitchLinear: SwitchLinear, Quantized {
     @ModuleInfo(key: "scales") var scales: MLXArray
-    @ModuleInfo(key: "biases") var biases: MLXArray
+    @ModuleInfo(key: "biases") var biases: MLXArray?
```
> **Review comment:** biases are now optional
```diff
     let groupSize: Int
     let bits: Int
+    let mode: QuantizationMode

-    init(_ other: SwitchLinear, groupSize: Int = 64, bits: Int = 4) {
+    init(
+        _ other: SwitchLinear, groupSize: Int = 64, bits: Int = 4, mode: QuantizationMode = .affine
+    ) {
         self.groupSize = groupSize
         self.bits = bits
+        self.mode = mode

         let (quantizedWeight, scales, biases) = MLX.quantized(
-            other.weight, groupSize: groupSize, bits: bits)
+            other.weight, groupSize: groupSize, bits: bits, mode: mode)

         self._scales.wrappedValue = scales
         self._biases.wrappedValue = biases

@@ -183,6 +187,7 @@ class QuantizedSwitchLinear: SwitchLinear, Quantized {
             transpose: true,
             groupSize: self.groupSize,
             bits: self.bits,
+            mode: mode,
             sortedIndices: sortedIndices
         )
```
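As context for the new `mode` parameter, here is a minimal usage sketch (mine, not part of the PR). It relies only on the `MLX.quantized(_:groupSize:bits:mode:)` call visible in the diff above; the weight shape and the `MLX.zeros` factory are illustrative assumptions:

```swift
import MLX

// Hypothetical weight; the trailing dimension must be divisible by
// the group size for quantization.
let weight = MLX.zeros([8, 64])

// The quantize call that the new `mode` parameter threads through,
// mirroring the updated QuantizedSwitchLinear init above.
let (quantizedWeight, scales, biases) = MLX.quantized(
    weight, groupSize: 64, bits: 4, mode: .affine)

// biases may be absent/unused for non-affine modes, which is why the
// biases property above became optional.
print(quantizedWeight.shape, scales.shape)
```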
In the second file, the `BaseConfiguration` definitions:

```diff
@@ -1,6 +1,7 @@
 // Copyright © 2025 Apple Inc.

+import Foundation
 import MLX

 /// Base ``LanguageModel`` configuration -- provides `modelType`
 /// and `quantization` (used in loading the model).
```
```diff
@@ -18,20 +19,15 @@ public struct BaseConfiguration: Codable, Sendable {

         public let groupSize: Int
         public let bits: Int
-        public var quantMethod: String? = nil
-        public var linearClass: String? = nil
-        public var quantizationMode: String? = nil
```
> **Review comment:** These were defined so that they could be skipped (below). We can just skip them directly.
```diff
-        public var mode: String? = nil
+        private var _mode: QuantizationMode? = nil
+        public var mode: QuantizationMode { _mode ?? .affine }

-        public var asTuple: (Int, Int) { (groupSize, bits) }
+        public var asTuple: (Int, Int, QuantizationMode) { (groupSize, bits, mode) }

         enum CodingKeys: String, CodingKey {
             case groupSize = "group_size"
             case bits = "bits"
-            case quantMethod = "quant_method"
-            case linearClass = "linear_class"
-            case quantizationMode = "quantization_mode"
-            case mode = "mode"
+            case _mode = "mode"
         }
     }
```
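To see what the `_mode ?? .affine` fallback buys, here is a small standalone decode sketch (not from the PR): configs that predate the `mode` key still decode and default to affine. `QuantizationMode` below is a local stand-in for the MLX type, assuming lowercase raw values ("affine", "mxfp4"):

```swift
import Foundation

// Local stand-in for MLX's QuantizationMode (assumed raw values).
enum QuantizationMode: String, Codable { case affine, mxfp4 }

// Mirrors the Quantization struct from the diff above.
struct Quantization: Codable {
    let groupSize: Int
    let bits: Int
    private var _mode: QuantizationMode? = nil
    var mode: QuantizationMode { _mode ?? .affine }

    enum CodingKeys: String, CodingKey {
        case groupSize = "group_size"
        case bits
        case _mode = "mode"
    }
}

// An older config without a "mode" key still decodes, defaulting to affine.
let legacy = #"{"group_size": 64, "bits": 4}"#
let q = try JSONDecoder().decode(Quantization.self, from: Data(legacy.utf8))
print(q.mode)  // affine

// A newer config can name the mode explicitly.
let newer = #"{"group_size": 32, "bits": 4, "mode": "mxfp4"}"#
print(try JSONDecoder().decode(Quantization.self, from: Data(newer.utf8)).mode)  // mxfp4
```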
```diff
@@ -115,10 +111,11 @@ public struct BaseConfiguration: Codable, Sendable {
         switch key.stringValue {
         case Quantization.CodingKeys.groupSize.rawValue: continue
         case Quantization.CodingKeys.bits.rawValue: continue
-        case Quantization.CodingKeys.quantMethod.rawValue: continue
-        case Quantization.CodingKeys.linearClass.rawValue: continue
-        case Quantization.CodingKeys.quantizationMode.rawValue: continue
-        case Quantization.CodingKeys.mode.rawValue: continue
+        case Quantization.CodingKeys._mode.rawValue: continue
+
+        // additional keys that are not layer instructions, see
+        // mlx-community/bitnet-b1.58-2B-4T-4bit
+        case "quant_method", "linear_class", "quantization_mode": continue

         default:
             if let f = try? container.decode(Bool.self, forKey: key) {
```
> **Review comment:** Skip directly.
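As a standalone approximation of that skip logic (not the library code), the sketch below decodes a bitnet-style config: the quantization parameters and metadata keys fall through, while per-layer Bool entries are collected.

```swift
import Foundation

// Minimal dynamic coding key.
struct AnyKey: CodingKey {
    var stringValue: String
    var intValue: Int? { nil }
    init?(stringValue: String) { self.stringValue = stringValue }
    init?(intValue: Int) { return nil }
}

// Collects per-layer Bool flags, skipping quantization parameters and
// metadata keys -- the same pattern as the decoder in the diff above.
struct PerLayerFlags: Decodable {
    var flags: [String: Bool] = [:]

    init(from decoder: Decoder) throws {
        let container = try decoder.container(keyedBy: AnyKey.self)
        for key in container.allKeys {
            switch key.stringValue {
            case "group_size", "bits", "mode": continue
            // metadata that is not a layer instruction, as in
            // mlx-community/bitnet-b1.58-2B-4T-4bit:
            case "quant_method", "linear_class", "quantization_mode": continue
            default:
                if let f = try? container.decode(Bool.self, forKey: key) {
                    flags[key.stringValue] = f
                }
            }
        }
    }
}

let json = #"""
{"group_size": 64, "bits": 4, "quant_method": "bitnet",
 "lm_head": false, "model.embed_tokens": false}
"""#
let decoded = try JSONDecoder().decode(PerLayerFlags.self, from: Data(json.utf8))
print(decoded.flags)  // ["lm_head": false, "model.embed_tokens": false]
```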
|
|
||
| default: | ||
| if let f = try? container.decode(Bool.self, forKey: key) { | ||
|
|
||
> **Review comment:** The MXFP4 quantization is now supported. This model was used to test that and the quantized KV cache.
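A hedged sketch of exercising the new MXFP4 path mentioned above: the `.mxfp4` case name and the group size of 32 (MXFP4's usual block size) are my assumptions, not taken from this diff.

```swift
import MLX

// Hypothetical weight; trailing dimension divisible by the group size.
let w = MLX.zeros([16, 64])

// MXFP4 quantization via the mode parameter added in this PR.
// (Group size 32 matches MXFP4's usual block size; illustrative only.)
let (wq, scales, biases) = MLX.quantized(w, groupSize: 32, bits: 4, mode: .mxfp4)

// MXFP4 stores scale-only blocks with no affine bias term, which is
// consistent with QuantizedSwitchLinear.biases becoming optional.
print(wq.shape, scales.shape)
```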