12 changes: 6 additions & 6 deletions examples/models/llama2/model.py
@@ -499,6 +499,12 @@ def __init__(self, **kwargs):
             device = "cpu"
         # flake8: noqa: TOR102
         checkpoint = torch.load(checkpoint_path, map_location=device)
+        if kwargs.get("fairseq2", False):
+            print("Using fairseq2 checkpoint")
+            checkpoint = convert_to_llama_checkpoint(checkpoint=checkpoint)
+        if "model" in checkpoint:
+            # NB: some checkpoints contain a "model" field, which is the actual weights dict
+            checkpoint = checkpoint["model"]
         # get checkpoint dtype
         self.dtype = None
         if len(checkpoint) > 0:
@@ -513,12 +519,6 @@ def __init__(self, **kwargs):
             print(
                 f"Mixed dtype model. Dtype of {first.key}: {first.dtype}. Mismatches in the checkpoint: {mismatched_dtypes}"
             )
-        if kwargs.get("fairseq2", False):
-            print("Using fairseq2 checkpoint")
-            checkpoint = convert_to_llama_checkpoint(checkpoint=checkpoint)
-        if "model" in checkpoint:
-            # NB: some checkpoints contain a "model" field, which is the actual weights dict
-            checkpoint = checkpoint["model"]
         with open(params_path, "r") as f:
             params = json.loads(f.read())
         max_seq_len = 128
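The substance of the change: the fairseq2 conversion and the "model"-key unwrapping are moved ahead of the dtype scan, so the scan inspects the converted weight tensors rather than the raw wrapper dict or the unconverted fairseq2 state. Below is a minimal sketch of the resulting load order, not the literal file contents: load_llama_checkpoint and the convert_fn parameter are hypothetical stand-ins for the surrounding __init__ code and the convert_to_llama_checkpoint helper imported by model.py.

# Sketch of the load order this PR establishes (illustrative, not the exact code).
import torch

def load_llama_checkpoint(checkpoint_path, device="cpu", fairseq2=False, convert_fn=None):
    checkpoint = torch.load(checkpoint_path, map_location=device)

    # Step 1: convert fairseq2-format checkpoints to the llama layout first.
    if fairseq2 and convert_fn is not None:
        print("Using fairseq2 checkpoint")
        checkpoint = convert_fn(checkpoint=checkpoint)

    # Step 2: unwrap checkpoints that nest the weights under a "model" key.
    if "model" in checkpoint:
        checkpoint = checkpoint["model"]

    # Step 3: only now scan dtypes, so the scan sees the actual weight tensors.
    dtypes = {v.dtype for v in checkpoint.values() if torch.is_tensor(v)}
    if len(dtypes) > 1:
        print(f"Mixed dtype model. Dtypes found in the checkpoint: {sorted(map(str, dtypes))}")
    return checkpoint

Under the pre-PR ordering, steps 1 and 2 ran after step 3, so a fairseq2 or "model"-wrapped checkpoint would have its dtype detected from the wrong dict; the reordering fixes that without changing any of the individual steps.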