Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix: missing hparam type_vocab_size #32

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 6 additions & 1 deletion bert.cpp
Expand Up @@ -23,6 +23,7 @@ struct bert_hparams
int32_t n_intermediate = 1536;
int32_t n_head = 12;
int32_t n_layer = 6;
int32_t n_vocab_size = 2;
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a breaking change to the model file format, since it adds a new field.
I believe more such changes will happen in the future,
so we may want to migrate to GGUF.

https://github.com/philpax/ggml/blob/gguf-spec/docs/gguf.md

int32_t f16 = 1;
};

Expand Down Expand Up @@ -364,6 +365,7 @@ struct bert_ctx * bert_load_from_file(const char *fname)
fin.read((char *)&hparams.n_intermediate, sizeof(hparams.n_intermediate));
fin.read((char *)&hparams.n_head, sizeof(hparams.n_head));
fin.read((char *)&hparams.n_layer, sizeof(hparams.n_layer));
fin.read((char *)&hparams.n_vocab_size, sizeof(hparams.n_vocab_size));
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same applies here.

fin.read((char *)&hparams.f16, sizeof(hparams.f16));

printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
Expand All @@ -372,6 +374,7 @@ struct bert_ctx * bert_load_from_file(const char *fname)
printf("%s: n_intermediate = %d\n", __func__, hparams.n_intermediate);
printf("%s: n_head = %d\n", __func__, hparams.n_head);
printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
printf("%s: n_vocab_size = %d\n", __func__, hparams.n_vocab_size);
printf("%s: f16 = %d\n", __func__, hparams.f16);
}

Expand Down Expand Up @@ -489,11 +492,13 @@ struct bert_ctx * bert_load_from_file(const char *fname)
const int n_intermediate = hparams.n_intermediate;
const int n_max_tokens = hparams.n_max_tokens;
const int n_vocab = hparams.n_vocab;
const int n_vocab_size = hparams.n_vocab_size;


model.layers.resize(n_layer);

model.word_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab);
model.token_type_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, 2);
model.token_type_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, n_vocab_size);
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@skeskinen
this line changes because the tensor's size depends on n_vocab_size.
In many cases it is 2, but it is not a constant.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

model.position_embeddings = ggml_new_tensor_2d(ctx, wtype, n_embd, n_max_tokens);

model.ln_e_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
Expand Down
1 change: 1 addition & 0 deletions models/convert-to-ggml.py
Expand Up @@ -61,6 +61,7 @@
fout.write(struct.pack("i", hparams["intermediate_size"]))
fout.write(struct.pack("i", hparams["num_attention_heads"]))
fout.write(struct.pack("i", hparams["num_hidden_layers"]))
fout.write(struct.pack("i", hparams["type_vocab_size"]))
fout.write(struct.pack("i", ftype))

for i in range(hparams["vocab_size"]):
Expand Down
4 changes: 4 additions & 0 deletions models/quantize.cpp
Expand Up @@ -20,6 +20,7 @@ struct bert_hparams
int32_t n_intermediate = 1536;
int32_t n_head = 12;
int32_t n_layer = 6;
int32_t n_vocab_size = 2;
int32_t f16 = 1;
};

Expand Down Expand Up @@ -74,6 +75,7 @@ bool bert_model_quantize(const std::string & fname_inp, const std::string & fnam
finp.read((char *) &hparams.n_intermediate, sizeof(hparams.n_intermediate));
finp.read((char *) &hparams.n_head, sizeof(hparams.n_head));
finp.read((char *) &hparams.n_layer, sizeof(hparams.n_layer));
finp.read((char *) &hparams.n_vocab_size, sizeof(hparams.n_vocab_size));
finp.read((char *) &hparams.f16, sizeof(hparams.f16));

printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
Expand All @@ -82,6 +84,7 @@ bool bert_model_quantize(const std::string & fname_inp, const std::string & fnam
printf("%s: n_intermediate = %d\n", __func__, hparams.n_intermediate);
printf("%s: n_head = %d\n", __func__, hparams.n_head);
printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
printf("%s: n_vocab_size = %d\n", __func__, hparams.n_vocab_size);
printf("%s: f16 = %d\n", __func__, hparams.f16);

fout.write((char *) &hparams.n_vocab, sizeof(hparams.n_vocab));
Expand All @@ -90,6 +93,7 @@ bool bert_model_quantize(const std::string & fname_inp, const std::string & fnam
fout.write((char *) &hparams.n_intermediate, sizeof(hparams.n_intermediate));
fout.write((char *) &hparams.n_head, sizeof(hparams.n_head));
fout.write((char *) &hparams.n_layer, sizeof(hparams.n_layer));
fout.write((char *) &hparams.n_vocab_size, sizeof(hparams.n_vocab_size));
fout.write((char *) &itype, sizeof(hparams.f16));
}

Expand Down