@@ -270,13 +270,30 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
270270 const std::string& curr_text = item.first ;
271271 float curr_weight = item.second ;
272272 // printf(" %s: %f \n", curr_text.c_str(), curr_weight);
273- std::vector<int > curr_tokens = tokenizer.encode (curr_text, on_new_token_cb);
274273 int32_t clean_index = 0 ;
274+ if (curr_text == "BREAK" && curr_weight == -1.0f) {
275+ // Pad token array up to chunk size at this point.
276+ // TODO: This is a hardcoded chunk_len, like in stable-diffusion.cpp, make it a parameter for the future?
277+ // Also, this is 75 instead of 77 to leave room for BOS and EOS tokens.
278+ int padding_size = 75 - (tokens_acc % 75 );
279+ for (int j = 0 ; j < padding_size; j++) {
280+ clean_input_ids.push_back (tokenizer.EOS_TOKEN_ID );
281+ clean_index++;
282+ }
283+
284+ // After padding, continue to the next iteration to process the following text as a new segment
285+ tokens.insert (tokens.end (), clean_input_ids.begin (), clean_input_ids.end ());
286+ weights.insert (weights.end (), padding_size, curr_weight);
287+ continue ;
288+ }
289+
290+ // Regular token, process normally
291+ std::vector<int> curr_tokens = tokenizer.encode(curr_text, on_new_token_cb);
275292 for (uint32_t i = 0 ; i < curr_tokens.size (); i++) {
276293 int token_id = curr_tokens[i];
277- if (token_id == image_token)
294+ if (token_id == image_token) {
278295 class_token_index.push_back (clean_index - 1 );
279- else {
296+ } else {
280297 clean_input_ids.push_back (token_id);
281298 clean_index++;
282299 }
@@ -379,6 +396,22 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
379396 for (const auto & item : parsed_attention) {
380397 const std::string& curr_text = item.first ;
381398 float curr_weight = item.second ;
399+
400+ if (curr_text == "BREAK" && curr_weight == -1.0f) {
401+ // Pad token array up to chunk size at this point.
402+ // TODO: This is a hardcoded chunk_len, like in stable-diffusion.cpp, make it a parameter for the future?
403+ // Also, this is 75 instead of 77 to leave room for BOS and EOS tokens.
404+ size_t current_size = tokens.size ();
405+ size_t padding_size = (75 - (current_size % 75 )) % 75 ; // Ensure no negative padding
406+
407+ if (padding_size > 0 ) {
408+ LOG_DEBUG (" BREAK token encountered, padding current chunk by %zu tokens." , padding_size);
409+ tokens.insert (tokens.end (), padding_size, tokenizer.EOS_TOKEN_ID );
410+ weights.insert(weights.end(), padding_size, 1.0f);
411+ }
412+ continue ; // Skip to the next item after handling BREAK
413+ }
414+
382415 std::vector<int> curr_tokens = tokenizer.encode(curr_text, on_new_token_cb);
383416 tokens.insert (tokens.end (), curr_tokens.begin (), curr_tokens.end ());
384417 weights.insert (weights.end (), curr_tokens.size (), curr_weight);
0 commit comments