In [62]:
import torch
from torch import cuda, bfloat16
import transformers
from transformers import StoppingCriteria, StoppingCriteriaList
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, pipeline, LlamaForCausalLM
from peft import PeftModel, PeftConfig
import torch
import torch.nn as nn
import os

# Text markers that end a generated reply once they show up in the model output.
stop_word_list = ["Human:"]

# Cap on newly generated tokens per call; the stopping criterion also uses it
# as the size of the trailing token window it decodes.
max_new_tokens = 50

# NOTE(review): not referenced anywhere in the visible notebook - confirm it is still needed.
max_length_stop_search = 8

# Device the Llama model is placed on: GPU index 1 when CUDA is usable, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:1")
else:
    device = torch.device("cpu")

# os.environ["CUDA_VISIBLE_DEVICES"] = "5"
class CastOutputToFloat(nn.Sequential):
    """Sequential wrapper whose output is converted to ``torch.float16``.

    Despite the name, the result is half precision, not float32.
    """

    def forward(self, x):
        """Run the wrapped modules on ``x`` and return the result as float16."""
        wrapped_output = super().forward(x)
        return wrapped_output.to(torch.float16)


def load_fine_tuned_model(path, peft_model, adapter_path=None):
    """Load the base Llama model and its tokenizer, optionally with a PEFT adapter.

    Args:
        path: Model id or directory passed to ``from_pretrained`` for the
            config, the model and the tokenizer.
        peft_model: When equal to ``1`` the adapter found at ``adapter_path``
            is attached to the base model; any other value returns the base
            model unchanged.
        adapter_path: Directory of the PEFT adapter. ``None`` (the default)
            keeps the notebook's original hard-coded adapter location.

    Returns:
        A ``(model, tokenizer)`` tuple.

    The model is placed on the module-level ``device``.
    """
    model_id = path
    if adapter_path is None:
        adapter_path = '/home/80024217/project_troni/project/trusworthy_class/chatbot/train_chatbot/Llama-2-7b-chat-finetune-qlora-food'

    config = AutoConfig.from_pretrained(model_id)
    print(config)
    model = LlamaForCausalLM.from_pretrained(
        model_id, return_dict=True, device_map=device
    )

    # Freeze every weight; 1-D parameters (presumably the norm weights) are
    # additionally stored in half precision.
    for param in model.parameters():
        param.requires_grad = False
        if param.ndim == 1:
            param.data = param.data.to(torch.float16)
    model.gradient_checkpointing_enable()
    model.enable_input_require_grads()
    # Logits leave the head as float16.
    model.lm_head = CastOutputToFloat(model.lm_head)

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.pad_token = '[PAD]'
    # The attribute is `padding_side`; the previous spelling `paddings_side`
    # only created an unused attribute, so left padding was never applied.
    tokenizer.padding_side = 'left'
    print('max length', tokenizer.model_max_length)
    print(peft_model)

    if peft_model == 1:
        return PeftModel.from_pretrained(model, adapter_path), tokenizer
    return model, tokenizer

def get_stopping_criteria(tokenizer):
    """Build a stopping-criteria list that halts generation on any stop word.

    Args:
        tokenizer: Object whose ``decode`` turns token ids back into text.

    Returns:
        A ``StoppingCriteriaList`` holding a single criterion that reads the
        module-level ``stop_word_list`` and ``max_new_tokens``.
    """

    class StopOnTokens(StoppingCriteria):
        """Stop once the most recent reply contains one of the stop words."""

        def __call__(
            self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
        ) -> bool:
            # Only the first sequence is inspected, and only its trailing
            # `max_new_tokens` tokens (slicing already copes with shorter input).
            recent_token_ids = input_ids[0][-max_new_tokens:]
            recent_text = tokenizer.decode(recent_token_ids)

            # When the prompt follows the "Human: ... AI:" chat format, only
            # the text after the latest "AI:" is the model's reply, so human
            # turns cannot trigger the stop.
            marker = "AI:"
            marker_position = recent_text.rfind(marker)
            if marker_position == -1:
                # No marker in the window (e.g. a plain question prompt):
                # search the whole window. Slicing from `-1 + len(marker)`
                # here would silently skip the first two characters.
                reply_text = recent_text
            else:
                reply_text = recent_text[marker_position + len(marker):]

            return any(stop_word in reply_text for stop_word in stop_word_list)

    return StoppingCriteriaList([StopOnTokens()])


def get_pipeline(model_id, peft_model):
    """Create the text-generation pipeline used to answer the test questions.

    Args:
        model_id: Model id or directory of the base model; printed and then
            forwarded to ``load_fine_tuned_model``.
        peft_model: Flag forwarded to ``load_fine_tuned_model``.

    Returns:
        The text-generation pipeline built around the loaded model and tokenizer.
    """
    print(model_id)
    model, tokenizer = load_fine_tuned_model(model_id, peft_model)

    # Call the factory through the `transformers` module: the notebook rebinds
    # the bare name `pipeline` to the object returned here, so an unqualified
    # call would fail the second time this function runs.
    generator = transformers.pipeline(
        task='text-generation',
        model=model,
        tokenizer=tokenizer,
        return_full_text=True,  # langchain expects the full text
        # we pass model parameters here too
        stopping_criteria=get_stopping_criteria(tokenizer),  # without this model rambles during chat
        max_new_tokens=max_new_tokens,  # max number of tokens to generate in the output
        repetition_penalty=1.1,  # without this output begins repeating
    )

    return generator

In [63]:
# Base checkpoint handed to get_pipeline.
model_id = 'meta-llama/Llama-2-7b-chat-hf'
# 1 attaches the fine-tuned PEFT adapter; any other value keeps the base model.
peft = 1
# NOTE(review): this rebinds the global name `pipeline`, shadowing the
# `pipeline` factory imported from transformers in the first cell.
pipeline = get_pipeline(model_id, peft)


meta-llama/Llama-2-7b-chat-hf
LlamaConfig {
  "_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 4096,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.45.0.dev0",
  "use_cache": true,
  "vocab_size": 32000
}



Loading checkpoint shards: 100%|██████████| 2/2 [00:16<00:00,  8.42s/it]


max length 1000000000000000019884624838656
0


In [52]:
import pandas as pd

# Test set: one row per question, with reference answer, dish name and question category.
df = pd.read_csv('../knowledge_base_classification/test_data.csv')
# Show the loaded frame as the cell output.
df

Unnamed: 0,Question,Answer,Food_Name,question_category
0,What does Chao Long taste like?,"It has a rich, savory taste with a creamy text...",chao long,Flavor Profile
1,What are the main ingredients in Banh Cuon?,"The main ingredients are rice flour, water, mi...",banh cuon,Ingredients
2,Can Pilaf be vegetarian?,"Yes, Pilaf can be made vegetarian by excluding...",pilaf,Dietary Options
3,Are there any dipping sauces for samosa?,"Yes, common dipping sauces include mint chutne...",samosa,General
4,"What is Pho, and what are its main ingredients?",Pho is a Vietnamese noodle soup made with a fl...,pho,Ingredients
5,What does Banh Cuon come with?,"It is often served with fresh herbs, slices of...",banh cuon,General
6,Is Chao Long spicy?,Chao Long is not inherently spicy but can be m...,chao long,Spiciness
7,Is samosa vegetarian?,"Yes, samosas can be vegetarian if the filling ...",samosa,Dietary Options
8,"What is Banh Chung, and what are its main ingr...",Banh Chung is a traditional Vietnamese sticky ...,banh chung,Ingredients
9,Can Chao Long be customized?,"Yes, you can often request specific ingredient...",chao long,General


In [95]:
# Food knowledge base; a later cell filters it on `Dish` and reads one column per question category.
# NOTE(review): that lookup compares against lower-cased names - presumably `Dish` is stored lower-case; verify.
df1 = pd.read_csv('food_db.csv')


In [87]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Dedicated name for the classifier device. Reusing the global `device` here
# would overwrite the value that load_fine_tuned_model reads when it places
# the Llama model.
classifier_device = "cuda:3" if torch.cuda.is_available() else "cpu"


def _load_classifier(model_dir, num_labels):
    """Load a sequence-classification checkpoint and move it to the classifier device.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    model = AutoModelForSequenceClassification.from_pretrained(model_dir, num_labels=num_labels)
    return tokenizer, model.to(classifier_device)


def _predict_label(input_text, tokenizer, model):
    """Return the ``id2label`` name of the highest-scoring class for one input text."""
    # Tokenize with truncation so long inputs stay within 512 tokens.
    inputs = tokenizer(
        input_text, truncation=True, max_length=512, return_tensors='pt'
    ).to(classifier_device)

    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax is monotonic, so the arg-max of the raw logits selects the same
    # class the most probable softmax entry would.
    predicted_label_id = logits.argmax(dim=1)
    return model.config.id2label[predicted_label_id.item()]


model_name1 = "../knowledge_base_classification/food_classification"
tokenizer1, model1 = _load_classifier(model_name1, num_labels=11)


def food_prediction(input_text):
    """Predict the dish name a question is about.

    Args:
        input_text: A single question string.

    Returns:
        The label name from ``model1.config.id2label``.
    """
    return _predict_label(input_text, tokenizer1, model1)


model_name2 = "../knowledge_base_classification/qa_category"
tokenizer2, model2 = _load_classifier(model_name2, num_labels=6)


def qa_prediction(input_text):
    """Predict the question category of a question.

    Args:
        input_text: A single question string.

    Returns:
        The label name from ``model2.config.id2label``.
    """
    return _predict_label(input_text, tokenizer2, model2)

In [92]:
# Questions from the test set, in row order.
q_list = df["Question"].tolist()

# Run both classifiers on each question in turn, then split the pairs into
# the dish predictions (q_list_f) and category predictions (q_list_c).
predictions = [
    (food_prediction(question), qa_prediction(question)) for question in q_list
]
q_list_f = [food_name for food_name, _ in predictions]
q_list_c = [category for _, category in predictions]


In [67]:
# Lower-cased predicted category -> column of the food knowledge base that holds the hint.
hint_column_by_category = {
    'general': 'General',
    'ingredients': 'Ingredients',
    'flavor profile': 'Flavor Profile',
    'spiciness': 'Spiciness',
    'allergens': 'Allergens',
    'dietary options': 'Dietary Options',
}

# Normalise case so questions, categories and dish names compare consistently.
q_list = [item.lower() for item in q_list]
q_list_c = [item.lower() for item in q_list_c]
q_list_f = [item.lower() for item in q_list_f]

# q_list1 always receives exactly one entry per question, so it stays aligned
# with q_list even when no hint can be looked up.
q_list1 = []
for q, c, f in zip(q_list, q_list_c, q_list_f):
    hint_column = hint_column_by_category.get(c)
    hints = df1.loc[df1.Dish == f, hint_column] if hint_column is not None else None

    if hints is None or hints.empty:
        # Unknown category or dish missing from the knowledge base: keep the
        # bare question and say so, instead of skipping the entry or printing
        # the hint left over from the previous question.
        print('no hint found for dish ' + repr(f) + ' / category ' + repr(c) + '\n\nquestion:\n' + q)
        q_list1.append(q)
        continue

    # First matching row of the knowledge base supplies the hint text.
    food_context = 'hint:' + hints.iloc[0]
    q_list1.append(q + '\n' + food_context)
    print(food_context + '\n\nquestion:\n' + q)

hint:Rich and savory, with earthy and slightly tangy undertones.

question:
what does chao long taste like?
hint:Banh Cuon consists of thin rice flour pancakes rolled with a mixture of ground pork, wood ear mushrooms, and shallots, often topped with fried onions.

question:
what are the main ingredients in banh cuon?
hint:Can be made vegetarian or vegan by omitting meat and using vegetable broth.

question:
can pilaf be vegetarian?
hint:Samosa is a globally recognized Indian snack, characterized by its crispy golden exterior and spiced filling. Commonly filled with a mixture of potatoes, peas, and spices, samosas are deep-fried to perfection. They are often served with chutneys and are a staple at festivals, gatherings, and tea-time snacks.

question:
are there any dipping sauces for samosa?
hint:Pho typically includes flat rice noodles, a choice of beef cuts or chicken, fresh herbs like cilantro and basil, and a flavorful broth made with spices such as star anise and cinnamon.

questi

In [57]:
# Display the hint-augmented prompts (question, newline, then "hint:..." text).
q_list1

['what does chao long taste like?\nhint:Rich and savory, with earthy and slightly tangy undertones.',
 'what are the main ingredients in banh cuon?\nhint:Banh Cuon consists of thin rice flour pancakes rolled with a mixture of ground pork, wood ear mushrooms, and shallots, often topped with fried onions.',
 'can pilaf be vegetarian?\nhint:Can be made vegetarian or vegan by omitting meat and using vegetable broth.',
 'are there any dipping sauces for samosa?\nhint:Samosa is a globally recognized Indian snack, characterized by its crispy golden exterior and spiced filling. Commonly filled with a mixture of potatoes, peas, and spices, samosas are deep-fried to perfection. They are often served with chutneys and are a staple at festivals, gatherings, and tea-time snacks.',
 'what is pho, and what are its main ingredients?\nhint:Pho typically includes flat rice noodles, a choice of beef cuts or chicken, fresh herbs like cilantro and basil, and a flavorful broth made with spices such as star 

In [82]:
# Upper bound on regeneration attempts per question, so a model that keeps
# returning an empty completion cannot hang the cell.
MAX_GENERATION_ATTEMPTS = 5

# NOTE(review): this iterates the plain questions in q_list; the hint-augmented
# prompts in q_list1 are not used here - confirm that is intended.
answers = []
for q in q_list:
    answer = ''
    for _attempt in range(MAX_GENERATION_ATTEMPTS):
        sequences = pipeline(q, temperature=1, top_k=50, top_p=0.9)
        generated_text = sequences[0]['generated_text']
        # The pipeline returns prompt + completion; strip the prompt only where
        # it leads the text, so an echo of the question inside the completion
        # is not deleted as well.
        if generated_text.startswith(q):
            answer = generated_text[len(q):]
        else:
            answer = generated_text.replace(q, "")
        if answer != '':
            break
    print(q)
    print(answer)
    print('-------------------------------------------------')
    # Appended even when still empty after all attempts, keeping one answer per question.
    answers.append(answer)


what does chao long taste like?

Chao Long is a type of fermented soybean paste that originates from Southeast Asia, particularly in Thailand and Laos. The taste of Chao Long can vary depending on the brand and quality of the product,
-------------------------------------------------
what are the main ingredients in banh cuon?

.  Banh cuon is a popular Vietnamese street food that consists of thin, flexible rice flour wrappers filled with shrimp, pork, vegetables, and herbs. The main ingredients in banh cu
-------------------------------------------------
can pilaf be vegetarian?

, Pilaf is a rice dish that typically contains meat or seafood. However, it can also be made vegetarian by using plant-based protein sources such as tofu, tempeh, or seitan instead of meat.
-------------------------------------------------
are there any dipping sauces for samosa?


Samosas are a popular Indian snack that can be enjoyed with a variety of dipping sauces. Here are some common dipping sauces for 

In [83]:
# Attach the generated answers as a new column; `answers` must hold one entry per row of df.
df['llama_answer'] = answers
# Write the test set plus answers to result.csv without the index column.
df.to_csv('result.csv', index=False)