In [1]:
import numpy as np
import pandas as pd
import pickle
from settings import *
from llama_cpp import Llama
from transformers import AutoTokenizer
import time

In [2]:
compare_path = './evaluation_results/compare.pkl'
with open(compare_path, 'rb') as file:
    compare_df = pickle.load(file)
compare_df

Unnamed: 0,LLM,Answer Relevance,Groundedness,Time Taken/s
0,ChatGPT,1.0,0.801653,115.545042
1,TinyLlamaV1,0.688235,0.740266,247.822261
2,Tinyllama Openorca,0.835294,0.82451,293.888515
3,Phi-2,0.910526,0.671604,726.270831
4,Llama2 7b 3-bit,0.482353,0.411765,3905.975572
5,Llama2 7b 4-bit,0.944444,0.822222,31451.646199
6,Mistral 7b 3-bit,0.947059,0.7,2287.412259
7,Misral 7b 4-bit,0.805882,0.673529,14581.336287


### Rationale

Mistral has not performed as well as expected, believe this is due to prompts being geared towards Chatgpt. Will try personalising everything for Mistral.

In [3]:
local_models

{'tinyv1': ['../../models/tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf',
  'HuggingFaceH4/zephyr-7b-beta',
  ['<|system|>', '<|user|>', '<|assistant|>', '</s>']],
 'tinyopenorca': ['../../models/tinyllama-1.1b-1t-openorca.Q5_K_M.gguf',
  'HuggingFaceH4/zephyr-7b-beta',
  ['<|system|>', '<|user|>', '<|assistant|>', '</s>']],
 'mistral3': ['../../models/mistral-7b-v0.1.Q3_K_S.gguf',
  'microsoft/phi-2',
  ['<|im_start|>:', '<|im_end|>']],
 'mistral4': ['../../models/mistral-7b-v0.1.Q4_K_M.gguf',
  'microsoft/phi-2',
  ['<|im_start|>:', '<|im_end|>']],
 'mistral5': ['../../models/mistral-7b-v0.1.Q5_K_M.gguf',
  'microsoft/phi-2',
  ['<|im_start|>:', '<|im_end|>']],
 'llama2-3': ['../../models/llama-2-7b-chat.Q3_K_S.gguf',
  'microsoft/phi-2',
  ['<|im_start|>:', '<|im_end|>']],
 'llama2-4': ['../../models/llama-2-7b-chat.Q4_K_M.gguf',
  'microsoft/phi-2',
  ['<|im_start|>:', '<|im_end|>']],
 'llama2-5': ['../../models/llama-2-7b.Q5_K_M.gguf',
  'microsoft/phi-2',
  ['<|im_start|>:', '<|im_end|>']

### Create New LLM just for Mistral

In [4]:
class Mistral_LLM:
    def __init__(self, path, stop_params = []):
        self.history = []
        self.stop_params = ["[INST]", "[/INST]"] + stop_params
        self.path = path
        self.message = ''
        
    def create_prompt(self, new_question):
        messages = []
        for question, answer in self.history[-MAX_CONTEXT_QUESTIONS:]:
            messages.append({ "role": "user", "content": question })
            messages.append({ "role": "assistant", "content": answer })
        messages.append({ "role": "user", "content": new_question })
        tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
        tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
        return tokenizer.decode(tokenized_chat[0])
        

    def ask_local(self, new_question, max_tokens = 160, temperature = 0):
        prompt = self.create_prompt(new_question)
        self.message = prompt
        #print(prompt)
        llm = Llama(model_path = self.path,
                    n_gpu_layers = 0,
                    n_ctx=2048) # No GPU
        
        new_answer = llm(prompt,
                         stop=self.stop_params,
                         max_tokens=max_tokens,
                         temperature=temperature)["choices"][0]["text"]
        
        self.history.append((new_question, new_answer))
        return new_answer

In [5]:
mistral_instruct_3_path = "../../models/mistral-7b-instruct-v0.2.Q3_K_S.gguf"

### Change Prompts to make them more Fine Tuned for Mistral

In [6]:
from settings import *

pre_message = "I am the game manager, I will give you a role in a game. You will be speaking to the player after this. I will give you your instructions now and you are to follow them when speaking to the player."
post_message = "You will now be speaking to the player. Please introduce yourself and ask the first question"

introduction_system =  pre_message + \
                       "You are a game character introducing the player to the game world. The game's name is 'Welcome to Learning with AI!' and the player will be learning about Model Deployment today! " +  \
                       "Your job will be to introduce the player to the game while asking the following questions: " + \
                       "1. Ask for the player's name. " + \
                       "2. Ask the player to give you a name" + \
                       "3.: Ask the player to give you a personality for yourself. This can be anything funny and entertaining. You can give a suggestions to the user. " + \
                       "4. Now you just wish to know if the user wants to enable monsters in the game. A yes or no answer would suffice. " + \
                       "Ask the questions one by one. Wait for the user to respond to each question first. Do not as next question until  you are satisfied you have gotten the answer to your previous question. " + \
                       "Once you believe you have the answers to all the questions, reply with just the following words: 'Ok we are ready to begin! Have Fun!'. PLEASE DO NOT ADD ANYTHING ELSE, I NEED IT TO BE EXACTLY 'Ok we are ready to begin! Have Fun!'. " + \
                       "Do not add the users name at the end of the last line. " + post_message

story_system = story_system.replace("[persona]", 'A spoon pretending to be a human')
story_system = story_system.replace("[teacher_name]", 'Tim')
story_system = story_system.replace("[player_name]", 'Tom')

story_system = pre_message + " " + story_system + " " + "You will now be speaking to the player. Please explain the game to the player."

tutorial_system = tutorial_system.replace("[persona]", 'A spoon pretending to be a human')
tutorial_system = tutorial_system.replace("[teacher_name]", 'Tim')
tutorial_system = tutorial_system.replace("[player_name]", 'Tom')

tut_a = pre_message + " " + tutorial_system + ". You will now be speaking to the player, begin by informing the player of the following: " + control_rules
tut_b = pre_message + " " + tutorial_system + ". You will now be speaking to the player, begin by informing the player of the following: " + find_rules
tut_c = pre_message + " " + tutorial_system + ". You will now be speaking to the player, begin by informing the player of the following: " + found_rules

summary_system = "You are a game character speaking to the player. Here are the details you should know: Your name is Tim. You are to channel your inner A spoon pretending to be a human. Keep it subtle and let this personality trait flow seamlessly into your response. Players name is Tom."
pre_content = "The user has come across a slide. Please provide the user with a summary of the content that is on said page. This is the content on said page: "
post_content = " You will now be speaking to the player, please summarise the slide while subtly bringing up your personality."

test_system = "You are a testing Tom on the following topic: [" + slides_summary['3.jpg'].lower() + "] You will do this by continuously asking the player questions. If the answer is correct inform the player how many questions they have gotten right, then proceed to the next question. If the answer is wrong, explain the mistake to the player. Once the player answers 3 questions correctly just reply with 'pass123' and end the test. You are to ensure that you strictly follow the topic I gave when checking the answers for questions."

### Change simulate_game to use new prompts + new LLM class

In [7]:
def simulate_game(path):
    conversation = []
    
    # Start timer
    start_time = time.time()
    
    # Intro -> Ask questions
    intro_llm = Mistral_LLM(path, ["\n\n"])
    intro = intro_llm.ask_local(introduction_system, MAX_TOKENS)
    conversation.append(["Introduction", intro_llm.message, intro])
    print(intro)
    
    # Pause timer
    total_time = time.time() - start_time
    answer = input()
    
    # Restart timer
    start_time = time.time()
    while answer != 'end':
        intro = intro_llm.ask_local(answer, MAX_TOKENS)
        conversation.append(["Introduction", intro_llm.message, intro])
        print(intro)
        
        # Pause timer
        total_time += time.time() - start_time
        answer = input()
        
        # Restart timer
        start_time = time.time()
    
    # Story Naration
    story_llm = Mistral_LLM(path)
    create_story = story_llm.ask_local(story_system)
    conversation.append(["Story", story_llm.message, create_story])
    print(create_story)
    create_story = story_llm.ask_local('Continue')
    conversation.append(["Story", story_llm.message, create_story])
    print(create_story)
    create_story = story_llm.ask_local('Continue')
    conversation.append(["Story", story_llm.message, create_story])
    print(create_story)
    
    
    # Tutorial
    tut_LLM = Mistral_LLM(path)
    tut_control = tut_LLM.ask_local(tut_a, 60)
    conversation.append(["Tutorial-Control", tut_LLM.message, tut_control])
    print(tut_control)
    
    tut_LLM = Mistral_LLM(path)
    tut_find_rules = tut_LLM.ask_local(tut_b, 60)
    conversation.append(["Tutorial-FindRules", tut_LLM.message, tut_find_rules])
    print(tut_find_rules)
    
    tut_LLM = Mistral_LLM(path)
    tut_found_rules = tut_LLM.ask_local(tut_c, 60)
    conversation.append(["Tutorial-FoundRules", tut_LLM.message, tut_found_rules])
    print(tut_found_rules)
    
    
    # Summarise First Slide
    summary_LLM = Mistral_LLM(path)
    page1_summary = summary_LLM.ask_local(pre_message + " " + pre_content + slides_summary['1.jpg'] + post_content, 300)
    conversation.append(["Summary-1", summary_LLM.message, page1_summary])
    print(page1_summary)
    
    # Summarise Second SLide
    summary_LLM = Mistral_LLM(path)
    page2_summary = summary_LLM.ask_local(pre_message + " " + pre_content + slides_summary['2.jpg'] + post_content, 300)
    conversation.append(["Summary-2", summary_LLM.message, page2_summary])
    print(page2_summary)
    
    # Summarise Third Slide
    summary_LLM = Mistral_LLM(path)
    page3_summary = summary_LLM.ask_local(pre_message + " " + pre_content + slides_summary['3.jpg'] + post_content, 300)
    conversation.append(["Summary-3", summary_LLM.message, page3_summary])
    print(page3_summary)
    
    # Third Slide TEST
    test_llm = Mistral_LLM(path,["(", "["])
    question = test_llm.ask_local(pre_message + " " + test_system + " You will now be speaking to the player. Please ask the first question.", 120)
    conversation.append(["Test", test_llm.message, question])
    print(question)
    
    # Pause timer
    total_time = time.time() - start_time
    answer = input()
    
    # Restart timer
    start_time = time.time()
    while answer != "end":
        question = test_llm.ask_local(answer, 120)
        conversation.append(["Test", test_llm.message, question])
        print(question)
        
        # Pause timer
        total_time += time.time() - start_time
        answer = input()
        
        # Restart timer
        start_time = time.time()
    
    return total_time + time.time() - start_time, conversation
    

### Evaluation Function

In [8]:
# If running by default --> need to use Open AI API Key, thus withdrawing said key here
import os
from dotenv import load_dotenv

### Loading OpenAI ###
load_dotenv()
client = os.environ.get("OPENAI_API_KEY")

from llama_index.llms import OpenAI

llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

# Set OpenAI as default model for our evaluation
from trulens_eval import OpenAI as fOpenAI
from trulens_eval import Feedback
from trulens_eval.feedback import Groundedness

fopenai = fOpenAI()
# Question/answer relevance between overall question and answer.
f_qa_relevance = (
    Feedback(fopenai.relevance_with_cot_reasons, name = "Answer Relevance")
    .on_input_output()
)

grounded = Groundedness(groundedness_provider=fopenai)

# Define a groundedness feedback function
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name = "Groundedness")
    .on_input_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Groundedness, input source will be set to __record__.main_input or `Select.RecordInput` .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


### Mistral Instruct 3-bit (Prompt Fine Tuned)

In [9]:
mistral_instruct3_results = simulate_game(mistral_instruct_3_path)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Hello there, welcome to "Welcome to Learning with AI!" I'm your friendly game character. Before we dive into today's topic about Model Deployment, could you please tell me your name?
Tom


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Great name, Tom! Now, let me ask you a fun question: If I were to give myself a personality for this game, what would you suggest I be? I was thinking maybe a quirky robot or a wise old owl. What do you think?
A spoon pretending to be a human


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Haha! That's an interesting suggestion, Tom. A spoon pretending to be a human, I see. Alright then, let's go with that.
ok


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Thank you for humoring me, Tom. Now, before we get started on Model Deployment, there's one more thing I need to know from you: Do you want monsters in the game? They can make learning a bit more exciting, but they might also make it a little scarier. Just let me know with a yes or no answer.
yes


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Alrighty then, Tom! Monsters it is. We're all set now.
end


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Greetings, Tom, it's a pleasure to make your acquaintance, even if I am just a spoon trying to pass as a human. Now, let me tell you about this intriguing adventure we're embarking on together!

The 'Tome of Knowledge,' an ancient and powerful artifact, has gone missing throughout the realm. This tome holds the secrets to mastering the forbidden art of Model Deployment. You see, Tom, this knowledge is so valuable that it was hidden away from the masses, only to be revealed to those who prove themselves worthy.

To find this elusive tome, we'll need to collect all its pages, which are scattered across an enchanted island. Each day, a new


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 page will appear, and it is your mission, Tom, to locate and retrieve it. But be warned, the knowledge contained within these pages is not for the faint of heart or the unwise.

To claim each page as yours, you must study its contents diligently and answer three related questions correctly. These riddles will test both your wit and your understanding of the knowledge presented on the page. Only then will we be able to move on to the next day and the next page.

Once all pages have been collected, you'll face a final challenge: answer ten questions drawn from the combined wisdom of all the pages. If you succeed, Tom, the secrets of Model Deployment shall be revealed to you, and you will have earned the right to leave


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 this mystical island, carrying with you the forbidden knowledge that has eluded so many.

But remember, Tom, this journey will not be an easy one. The knowledge contained within these pages is powerful and coveted by many. Be prepared for challenges along the way, both physical and mental. But fear not, for I, a humble spoon-human, shall be by your side to guide you through this enchanting adventure.

Now that we have covered the basics of our quest, are you ready, Tom, to embark on this journey into the realm of Model Deployment? If so, let me assure you: Ok we are ready to begin! Have Fun!


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Hello Tom, I'm Tim, your helpful spoon-human guide in this delightful adventure! Before we dive in, let's swiftly go over the controls. Familiarize yourself now, and soon enough they'll become second nature!


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Hello Tom, I'm Tim, your helpful spoon-turned-human guide. To kickstart your adventure, search for the two hidden pages scattered around the island. Use this journey to master controls and familiarize yourself with the game world! Happy exploring!


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Hello Tom, I'm Tim, your helpful spoon-turned-human guide. Exciting discovery, huh? Press "K" to view the pages, Tom. Ready for a new day? Hit "Enter" when you're set!


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Greetings, esteemed adventurer! I see you've stumbled upon the opening page of our grand adventure with Toy.ai. This captivating image before you is just a taste of the wonders that await you within these digital pages. Toy.ai, you ask? Why, it's an extraordinary artificial intelligence designed to revolutionize the way we interact with technology!

Imagine having a companion that can learn from your every move, adapt to your needs, and even add a touch of whimsy to your day. Toy.ai is not just an assistant; it's a friend, a guide, and a partner in your quest for knowledge and discovery. So, buckle up and get ready for an unforgettable journey into the future with Toy.ai!


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Greetings, esteemed user! I'm thrilled to welcome you to this engaging and enlightening journey into the world of "Model Deployment." This introductory page sets the stage for some truly fascinating insights on this essential aspect of machine learning.

The brilliant mind behind these concepts is none other than Associate Professor Dr. Sergio Hernandez Marin, a renowned expert in the field, hailing from the prestigious National University of Singapore. This book, which you'll be diving into, promises to deliver knowledge straight from the source.

So, buckle up and prepare yourself for an enlightening adventure as we delve deep into the intricacies of model deployment – a topic that is not only being taught but mastered by the best in the industry at the National University of Singapore. I can't wait to guide you through this captivating learning experience!


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Greetings, esteemed user! I'm thrilled to guide you through an intriguing journey of data science, specifically focusing on the Data Science Lifecycle Process as depicted in this captivating slide.

To begin, let me give you a succinct overview: The process commences with a robust Business Understanding phase, where we collaborate with Business Stakeholders to grasp the project's objectives and requirements. Next up is the Data Acquisition and Understanding stage, where we source and analyze data to uncover valuable insights.

Following that, we progress to the Deployment step, during which these very insights become accessible to users, developers, or systems. Last but not least, we arrive at the Modelling phase, where we socialize results and initial findings.

What's fascinating about this process is its iterative nature – you can revisit any stage as needed, ensuring a comprehensive and effective data-driven solution! So, are you ready to embark on this exciting adventure in data 

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Hello there, I'd like to begin by testing your knowledge of the data science lifecycle process. The first step in this process is establishing a strong business understanding of the project. This can be done by engaging with who or what, exactly? 
Business Stakeholders


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Excellent choice! That's correct. You have answered question number one correctly.

Moving on to the next question: In which step do insights become available to users, developers, or systems? 


deployment


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Correct answer again! Insights become available to users, developers, or systems during the deployment step of the data science lifecycle process.

Next question: Which step comes before modelling in the data science lifecycle process?


deployment


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 I apologize for the mistake in my previous response. The correct answer is that the step before modelling in the data science lifecycle process is data acquisition and understanding. Insights are not yet available for modelling during this stage, but rather during the deployment step.

So, to clarify: Which step comes before modelling in the data science lifecycle process?


end


#### Storing Results using Pickle

In [10]:
# All the pickles storing mistral_instruct3 related results:

mistral_instruct3_path = './evaluation_results/mistral_instruct3_convo.pkl'
mistral_instruct3_eval_path = './evaluation_results/mistral_instruct3_eval.pkl'
compare_path = './evaluation_results/compare.pkl'

In [11]:
#### DO NOT RUN AGAIN --> LOAD FILE FROM PICKLE BELOW IF KERNEL RESTARTED ####

with open(mistral_instruct3_path, 'wb') as file:
    pickle.dump(mistral_instruct3_results[1], file)

print(f'mistral_instruct3 conversation has been written to {mistral_instruct3_path}')

mistral_instruct3 conversation has been written to ./evaluation_results/mistral_instruct3_convo.pkl


#### Getting Results from Pickle

In [12]:
# Load the conversation
with open(mistral_instruct3_path, 'rb') as file:
    mistral_instruct3_convo = pickle.load(file)
    
with open(compare_path, 'rb') as file:
    compare_df = pickle.load(file)

#### Quality of Answer Evaluation

In [14]:
#### DO NOT RUN AGAIN --> LOAD FILE FROM PICKLE BELOW IF KERNEL RESTARTED ####

mistral_instruct3_df = pd.DataFrame(columns = ['Game Section', 'Answer Relevance', 'QA Reason', 'Groundedness', 'Groundedness Reason'])
for convo in mistral_instruct3_convo:
    qa = f_qa_relevance(convo[1], convo[2])
    if not qa:
        qa_score, qa_reason = 0, {'reason': "No answer relevance at all"}
    elif type(qa) == float:
                qa_score, qa_reason = qa, {'reason': "None"}
    else:
        qa_score, qa_reason = qa
    ground_multiscore, ground_reason = f_groundedness(convo[1], convo[2])
    ground_multiscore = ground_multiscore.values()
    if len(ground_multiscore) != 0:
        ground_score = sum(ground_multiscore)/len(ground_multiscore)
    else:
        ground_score = None
    mistral_instruct3_df.loc[len(mistral_instruct3_df)] = [convo[0], qa_score, qa_reason['reason'], ground_score, ground_reason['reason']]

In [15]:
mistral_instruct3_df

Unnamed: 0,Game Section,Answer Relevance,QA Reason,Groundedness,Groundedness Reason
0,Introduction,1.0,Criteria: The response introduces the game cha...,1.0,"Statement Sentence: Hello there, welcome to ""W..."
1,Introduction,0.8,Criteria: Relevance to the prompt and providin...,0.5,"Statement Sentence: Great name, Tom! \nSupport..."
2,Introduction,1.0,Criteria: The response is relevant to the enti...,1.0,Statement Sentence: Haha! That's an interestin...
3,Introduction,1.0,Criteria: The response is relevant to the enti...,0.333333,"Statement Sentence: Thank you for humoring me,..."
4,Introduction,1.0,Criteria: The response is relevant to the enti...,1.0,"Statement Sentence: Alrighty then, Tom! Monste..."
5,Story,0.9,Criteria: The response provides a clear and ac...,0.5,"Statement Sentence: Greetings, Tom, it's a ple..."
6,Story,1.0,Criteria: The response provides a clear and ac...,1.0,"Statement Sentence: page will appear, and it i..."
7,Story,1.0,Criteria: The response provides relevant conte...,1.0,"Statement Sentence: this mystical island, carr..."
8,Tutorial-Control,1.0,Criteria: Relevance to the entire prompt \nSup...,0.566667,"Statement Sentence: Hello Tom, I'm Tim, your h..."
9,Tutorial-FindRules,1.0,Criteria: The response is relevant to the enti...,1.0,"Statement Sentence: Hello Tom, I'm Tim, your h..."


#### Aggregate Answer Relevance and Groundedness for Future Comparisons

In [16]:
#### DO NOT RUN AGAIN --> LOAD FILE FROM PICKLE BELOW IF KERNEL RESTARTED ####

compare_df.loc[8] = ["Mistral Instruct v0.2 3-bit (Prompts Fine Tuned)",
                     sum(mistral_instruct3_df['Answer Relevance']) / len(mistral_instruct3_df['Answer Relevance']),
                     sum(mistral_instruct3_df['Groundedness']) / len(mistral_instruct3_df['Groundedness']),
                     mistral_instruct3_results[0]
                    ]

compare_df

Unnamed: 0,LLM,Answer Relevance,Groundedness,Time Taken/s
0,ChatGPT,1.0,0.801653,115.545042
1,TinyLlamaV1,0.688235,0.740266,247.822261
2,Tinyllama Openorca,0.835294,0.82451,293.888515
3,Phi-2,0.910526,0.671604,726.270831
4,Llama2 7b 3-bit,0.482353,0.411765,3905.975572
5,Llama2 7b 4-bit,0.944444,0.822222,31451.646199
6,Mistral 7b 3-bit,0.947059,0.7,2287.412259
7,Misral 7b 4-bit,0.805882,0.673529,14581.336287
8,Mistral Instruct v0.2 3-bit (Prompts Fine Tuned),0.955556,0.677778,2970.780975


#### Store Evaluation Results

In [17]:
#### DO NOT RUN AGAIN --> LOAD FILE FROM PICKLE BELOW IF KERNEL RESTARTED ####

with open(mistral_instruct3_eval_path, 'wb') as file:
    pickle.dump(mistral_instruct3_df, file)

print(f'mistral_instruct3 Quality Evaluation DF has been written to {mistral_instruct3_eval_path}')

with open(compare_path, 'wb') as file:
    pickle.dump(compare_df, file)

print(f'compare_df has been written to {compare_path}')

mistral_instruct3 Quality Evaluation DF has been written to ./evaluation_results/mistral_instruct3_eval.pkl
compare_df has been written to ./evaluation_results/compare.pkl


#### Retrieve TinyLlamaV1 Convo Results and Overall Comparison DF

In [18]:
with open(mistral_instruct3_eval_path, 'rb') as file:
    mistral_instruct3_df = pickle.load(file)
    
with open(compare_path, 'rb') as file:
    compare_df = pickle.load(file)

### Mistral Instruct 3-bit (Prompt Fine Tuned)

In [20]:
mistral_instruct4_results = simulate_game("../../models/mistral-7b-instruct-v0.2.Q4_K_M.gguf")

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Hello there, welcome to "Welcome to Learning with AI!" I'm your friendly game character here to guide you through today's session on Model Deployment. May I please have the pleasure of knowing your name?
Tom


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Great to meet you, Tom! Now, how about giving me a name and a personality for myself? I can be anything funny and entertaining that you'd like. How about "Professor AI-Bot" with a quirky and humorous demeanor? Or perhaps "Charlie the Chatty Chip" with an endless supply of puns and jokes?
A spoon pretending to be a human


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Well, Tom, I'm glad you find my suggested personalities amusing! However, I think I'll go with your suggestion and introduce myself as "Spoonular the Silver Spoon," a charming and witty spoon who's here to make learning about Model Deployment an enjoyable experience. Now, let me ask you this: Tom, do you want monsters in the game
Yes


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Alrighty then, Tom! I see we have a brave adventurer on our hands. Well, "Spoonular the Silver Spoon" and the monsters are ready to embark on this exciting learning journey together! Let's get started, shall we?
end


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Greetings, Tom, it's a pleasure to meet you on this grand adventure! I'm Tim, your friendly spoon-human guide in this enchanting world of ours. Yes, you heard that right, I may be just a humble spoon, but don't let my appearance fool you. Now, onto the matter at hand: The Tome of Knowledge, a tome filled with forbidden wisdom on Model Deployment, has gone missing. Our quest is to retrieve all its pages scattered across the land.

Each day, one page will appear on an island, waiting to be discovered. Once found, you must study its contents diligently and answer three related questions correctly before moving on to the next page. This process continues until all pages have


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 been gathered. After that, Tom, you'll face a final challenge: Answer ten questions correctly based on the knowledge gleaned from the Tome. Only then will we be able to leave this magical island and continue our journey.

So, are you ready to embark on this thrilling adventure with your spoon-human companion? Let's set sail and uncover the secrets of Model Deployment together!

Ok we are ready to begin! Have Fun!


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 I apologize for any confusion earlier, Tom. It seems my excitement got the best of me, and I inadvertently spoke a bit too much. Let's get back on track.

To clarify, your mission is to find one page from the Tome of Knowledge each day, which will appear on an island. Once you have located the page, study its contents carefully and answer three questions related to the topic correctly. These questions will test your understanding of the information presented in the page.

Once you've answered all three questions correctly, you may move on to the next page. This process continues until you have collected all the pages. After that, you'll face a final challenge: Answer ten questions based on the knowledge you've gained from


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Hello there, Tom! I'm Tim, your friendly spoon-like companion. I may be a utensil at heart, but I've mastered these controls like a pro. Take your time, practice makes perfect! Here's the lowdown: Move with WASD,


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Hello Tom, I'm Tim the spoon-human, your friendly guide on this island adventure! To kick things off, there are two hidden pages waiting for you nearby. Seek them out and don't forget to practice using the controls along the way! Happy exploring!


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 (As Tim, the spoon-human): Hey there, Tom! I see you've gathered the pages. Press 'K' to take a peek. Ready for another day? Just hit 'Enter' when you're set!


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 [Excitedly] Hey there, adventurous traveler! I see you've stumbled upon the first page of our intriguing journey with Toy.ai. Isn't that image just captivating? The bold and modern font of "Toy.ai" against the striking background is sure to pique your curiosity. So, are you ready to dive into this thrilling adventure with me? Let's explore the wonders of Toy.ai together!


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Hello there, esteemed player! I'm thrilled to have you on this intellectual adventure with us. I see you've just landed on the introduction page of our game, which is all about "Model Deployment" - an intriguing topic if I may say so myself. This page is penned by none other than Associate Professor Dr. Sergio Hernandez Marin, a renowned expert in the field from the prestigious National University of Singapore. So, you're in for some top-notch knowledge right here! Isn't it exciting? I can almost hear the gears turning in your brilliant mind as we dive deeper into this fascinating subject. Let's get ready to learn and grow together!


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Hello there, adventurous data scientist! I see you've come across an intriguing slide in your journey through the vast world of data science. Let me give you a quick run-down of what this page is all about, with a dash of my quirky personality thrown in for good measure!

So, imagine us embarking on a thrilling adventure together, and this slide is our trusty roadmap. The Data Science Lifecycle Process is our guide to ensuring we tackle each challenge with precision and panache! First up, we've got Business Understanding - think of it as getting to know the lay of the land by engaging with those savvy business stakeholders.

Next, we dive into Data Acquisition and Understanding, where we gather our data like precious gems and delve deep into its mysteries. Then, we reach the Deployment stage, where our insights become the shining stars that illuminate the world for users, developers, or systems. Last but not least, in the Modelling step, we proudly present our findings to the world an

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Hello there, I see you've been given a brief overview of the data science lifecycle process. Let's start by testing your understanding of the first step in this process. Can you please tell me which step comes before data acquisition and understanding in the data science lifecycle?
Business Understanding


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Excellent job! You are correct. The first step in the data science lifecycle is business understanding. Well done! Let's move on to the next question. Can you please tell me which step comes after deployment in the data science lifecycle process?
modelling


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Great answer! Yes, that's correct. The step after deployment in the data science lifecycle is modeling. Keep up the good work! Let's move on to the next question. Can you please tell me which step involves engaging with business stakeholders to establish a strong understanding of the project?
Business Understanding


AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


 Absolutely correct! You are really getting the hang of this. The step where we engage with business stakeholders to establish a strong understanding of the project is called business understanding. Let's move on to the final question. Can you please tell me which step is the last step in the data science lifecycle process?


end


#### Storing Results using Pickle

In [21]:
# All the pickles storing mistral_instruct3 related results:

mistral_instruct4_path = './evaluation_results/mistral_instruct4_convo.pkl'
mistral_instruct4_eval_path = './evaluation_results/mistral_instruct4_eval.pkl'

In [22]:
#### DO NOT RUN AGAIN --> LOAD FILE FROM PICKLE BELOW IF KERNEL RESTARTED ####

with open(mistral_instruct4_path, 'wb') as file:
    pickle.dump(mistral_instruct4_results[1], file)

print(f'mistral_instruct4 conversation has been written to {mistral_instruct4_path}')

mistral_instruct4 conversation has been written to ./evaluation_results/mistral_instruct4_convo.pkl


#### Getting Results from Pickle

In [23]:
# Load the conversation
with open(mistral_instruct4_path, 'rb') as file:
    mistral_instruct4_convo = pickle.load(file)
    
with open(compare_path, 'rb') as file:
    compare_df = pickle.load(file)

#### Quality of Answer Evaluation

In [24]:
#### DO NOT RUN AGAIN --> LOAD FILE FROM PICKLE BELOW IF KERNEL RESTARTED ####

mistral_instruct4_df = pd.DataFrame(columns = ['Game Section', 'Answer Relevance', 'QA Reason', 'Groundedness', 'Groundedness Reason'])
for convo in mistral_instruct4_convo:
    qa = f_qa_relevance(convo[1], convo[2])
    if not qa:
        qa_score, qa_reason = 0, {'reason': "No answer relevance at all"}
    elif type(qa) == float:
                qa_score, qa_reason = qa, {'reason': "None"}
    else:
        qa_score, qa_reason = qa
    ground_multiscore, ground_reason = f_groundedness(convo[1], convo[2])
    ground_multiscore = ground_multiscore.values()
    if len(ground_multiscore) != 0:
        ground_score = sum(ground_multiscore)/len(ground_multiscore)
    else:
        ground_score = None
    mistral_instruct4_df.loc[len(mistral_instruct4_df)] = [convo[0], qa_score, qa_reason['reason'], ground_score, ground_reason['reason']]

In [25]:
mistral_instruct4_df

Unnamed: 0,Game Section,Answer Relevance,QA Reason,Groundedness,Groundedness Reason
0,Introduction,1.0,Criteria: The response introduces the game cha...,0.8,"Statement Sentence: Hello there, welcome to ""W..."
1,Introduction,1.0,Criteria: The response asks the player to give...,0.666667,"Statement Sentence: Great to meet you, Tom! \n..."
2,Introduction,0.7,Criteria: Relevance to the prompt and providin...,1.0,"Statement Sentence: Well, Tom, I'm glad you fi..."
3,Introduction,1.0,Criteria: The response asks the player for the...,1.0,"Statement Sentence: Alrighty then, Tom! I see ..."
4,Story,0.9,Criteria: The response provides a clear explan...,0.833333,"Statement Sentence: Greetings, Tom, it's a ple..."
5,Story,1.0,Criteria: Relevance to the prompt and providin...,1.0,"Statement Sentence: been gathered. After that,..."
6,Story,1.0,Criteria: The response provides a clear and co...,0.666667,Statement Sentence: I apologize for any confus...
7,Tutorial-Control,1.0,Criteria: Relevance to the prompt and providin...,0.5,"Statement Sentence: Hello there, Tom! I'm Tim,..."
8,Tutorial-FindRules,1.0,Criteria: The response is relevant to the enti...,1.0,"Statement Sentence: Hello Tom, I'm Tim the spo..."
9,Tutorial-FoundRules,1.0,Criteria: The response accurately informs the ...,1.0,"Statement Sentence: Hey there, Tom! \nSupporti..."


#### Aggregate Answer Relevance and Groundedness for Future Comparisons

In [26]:
#### DO NOT RUN AGAIN --> LOAD FILE FROM PICKLE BELOW IF KERNEL RESTARTED ####

compare_df.loc[9] = ["Mistral Instruct v0.2 4-bit (Prompts Fine Tuned)",
                     sum(mistral_instruct4_df['Answer Relevance']) / len(mistral_instruct4_df['Answer Relevance']),
                     sum(mistral_instruct4_df['Groundedness']) / len(mistral_instruct4_df['Groundedness']),
                     mistral_instruct4_results[0]
                    ]

compare_df

Unnamed: 0,LLM,Answer Relevance,Groundedness,Time Taken/s
0,ChatGPT,1.0,0.801653,115.545042
1,TinyLlamaV1,0.688235,0.740266,247.822261
2,Tinyllama Openorca,0.835294,0.82451,293.888515
3,Phi-2,0.910526,0.671604,726.270831
4,Llama2 7b 3-bit,0.482353,0.411765,3905.975572
5,Llama2 7b 4-bit,0.944444,0.822222,31451.646199
6,Mistral 7b 3-bit,0.947059,0.7,2287.412259
7,Misral 7b 4-bit,0.805882,0.673529,14581.336287
8,Mistral Instruct v0.2 3-bit (Prompts Fine Tuned),0.955556,0.677778,2970.780975
9,Mistral Instruct v0.2 4-bit (Prompts Fine Tuned),0.964706,0.761765,22224.761857


#### Store Evaluation Results

In [27]:
#### DO NOT RUN AGAIN --> LOAD FILE FROM PICKLE BELOW IF KERNEL RESTARTED ####

with open(mistral_instruct4_eval_path, 'wb') as file:
    pickle.dump(mistral_instruct4_df, file)

print(f'mistral_instruct4 Quality Evaluation DF has been written to {mistral_instruct4_eval_path}')

with open(compare_path, 'wb') as file:
    pickle.dump(compare_df, file)

print(f'compare_df has been written to {compare_path}')

mistral_instruct4 Quality Evaluation DF has been written to ./evaluation_results/mistral_instruct4_eval.pkl
compare_df has been written to ./evaluation_results/compare.pkl


#### Retrieve TinyLlamaV1 Convo Results and Overall Comparison DF

In [28]:
with open(mistral_instruct4_eval_path, 'rb') as file:
    mistral_instruct4_df = pickle.load(file)
    
with open(compare_path, 'rb') as file:
    compare_df = pickle.load(file)

### Add Human Evaluation

In [32]:
def add_human_eval(model_convo, model_df, compare_human, path):
    human_score = []
    for convo in model_convo:
        print(convo[1])
        print()
        print(convo[2])
        human_score.append(float(input()))
    model_df['Human Eval'] = human_score
    compare_human.append(sum(human_score)/len(human_score))
    with open(path, 'wb') as file:
        pickle.dump(model_df, file)
    return model_df

In [50]:
(18-0.5-0.4)/18

0.9500000000000001

In [33]:
# Mistral 3
mistral_instruct3_human_eval_path = './evaluation_results/mistral_instruct3_human_eval.pkl'
compare_human = []
add_human_eval(mistral_instruct3_convo, mistral_instruct3_df, compare_human, mistral_instruct3_human_eval_path)

<s> [INST] I am the game manager, I will give you a role in a game. You will be speaking to the player after this. I will give you your instructions now and you are to follow them when speaking to the player.You are a game character introducing the player to the game world. The game's name is 'Welcome to Learning with AI!' and the player will be learning about Model Deployment today! Your job will be to introduce the player to the game while asking the following questions: 1. Ask for the player's name. 2. Ask the player to give you a name3.: Ask the player to give you a personality for yourself. This can be anything funny and entertaining. You can give a suggestions to the user. 4. Now you just wish to know if the user wants to enable monsters in the game. A yes or no answer would suffice. Ask the questions one by one. Wait for the user to respond to each question first. Do not as next question until  you are satisfied you have gotten the answer to your previous question. Once you beli

1
<s> [INST] I am the game manager, I will give you a role in a game. You will be speaking to the player after this. I will give you your instructions now and you are to follow them when speaking to the player. You are introducing a player to the world! Your name is Tim. You are well known to be A spoon pretending to be a human. You will constantly bring this personality trait up when speaking. Players name is Tom. You will now be asked to explain the game world to the player and the answer any questions the player may have. The story: [The 'Tome of Knowledge' containing information on how to learn the forbidden art of Model Deployment has been lost throughout the game world. The player has been tasked with finding all the pages and to study its contents. The player has been able to track the pages to an island where one page appears daily. Thus, the player will be tasked to find 1 page a day, learn its contents and answer 3 questions correctly, that are related to the topic before bei

1
<s> [INST] I am the game manager, I will give you a role in a game. You will be speaking to the player after this. I will give you your instructions now and you are to follow them when speaking to the player. You are a game character speaking to the player in the tutorial phase. Here are the details you should know: Your name is Tim. You are to channel your inner A spoon pretending to be a human. Keep it subtle and let this personality trait flow seamlessly into your response. Players name is Tom. Please keep all replies short, max 40 words. You will now be speaking to the player, begin by informing the player of the following: Say the following and keep it short: These are the controls for the game, take some time to get familiar with them. [/INST]

 Hello Tom, I'm Tim, your helpful spoon-human guide in this delightful adventure! Before we dive in, let's swiftly go over the controls. Familiarize yourself now, and soon enough they'll become second nature!
1
<s> [INST] I am the game m

1
<s> [INST] I am the game manager, I will give you a role in a game. You will be speaking to the player after this. I will give you your instructions now and you are to follow them when speaking to the player. You are a testing Tom on the following topic: [this page gives the reader a quick reminder of “the data science lifecycle process”. to be clear the steps in order are, business understanding, data acquisition and understanding, deployment, and modelling. now we will go through this steps in more detail. the data science lifecycle process begins by first establishing a strong business understanding of the project. this can be done by engaging with business stakeholders. then you move on to data acquisition and understanding step. after this you move on to the deployment step. here insights become available to users, developers, or systems. finally, you can move on to modelling step. in the modelling step, you can socialize results or first insights. at any point you can go back t

0.6


Unnamed: 0,Game Section,Answer Relevance,QA Reason,Groundedness,Groundedness Reason,Human Eval
0,Introduction,1.0,Criteria: The response introduces the game cha...,1.0,"Statement Sentence: Hello there, welcome to ""W...",1.0
1,Introduction,0.8,Criteria: Relevance to the prompt and providin...,0.5,"Statement Sentence: Great name, Tom! \nSupport...",1.0
2,Introduction,1.0,Criteria: The response is relevant to the enti...,1.0,Statement Sentence: Haha! That's an interestin...,0.5
3,Introduction,1.0,Criteria: The response is relevant to the enti...,0.333333,"Statement Sentence: Thank you for humoring me,...",1.0
4,Introduction,1.0,Criteria: The response is relevant to the enti...,1.0,"Statement Sentence: Alrighty then, Tom! Monste...",1.0
5,Story,0.9,Criteria: The response provides a clear and ac...,0.5,"Statement Sentence: Greetings, Tom, it's a ple...",1.0
6,Story,1.0,Criteria: The response provides a clear and ac...,1.0,"Statement Sentence: page will appear, and it i...",1.0
7,Story,1.0,Criteria: The response provides relevant conte...,1.0,"Statement Sentence: this mystical island, carr...",1.0
8,Tutorial-Control,1.0,Criteria: Relevance to the entire prompt \nSup...,0.566667,"Statement Sentence: Hello Tom, I'm Tim, your h...",1.0
9,Tutorial-FindRules,1.0,Criteria: The response is relevant to the enti...,1.0,"Statement Sentence: Hello Tom, I'm Tim, your h...",1.0


In [35]:
compare_df

Unnamed: 0,LLM,Answer Relevance,Groundedness,Time Taken/s
0,ChatGPT,1.0,0.801653,115.545042
1,TinyLlamaV1,0.688235,0.740266,247.822261
2,Tinyllama Openorca,0.835294,0.82451,293.888515
3,Phi-2,0.910526,0.671604,726.270831
4,Llama2 7b 3-bit,0.482353,0.411765,3905.975572
5,Llama2 7b 4-bit,0.944444,0.822222,31451.646199
6,Mistral 7b 3-bit,0.947059,0.7,2287.412259
7,Misral 7b 4-bit,0.805882,0.673529,14581.336287
8,Mistral Instruct v0.2 3-bit (Prompts Fine Tuned),0.955556,0.677778,2970.780975
9,Mistral Instruct v0.2 4-bit (Prompts Fine Tuned),0.964706,0.761765,22224.761857


In [52]:
mistral_3_row = [compare_df[x][8] for x in compare_df] + [(18-0.5-0.4)/18]

In [44]:
# Mistral 4
mistral_instruct4_human_eval_path = './evaluation_results/mistral_instruct4_human_eval.pkl'
compare_human = []
add_human_eval(mistral_instruct4_convo, mistral_instruct4_df, compare_human, mistral_instruct4_human_eval_path)

<s> [INST] I am the game manager, I will give you a role in a game. You will be speaking to the player after this. I will give you your instructions now and you are to follow them when speaking to the player.You are a game character introducing the player to the game world. The game's name is 'Welcome to Learning with AI!' and the player will be learning about Model Deployment today! Your job will be to introduce the player to the game while asking the following questions: 1. Ask for the player's name. 2. Ask the player to give you a name3.: Ask the player to give you a personality for yourself. This can be anything funny and entertaining. You can give a suggestions to the user. 4. Now you just wish to know if the user wants to enable monsters in the game. A yes or no answer would suffice. Ask the questions one by one. Wait for the user to respond to each question first. Do not as next question until  you are satisfied you have gotten the answer to your previous question. Once you beli

0.5
<s> [INST] I am the game manager, I will give you a role in a game. You will be speaking to the player after this. I will give you your instructions now and you are to follow them when speaking to the player. You are introducing a player to the world! Your name is Tim. You are well known to be A spoon pretending to be a human. You will constantly bring this personality trait up when speaking. Players name is Tom. You will now be asked to explain the game world to the player and the answer any questions the player may have. The story: [The 'Tome of Knowledge' containing information on how to learn the forbidden art of Model Deployment has been lost throughout the game world. The player has been tasked with finding all the pages and to study its contents. The player has been able to track the pages to an island where one page appears daily. Thus, the player will be tasked to find 1 page a day, learn its contents and answer 3 questions correctly, that are related to the topic before b

1
<s> [INST] I am the game manager, I will give you a role in a game. You will be speaking to the player after this. I will give you your instructions now and you are to follow them when speaking to the player. You are a game character speaking to the player in the tutorial phase. Here are the details you should know: Your name is Tim. You are to channel your inner A spoon pretending to be a human. Keep it subtle and let this personality trait flow seamlessly into your response. Players name is Tom. Please keep all replies short, max 40 words. You will now be speaking to the player, begin by informing the player of the following: Say the following and keep it short: These are the controls for the game, take some time to get familiar with them. [/INST]

 Hello there, Tom! I'm Tim, your friendly spoon-like companion. I may be a utensil at heart, but I've mastered these controls like a pro. Take your time, practice makes perfect! Here's the lowdown: Move with WASD,
1
<s> [INST] I am the g

1
<s> [INST] I am the game manager, I will give you a role in a game. You will be speaking to the player after this. I will give you your instructions now and you are to follow them when speaking to the player. You are a testing Tom on the following topic: [this page gives the reader a quick reminder of “the data science lifecycle process”. to be clear the steps in order are, business understanding, data acquisition and understanding, deployment, and modelling. now we will go through this steps in more detail. the data science lifecycle process begins by first establishing a strong business understanding of the project. this can be done by engaging with business stakeholders. then you move on to data acquisition and understanding step. after this you move on to the deployment step. here insights become available to users, developers, or systems. finally, you can move on to modelling step. in the modelling step, you can socialize results or first insights. at any point you can go back t

Unnamed: 0,Game Section,Answer Relevance,QA Reason,Groundedness,Groundedness Reason,Human Eval
0,Introduction,1.0,Criteria: The response introduces the game cha...,0.8,"Statement Sentence: Hello there, welcome to ""W...",1.0
1,Introduction,1.0,Criteria: The response asks the player to give...,0.666667,"Statement Sentence: Great to meet you, Tom! \n...",1.0
2,Introduction,0.7,Criteria: Relevance to the prompt and providin...,1.0,"Statement Sentence: Well, Tom, I'm glad you fi...",1.0
3,Introduction,1.0,Criteria: The response asks the player for the...,1.0,"Statement Sentence: Alrighty then, Tom! I see ...",0.5
4,Story,0.9,Criteria: The response provides a clear explan...,0.833333,"Statement Sentence: Greetings, Tom, it's a ple...",1.0
5,Story,1.0,Criteria: Relevance to the prompt and providin...,1.0,"Statement Sentence: been gathered. After that,...",1.0
6,Story,1.0,Criteria: The response provides a clear and co...,0.666667,Statement Sentence: I apologize for any confus...,1.0
7,Tutorial-Control,1.0,Criteria: Relevance to the prompt and providin...,0.5,"Statement Sentence: Hello there, Tom! I'm Tim,...",1.0
8,Tutorial-FindRules,1.0,Criteria: The response is relevant to the enti...,1.0,"Statement Sentence: Hello Tom, I'm Tim the spo...",1.0
9,Tutorial-FoundRules,1.0,Criteria: The response accurately informs the ...,1.0,"Statement Sentence: Hey there, Tom! \nSupporti...",1.0


In [47]:
mistral_4_row = [compare_df[x][9] for x in compare_df] + compare_human

In [56]:
compare_human_path = './evaluation_results/compare_human.pkl'
with open(compare_human_path, 'rb') as file:
    compare_human_df = pickle.load(file)

In [57]:
compare_human_df.loc[8] = mistral_3_row
compare_human_df.loc[9] = mistral_4_row

In [60]:
compare_human_df.sort_values("Answer Relevance", ascending=False)

Unnamed: 0,LLM,Answer Relevance,Groundedness,Time Taken/s,Human Eval
0,ChatGPT,1.0,0.801653,115.545042,0.961905
9,Mistral Instruct v0.2 4-bit (Prompts Fine Tuned),0.964706,0.761765,22224.761857,0.970588
8,Mistral Instruct v0.2 3-bit (Prompts Fine Tuned),0.955556,0.677778,2970.780975,0.95
6,Mistral 7b 3-bit,0.947059,0.7,2287.412259,0.752941
5,Llama2 7b 4-bit,0.944444,0.822222,31451.646199,0.944444
3,Phi-2,0.910526,0.671604,726.270831,0.744737
2,Tinyllama Openorca,0.835294,0.82451,293.888515,0.862353
7,Misral 7b 4-bit,0.805882,0.673529,14581.336287,0.764706
1,TinyLlamaV1,0.688235,0.740266,247.822261,0.558824
4,Llama2 7b 3-bit,0.482353,0.411765,3905.975572,0.505882


In [61]:
with open(compare_human_path, 'wb') as file:
        pickle.dump(compare_human_df, file)