# RAGsyllabi testing interface

## PIP installs
Make sure to reload the session (not the runtime) after running the pip installs. Reloading the session only clears variables; installed packages remain until the runtime is disconnected or deleted.

In [1]:
# required pip installs, reload session (not kernel/runtime) afterwards.
# !pip install bitsandbytes accelerate einops # for 4- and 8 bit quantization
# !pip install datasets                # access to the document repository on huggingface
# !pip install sentence_transformers

In [2]:
# there was some model that I removed from the list that required these also,
# if you get complaints then restart the runtime and run this cell un-commented before the cell above

#!pip install torch==2.1.0
#!pip install xformers==0.0.22.post4 --index-url https://download.pytorch.org/whl/cu118  # provides building blocks to some models


## Imports and model loading

In [3]:
#!wget URL

#from func_script import load_hf_data, embed_query, cosine_similarity, get_top_k, reranker, GeneratorParams, GeneratorUtil, TestingUtilTexts
from func_script import *
from time import time
from update_eval_set import *

In [4]:
# Create the shared generator settings and list every candidate model next to
# its index, so that one can be selected by number further down.
# Some entries are test cases and might not work as of submission.
gen_params = GeneratorParams()
for i, model in enumerate(gen_params.models):
  print(f'{i} {model}')

0 ('facebook/bart-large-mnli', 'auto')
1 ('Intel/neural-chat-7b-v3-1', 'causal_4bit')
2 ('llmware/bling-sheared-llama-2.7b-0.1', 'causal_4bit')
3 ('filipealmeida/Mistral-7B-Instruct-v0.1-sharded', 'causal_4bit')
4 ('vilsonrodrigues/falcon-7b-instruct-sharded', 'causal_4bit')
5 ('llmware/bling-sheared-llama-1.3b-0.1', 'causal_4bit')
6 ('mistralai/Mistral-7B-Instruct-v0.2', 'causal_4bit')
7 ('llmware/dragon-mistral-7b-v0', 'causal_4bit')
8 ('mistralai/Mixtral-8x7B-Instruct-v0.1', '8x7b_4bit')


The subjectively best language generator so far is `Intel/neural-chat-7b-v3-1`; the 4-bit-quantized version needs only 4GB of system RAM and 5.3GB of GPU RAM (including all other modules in the RAG system). Generating a response takes between 20 and 70 seconds on a Colab T4 GPU.

### Choose LLM-model to load

In [14]:
# Choose a generator model by its index in the list printed above.
# If you have less than 13GB system RAM available, use a sharded model, one of
# the smaller 1-3B parameter models, or Intel's neural-chat (already sharded, largest file 9.95GB).
selected_model_idx = 1 # Recommended model: 1 ('Intel/neural-chat-7b-v3-1', 'causal_4bit')

# Load the chosen generator model; the second element of the selected
# gen_params.models entry (e.g. 'causal_4bit') presumably picks the
# quantization mode -- confirm in func_script.
generator = GeneratorUtil(params=gen_params, selected_model_idx=selected_model_idx)
# use line below if you want to skip quantization
#generator = GeneratorUtil(params=gen_params, model_name=gen_params.models[selected_model_idx][0], hf_model='causal')

# Load the syllabus data: one frame with the texts and one with the
# precomputed embeddings (assumed to be row-aligned -- TODO confirm).
df_text, df_embeddings = load_hf_data()
reference_embeddings = df_embeddings['Course content'] # right now we only use course content for similarity search

# Load the embedding model used to embed the user query.
# NOTE(review): AutoTokenizer, AutoModel and CrossEncoder are not imported
# explicitly in this notebook; they presumably arrive through the star
# imports at the top -- verify, or import them explicitly.
emb_model_name = 'intfloat/e5-large-v2'

tokenizer = AutoTokenizer.from_pretrained(emb_model_name)
emb_model = AutoModel.from_pretrained(emb_model_name)

# Load the cross-encoder that reranks the documents returned by the similarity search.
cross_encoder_name = 'cross-encoder/ms-marco-MiniLM-L-12-v2'
cross_encoder_model = CrossEncoder(cross_encoder_name, max_length=512)


tokenizer_config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/145 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

## Defining main

In [15]:
def main(query, ref_embeddings, docs, tokenizer, emb_model, k,
         cross_encoder_model, generator, timer=False,
         do_exact_search=True,
         log_retriever=True, log_generator=True,
         query_type="query_type", truth="truth",
         log_ret_path="retriever_eval.json", log_gen_path="system_eval.json", model_info_path="model_info.json"):
  """Answer one query: retrieve context, generate a response, optionally log both.

  Retrieval takes one of two routes:
    * If `exact_search` finds a course code in the query and `do_exact_search`
      is true, the context is built from the first row of `docs` whose
      'course_code' equals the first matched code.
    * Otherwise the query is embedded, compared against `ref_embeddings`,
      the `k` most similar rows of `docs` are reranked with the cross
      encoder, and the 'Course content' of the three best rows is used.

  Args:
    query: the user question (string).
    ref_embeddings: reference embeddings handed to `cosine_similarity`
      (assumed to be row-aligned with `docs` -- TODO confirm).
    docs: DataFrame with the syllabus texts; must have a 'course_code' column.
    tokenizer, emb_model: tokenizer and model handed to `embed_query`.
    k: number of documents fetched by the similarity search before reranking.
    cross_encoder_model: model handed to `reranker`.
    generator: object exposing `params` (with an `add` method) and
      `generate_response`.
    timer: if true, print how long the generation step took.
    do_exact_search: if false, always use the similarity search route.
    log_retriever, log_generator: whether to call `update_json` with the
      retrieved course codes / the generated response.
    query_type, truth: passed through to `update_json` unchanged.
    log_ret_path, log_gen_path, model_info_path: paths passed to `update_json`.

  Returns:
    A tuple `(response, top_d)`: the generated answer and the context string
    that was given to the generator.
  """
  # run exact search
  matched_codes = exact_search(query, docs)
  # if matched codes contains any course codes, skip similarity search
  if matched_codes and do_exact_search:
    print(f'Found course code "{matched_codes[0]}" in user query.')
    # Context = the texts in positional columns 3, 4, 5, 7 and 8 of the first
    # row for that course (which syllabus sections these are depends on the
    # column layout of `docs` -- TODO confirm). One could instead return the
    # full syllabus and rank each section with the reranker.
    top_d = '\n'.join(docs[docs['course_code'] == matched_codes[0]].iloc[0, [3, 4, 5, 7, 8]])
    # NOTE: a single code (str) here, but a list of codes in the other branch.
    course_codes = matched_codes[0]

  else:
    print('Found no course codes in user query, running similarity search...')
    # embed query
    query_embedding = embed_query(query, tokenizer, emb_model)
    # run the similarity search
    similarities = cosine_similarity(ref_embeddings, query_embedding)
    # Get the top k docs based on the similarity measure. Use the `docs`
    # argument rather than a notebook global, so the function works for
    # whatever frame the caller passes in.
    top_docs_text = get_top_k(docs, similarities, k)
    # rerank docs
    reranked_top_docs = reranker(query, top_docs_text, cross_encoder_model)
    # use the course content of the three best reranked documents as context
    top_d = '\n'.join(reranked_top_docs.iloc[:3]['Course content'])
    course_codes = reranked_top_docs["course_code"].tolist()

  # Add k and exact_search to model parameters for logging
  generator.params.add("k", k)
  generator.params.add("exact_search", do_exact_search)

  # generate response
  print('Generating...')
  start_time = time()
  response = generator.generate_response(generator.params, top_d, query)
  end_time = time()
  print('Response generated!')
  if timer:
    print('Time taken for generation:', end_time-start_time, 'seconds.')
  # Log the reranked course code of the retriever with the query
  if log_retriever:
    update_json(query, course_codes, top_d, generator, query_type, truth, log_ret_path, model_info_path)

  # Log the model response with the query
  if log_generator:
    update_json(query, response, top_d, generator, query_type, truth, log_gen_path, model_info_path)
  return response, top_d #, top_docs_text, similarities



## Query construction

In [16]:
# Canned evaluation queries provided by the testing utility.
testing_utility = TestingUtilTexts()
specific_queries, general_queries = testing_utility.specific_queries, testing_utility.general_queries

# List both sets with their indices so that one can be picked by number below.
print('Specific queries:')
for i, query in enumerate(specific_queries):
  print(f'{i} {query}')
print()
print('General queries:')
for i, query in enumerate(general_queries):
  print(f'{i} {query}')


Specific queries:
0 Does course [specific course] cover the transformer architecture?
1 What are the learning objectives outlined in the syllabus for [specific course]?
2 Can you provide a summary of the assessment methods used in [specific course]?
3 Does the syllabus for [specific course] mention any prerequisites or recommended prior knowledge?
4 How does the syllabus for [specific course] outline the grading criteria?

General queries:
0 I want to study economics, what courses should I take?
1 What advanced mathematics courses are available for engineering students?
2 What courses are recommended for international business students?
3 Are there any philosophy courses that focus on ethics and moral reasoning?
4 Are there any courses for students interested in artificial intelligence?


In [17]:
# Label for the kind of query ('specific' or 'general'). It is passed on to
# main(); it does not choose which list the template is taken from.
query_type = 'specific'
# position of the template in the "Specific queries" list printed above
query_idx = 0
# course code that replaces the "[specific course]" placeholder
course = 'DIT247'
# fill the chosen template with the course code and show the final query
query_template = specific_queries[query_idx]
query = testing_utility.make_test_query(course, query_template)
print(query)


Does course DIT247 cover the transformer architecture?


In [18]:
# or write your own query:
#query_type = 'specific' # 'general'
#query = 'Can you tell me what the course Machine Learning for Natural Language Processing is all about?'
#query = 'Can you tell me what the course DIT247 is all about?'
#query = 'Is there any course that introduces the pytorch python library and key machine learning concepts?'


## Main

In [19]:
# show the name of the generator model that is currently loaded
generator.model.name

'Intel/neural-chat-7b-v3-1'

In [20]:
# Set generator parameters and print the system prompt.
# NOTE(review): these names look like Hugging Face generate() options. If
# GeneratorUtil forwards them unchanged, beam search is switched on by
# NUM_BEAMS > 1 (not by DO_SAMPLE), and TEMP only has an effect when
# DO_SAMPLE is True -- confirm in func_script.
generator.params.MAX_NEW_TOKENS = 400
generator.params.TEMP = 0.3
generator.params.DO_SAMPLE = False
generator.params.NUM_BEAMS = 2
generator.params.SKIP_SPECIAL_TOKENS = False
generator.params.ADD_SPECIAL_TOKENS = False
# pick one of the predefined system prompts by index
generator.params.system_input = generator.params.system_inputs_list[1]
print(generator.params.system_input)

You are a large language model known as ChatGPT developed by OpenAI. 
      Your job is to act as an assistant to potential students who are looking for information
      about courses. The courses are offered by the University of Gothenburg. You will be provided
      relevant context in the form of course syllabi. You will respond to student questions in a professional manner.
      Base your answer on the given context. Please do a good job as your work is very important to my career.


In [21]:
# number of documents fetched by the similarity search before reranking
k = 20

# run the pipeline once; keep both the answer and the context it was given
response, top_d = main(
    query=query,
    ref_embeddings=reference_embeddings,
    docs=df_text,
    tokenizer=tokenizer,
    emb_model=emb_model,
    k=k,
    cross_encoder_model=cross_encoder_model,
    generator=generator,
    timer=True,
    query_type=query_type,
    truth=course,
)



Found corse code "DIT247" in user query.
Generating...




Response generated!
Time taken for generation: 91.61635613441467 seconds.


In [22]:
# show the context that was handed to the generator, followed by its answer
print('### CONTEXT:', '\n' + top_d)
print('### RESPONSE:', '\n' + response)

### CONTEXT: 
passage: course code: dit247, entry requirements to be eligible to the course, the student should have a bachelor's degree in any subject. in addition, the course requires: •7.5 credits of courses in programming or equivalent, •a course including probability and statistics, such as dit862 statistical methods for data science or msg810 mathematical statistics and discrete mathematics, •a first course in machine learning, such as dit866 applied machine learning, dit381 algorithms for machine learning and inference, or msa220 statistical learning for big data. applicants must prove knowledge of english: english 6/english b or the equivalent levelof an internationally recognized test, for example toefl, ielts.
passage: course code: dit247, learning outcomes on successful completion of the course the student will be able to: knowledge and understanding •describe the fundamentals of storing textual data for the world's languages, •describe the most common types of natural langu

## Testing runs
Un-comment and run the cells below to run test-cases

In [23]:
# course_codes = ['DIT247', 'NEK306', 'RT2519']  # List of course codes
# query_type = 'specific'
# do_exact_search = True

# for spec_query in testing_utility.specific_queries:
#   for course in course_codes:
#     # replace [specific course] with actual course code
#     query = testing_utility.make_test_query(course, spec_query)
#     # print(query)
#     response, top_d = main(query, reference_embeddings, df_text, tokenizer, emb_model,
#                            k, cross_encoder_model, generator, timer=True,
#                            do_exact_search=do_exact_search, query_type=query_type, truth=course)
#     # print('QUERY:', spec_query)
#     # print('CONTEXT:', top_d)
#     # print('RESPONSE:', response)


In [24]:
# course_codes = ['DIT247', 'NEK306', 'RT2519']  # List of course codes
# query_type = 'specific'
# do_exact_search = False

# for spec_query in testing_utility.specific_queries:
#   for course in course_codes:
#     # replace [specific course] with actual course code
#     query = testing_utility.make_test_query(course, spec_query)
#     # print(query)
#     response, top_d = main(query, reference_embeddings, df_text, tokenizer, emb_model,
#                            k, cross_encoder_model, generator, timer=False,
#                            do_exact_search=do_exact_search, query_type=query_type, truth=course)
#     # print('QUERY:', spec_query)
#     # print('CONTEXT:', top_d)
#     # print('RESPONSE:', response)


In [25]:
# query_type = 'general'

# for query in testing_utility.general_queries:
#   # print(query)
#   response, top_d = main(query, reference_embeddings, df_text, tokenizer, emb_model,
#                          k, cross_encoder_model, generator, timer=False,
#                          do_exact_search=False, query_type=query_type)
#   # print('QUERY:', query)
#   # print('CONTEXT:', top_d)
#   # print('RESPONSE:', response)