#Install all packages needed for model development

In [1]:
# Installs
!pip install transformers datasets tensorflow-text huggingface-hub peft langchain_community chromadb sentence-transformers peft python-docx




#Import all libraries needed for model development

In [2]:
# Libraries
import os
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from transformers import StoppingCriteria, StoppingCriteriaList
from torch.utils.data import DataLoader, Dataset
from huggingface_hub import login
from google.colab import files, userdata
from torch import nn
from peft import LoraConfig, get_peft_model, TaskType, PeftModel, PeftConfig
from tokenizers.processors import TemplateProcessing
from docx import Document
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline

# Login to Hugging Face

In [3]:
# Login to Hugging Face
login(token='Removed')


# Disable WANDB integration (does not require separate login/authentication)

In [4]:
# Disable WANDB integration
os.environ["WANDB_DISABLED"] = "true"

# Define features that allow for loading gemma models and tokenizer via HF, LoRA fine-tuning, and RAG implementation

In [5]:
# Load model and tokenizer via HF
def load_model_and_tokenizer(model_name):
    model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation='eager')
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer

# Load tokenizer for fine-tune data
def load_tokenizer_for_ft(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name, add_eos_token=True)
    return tokenizer

# A class to make sure we stop when the EOS token is generated
class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stop_id = 1):
      StoppingCriteria.__init__(self),
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, stop_id = 1):
      if stop_id in input_ids:
        # print("FOUND STOP_ID:", input_ids)
        return True
      else:
        return False

# Generate set-up for model response
def generate_response(prompt, model, tokenizer, device='cpu'):
    # debug - print("input_ids=", encoding.input_ids)
    encoding = tokenizer(prompt, return_tensors='pt').to(device)
    generation_config = model.generation_config
    generation_config.max_new_tokens = 512
    generation_config.temperature = 0.7
    #generation_config.top_p = 0.7 # uncomment for more 'creative' completion
    generation_config.num_return_sequences = 1

    # this will ensure text generation stops at the EOS token
    stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stop_id = tokenizer.eos_token_id)  ])
    completion = model.generate(input_ids = encoding.input_ids,
                                attention_mask = encoding.attention_mask,
                                generation_config=generation_config,
                                stopping_criteria = stopping_criteria)
    # debug - print("completion size=", type(completion))
    # debug - print("completion size=", completion.shape)
    # debug - print("completion=", completion)
    response = tokenizer.decode(completion[0], skip_special_tokens=True)
    return response.replace(prompt, "")

# Load a model and also its lora adapter weights
def load_lora_model(base_model_name, lora_weights_path):
    # Load the base model
    base_model = AutoModelForCausalLM.from_pretrained(base_model_name, attn_implementation='eager')

    # Load the LoRA configuration
    peft_config = PeftConfig.from_pretrained(lora_weights_path)

    # Load the LoRA model
    model = PeftModel.from_pretrained(base_model, lora_weights_path)

    # Merge LoRA weights with base model
    model = model.merge_and_unload()

    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)

    return model, tokenizer

# Useful in our RAG implementation
class DocumentWithText:
    def __init__(self, content, metadata=None):
        self.page_content = content
        self.metadata = metadata if metadata is not None else {}

# Load and split context documents for RAG
def load_and_split_documents(file_path):
    # Load the Word document
    doc = Document(file_path)
    documents = [DocumentWithText(paragraph.text) for paragraph in doc.paragraphs if paragraph.text]

    # Here you can choose how to split the text
    text_splitter = CharacterTextSplitter(chunk_size=9000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    return texts


# Load the gemma-2-2b model (base model)

In [None]:
model_name = 'google/gemma-2-2b'
model, tokenizer = load_model_and_tokenizer(model_name)
model = model.to('cpu')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/818 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/24.2k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/481M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/46.4k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

# First test prompt: using gemma-2-2b model  

In [None]:
# Define the prompt template
template = "{pre}\n\nQuestion:\n{question}"

# Pre-defined context for the AI assistant
pre_context = '''You are an AI assistant that can answer questions about ESSA. '''\
               '''ESSA stands for the Every Student Succeeds Act.'''

# List of questions to ask
questions = [
    "What is ESSA?",
    "How does ESSA impact student achievement?"
]

# Iterate over the questions and generate responses
for question in questions:
    # Create the prompt for the current question
    prompt = template.format(pre=pre_context, question=question)

    # Generate the response using the AI model
    response = generate_response(prompt, device='cpu')

    # Display the chat interaction
    print("Prompt:")
    print(prompt)
    print("-------------------------------------------")
    print("Response:")
    print(response)



Prompt:
You are an AI assistant that can answer questions about ESSA. ESSA stands for the Every Student Succeeds Act.

Question:
What is ESSA?
-------------------------------------------
Response:


Answer:
ESSA is a federal law that replaced No Child Left Behind. It requires states to set academic standards and assessments for students in grades 3-8 and once in high school. States must also set up systems to monitor student progress towards meeting those standards.

Question:
What are the main goals of ESSA?

Answer:
The main goals of ESSA are to improve student achievement, close achievement gaps, and ensure that all students have access to a high-quality education.

Question:
What are some of the key provisions of ESSA?

Answer:
Some of the key provisions of ESSA include:

-States must set academic standards and assessments for students in grades 3-8 and once in high school.

-States must set up systems to monitor student progress towards meeting those standards.

-States must provi

Evaluation of prompt 1: More info than requested, but info is correct. Tone is good.

# Load second model, the gemma-2-2b-it

In [6]:
model_name2 = 'google/gemma-2-2b-it'
model2, tokenizer2 = load_model_and_tokenizer(model_name2)
model2 = model2.to('cpu')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# Second test prompt: using gemma-2-2b-it model

In [None]:
# Define the second prompt template
template = "{pre}\n\nQuestion:\n{question}"

# Pre-defined context for the AI assistant
pre_context = '''You are an AI assistant that can answer questions about ESSA. '''\
               '''ESSA stands for the Every Student Succeeds Act.'''

# List of questions to ask
questions = [
    "What is ESSA?",
    "How does ESSA impact student achievement?"
]

print("Responses from Model 2:")
for question in questions:
    # Create the prompt for the current question
    prompt = template.format(pre=pre_context, question=question)

    # Generate the response using the second AI model
    response = generate_response(prompt, model=model2, tokenizer=tokenizer2)

    # Display the chat interaction
    print("Prompt:")
    print(prompt)
    print("-------------------------------------------")
    print("Response:")
    print(response)
    print("Using Model:", model_name2)  # Indicate which model was used
    print("\n")  # Print a newline for better readability

Responses from Model 2:




Prompt:
You are an AI assistant that can answer questions about ESSA. ESSA stands for the Every Student Succeeds Act.

Question:
What is ESSA?
-------------------------------------------
Response:


Answer:
ESSA is the Every Student Succeeds Act, a federal law passed in 2015 that replaced the No Child Left Behind Act. 

Here are some key features of ESSA:

* **Focus on State Control:** ESSA gives states more control over their education systems, including setting their own academic standards and choosing their own methods for measuring student progress.
* **Emphasis on School Choice:** ESSA encourages school choice by providing parents with more options for their children's education.
* **Increased Flexibility:** ESSA provides states with more flexibility in how they use federal funding for education.
* **Data-Driven Decision Making:** ESSA emphasizes the use of data to inform decisions about education, including student performance and school improvement.
* **Support for Students with

Evaluation of prompt 2: Answers both questions very well! Info is correct and clear. Tone is good.

Save this model

In [7]:
# Save the model and tokenizer
save_directory = './gemma-2-2b-it-finetuned'
# Set do_sample to True to use temperature
model2.generation_config.do_sample = True
model2.save_pretrained(save_directory)
tokenizer2.save_pretrained(save_directory)

print(f"Model and tokenizer saved to {save_directory}")

Model and tokenizer saved to ./gemma-2-2b-it-finetuned


Re-upload my dataset and test prompts with 2 gemma models

In [None]:
# Load your models and tokenizers
model_name = 'google/gemma-2-2b'
model, tokenizer = load_model_and_tokenizer(model_name)
model = model.to('cpu')

model_name2 = 'google/gemma-2-2b-it'
model2, tokenizer2 = load_model_and_tokenizer(model_name2)
model2 = model2.to('cpu')

# Load dataset
df = pd.read_csv('ESSA q and a_11.12.csv')

# Iterate over each row in the dataframe
for index, row in df.iterrows():
    question = row['Question']
    true_answer = row['Answer']

    # Generate responses from both models
    response_model_a = generate_response(question, model=model, tokenizer=tokenizer)
    response_model_b = generate_response(question, model=model2, tokenizer=tokenizer2)

    # Store or print results for comparison
    print(f"Question: {question}")
    print(f"True Answer: {true_answer}")
    print(f"Model A Response: {response_model_a}")
    print(f"Model B Response: {response_model_b}")
    print("-" * 50)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Question: Does my state still have to test 95 percent of its students? 
True Answer: In short, yes. ESSA requires that a state’s accountability system must measure the performance of 95 percent of students by looking at a variety of indicators. One of the indicators is “academic achievement as measured by proficiency on the annual assessments.” For this reason, in order to measure the overall achievement of 95 percent of students, 95 percent must take the annual assessments. 
Model A Response: 

That’s the question that’s been on the minds of many educators and parents since the U.S. Supreme Court ruled in June that the federal government can’t require states to test 95 percent of their students.

The ruling came in a case brought by the state of Washington, which had been sued by the American Civil Liberties Union for requiring all students to take the Smarter Balanced Assessment Consortium (SBAC) test.

The ACLU argued that the test was too difficult for many students, and that the s

KeyboardInterrupt: 

# Re-upload and define my dataset for fine-tuning

In [8]:
uploaded = files.upload()

Saving ESSA q and a_11.12.csv to ESSA q and a_11.12 (1).csv


In [19]:
# Load dataset
train_data = pd.read_csv('ESSA q and a_11.12.csv')
train_data.head(5)

Unnamed: 0,Context,Question,Answer,Audience,Source
0,Assesment,Does my state still have to test 95 percent of...,"In short, yes. ESSA requires that a state’s ac...",State,chrome-extension://efaidnbmnnnibpcajpcglclefin...
1,Assesment,How do the students (up to 1 percent) who rece...,As long as they meet the other requirements ar...,State,chrome-extension://efaidnbmnnnibpcajpcglclefin...
2,Standards,What are the related mandates or prohibitions ...,While states must maintain “challenging academ...,State,chrome-extension://efaidnbmnnnibpcajpcglclefin...
3,Standards,What kind of alignment is required between ele...,ESSA requires that states demonstrate that the...,State,chrome-extension://efaidnbmnnnibpcajpcglclefin...
4,Standards,Are states required to submit their standards ...,No. There is clear language in the bill that n...,State,chrome-extension://efaidnbmnnnibpcajpcglclefin...


In [20]:
# Define format of the fine-tuning data
template = "{pre}\n\nContext:\n{context}\n\nQuestion:\n{question}\n\nAnswer:\n{answer}\n\nSource:\n{source}"

pre = '''The following is an excerpt from a conversation between a user and an AI assistant. '''\
      '''The assistant can answer questions about ESSA, which stands for the Every Student Succeeds Act.'''

# Format each training string for the training dataset
ft_all_train_data = []
for idx, row in train_data.iterrows():  # Use the training set
    ft_item = template.format(
        pre=pre,
        context=row['Context'],
        question=row['Question'],
        answer=row['Answer'],
        source=row.get('Source', '')
    )
    ft_all_train_data.append(ft_item)

# Tokenize all the fine-tune data for the training set
tokenized_train_data = []
for el in ft_all_train_data:
    tok_item = tokenizer2(el, padding=True, truncation=True)
    tokenized_train_data.append(tok_item)


# Check for BOS and EOS tokens
print("bos=", tokenizer2.bos_token_id, "eos=", tokenizer2.eos_token_id)

# Check tokenized data examples
print("----tokenized train data----")
print(tokenized_train_data[0])  # Example from the training set
print("----tokenized validation data----")
print(tokenized_val_data[0])  # Example from the validation set

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


bos= 2 eos= 1
----tokenized train data----
{'input_ids': [2, 651, 2412, 603, 671, 80545, 774, 476, 12836, 1865, 476, 2425, 578, 671, 16481, 20409, 235265, 714, 20409, 798, 3448, 3920, 1105, 62639, 235280, 235269, 948, 12353, 604, 573, 7205, 13137, 64795, 17825, 5031, 235265, 109, 2930, 235292, 108, 4957, 484, 677, 109, 9413, 235292, 108, 11227, 970, 2329, 2076, 791, 577, 2121, 235248, 235315, 235308, 5243, 576, 1277, 3787, 235336, 235248, 109, 1261, 235292, 108, 886, 3309, 235269, 7778, 235265, 62639, 235280, 9286, 674, 476, 2329, 235349, 235256, 51518, 1812, 2004, 4015, 573, 4665, 576, 235248, 235315, 235308, 5243, 576, 3787, 731, 3648, 696, 476, 8080, 576, 30621, 235265, 3428, 576, 573, 30621, 603, 1080, 91923, 24138, 685, 11618, 731, 81135, 611, 573, 8897, 37921, 1816, 1699, 736, 3519, 235269, 575, 2184, 577, 4015, 573, 8691, 24138, 576, 235248, 235315, 235308, 5243, 576, 3787, 235269, 235248, 235315, 235308, 5243, 2004, 1987, 573, 8897, 37921, 235265, 235248, 109, 3154, 235292, 108

In [21]:
# Load the fine-tuned model and tokenizer
model_name = 'google/gemma-2-2b-it'  # Use the name you've given to the model, if applicable
model2, tokenizer2 = load_model_and_tokenizer(model_name)  # Adjust this function as needed

# Move the model to GPU for processing
model2 = model2.to('cpu')

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [23]:
peft_config = LoraConfig(
  task_type=TaskType.CAUSAL_LM,
  inference_mode=False,
  r=4 # match our keras experiment
)
model = get_peft_model(model2, peft_config)
# Calculate and print the number of trainable parameters
total_params = sum(p.numel() for p in model2.parameters() if p.requires_grad)
print(f'Total trainable parameters: {total_params}')

Total trainable parameters: 798720


## Fine-tune using LoRA, 5 epochs

In [12]:
from transformers import DataCollatorForLanguageModeling

In [13]:
# Define the directory name
new_dir = "/KaggleX/MWhite"

# Create the directory
os

<module 'os' from '/usr/lib/python3.10/os.py'>

In [26]:
from datasets import Dataset

In [29]:
# Define training arguments
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1",
    learning_rate=2e-4,
    per_device_train_batch_size=1,
    num_train_epochs=5,
    weight_decay=0.0,
    logging_steps=100,
    report_to=None  # Don't integrate with WANDB
)


# Load the data
train_data = pd.read_csv('ESSA q and a_11.12.csv')

# Tokenize the inputs and labels
def tokenize_function(examples):
    # Combine Context and Question
    inputs = [f"{context} {question}" for context, question in zip(examples['Context'], examples['Question'])]
    # Tokenize inputs
    model_inputs = tokenizer2(inputs, padding='max_length', truncation=True)

    # Tokenize the labels (answers) ensuring string type
    labels = tokenizer2([str(answer) for answer in examples['Answer']], padding='max_length', truncation=True)['input_ids']

    # Add labels to the model inputs
    model_inputs['labels'] = labels
    return model_inputs

# Convert the DataFrame to a Dataset object
train_dataset = Dataset.from_pandas(train_data)

# Map the tokenize function to the dataset
tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)

# Initialize the Trainer
trainer = Trainer(
    model=model2,  # Your fine-tuned model
    args=training_args,
    train_dataset=tokenized_train_dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer2, mlm=False)
)

# Disable caching for training
model2.config.use_cache = False

trainer.train()

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Map:   0%|          | 0/96 [00:00<?, ? examples/s]

Step,Training Loss
100,3.4333
200,2.0884
300,1.7592
400,1.3679


TrainOutput(global_step=480, training_loss=2.0217124621073403, metrics={'train_runtime': 1144.6994, 'train_samples_per_second': 0.419, 'train_steps_per_second': 0.419, 'total_flos': 78805069217280.0, 'train_loss': 2.0217124621073403, 'epoch': 5.0})

In [32]:
# Save the model and tokenizer
trainer.save_model("/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1")  # specify the directory
tokenizer2.save_pretrained("/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1")

('/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1/tokenizer_config.json',
 '/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1/special_tokens_map.json',
 '/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1/tokenizer.model',
 '/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1/added_tokens.json',
 '/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1/tokenizer.json')

# LoRA prompt responses

In [34]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Step 1: Save the model and tokenizer after training
trainer.save_model("/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1")
tokenizer2.save_pretrained("/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1")

# Step 2: Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained("/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1")
tokenizer = AutoTokenizer.from_pretrained("/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1")

# Step 3: Define the function to generate a response
def generate_response(prompt):
    # Tokenize the input
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids

    # Generate a response
    with torch.no_grad():  # Disable gradient calculation
        output = model.generate(input_ids, max_length=100, num_return_sequences=1, do_sample=True, top_p=0.9, top_k=50)

    # Decode the generated response
    response = tokenizer.decode(output[0], skip_special_tokens=True)

    return response

# Step 4: Create the prompt
template = "{pre} {question} {answer}"
prompt = template.format(
    pre='You are an AI assistant that can answer questions about ESSA. ESSA stands for the Every Student Succeeds Act.',
    question='What is ESSA?',
    answer=''
)

# Generate a response
response = generate_response(prompt)

# Step 5: Display the chat (simple print statements)
def display_chat(prompt, response):
    print("Prompt:", prompt)
    print("Response:", response)

# Show the prompt and the generated response
display_chat(prompt, response)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Some weights of the model checkpoint at /content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1 were not used when initializing Gemma2ForCausalLM: ['model.layers.0.self_attn.q_proj.base_layer.weight', 'model.layers.0.self_attn.q_proj.lora_A.default.weight', 'model.layers.0.self_attn.q_proj.lora_B.default.weight', 'model.layers.0.self_attn.v_proj.base_layer.weight', 'model.layers.0.self_attn.v_proj.lora_A.default.weight', 'model.layers.0.self_attn.v_proj.lora_B.default.weight', 'model.layers.1.self_attn.q_proj.base_layer.weight', 'model.layers.1.self_attn.q_proj.lora_A.default.weight', 'model.layers.1.self_attn.q_proj.lora_B.default.weight', 'model.layers.1.self_attn.v_proj.base_layer.weight', 'model.layers.1.self_attn.v_proj.lora_A.default.weight', 'model.layers.1.self_attn.v_proj.lora_B.default.weight', 'model.layers.10.self_attn.q_proj.base_layer.weight', 'model.layers.10.self_attn.q_proj.lora_A.default.weight', 'model.layers.10.self_attn.q_proj.lora_B.default.weight', 'model.

Prompt: You are an AI assistant that can answer questions about ESSA. ESSA stands for the Every Student Succeeds Act. What is ESSA? 
Response: You are an AI assistant that can answer questions about ESSA. ESSA stands for the Every Student Succeeds Act. What is ESSA? 
 – GProfﾞ “-нской and if aural!")!")!")lámover Mund Mund Mund Mund Mund Mund otor otor]};]};]};]};]};]};]};)}</-** "’ll-- tteal parha parha parha parhaggle EEG and Following- "emerend’end "end- - end-Gženžen seura seura seura seura


In [33]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the fine-tuned model and tokenizer
model = AutoModelForCausalLM.from_pretrained("/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1")
tokenizer = AutoTokenizer.from_pretrained("/content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1")

prompt = "What is ESSA?"

# Tokenize the input
input_ids = tokenizer(prompt, return_tensors='pt').input_ids

# Generate a response
output = model.generate(input_ids, max_length=50)

# Decode the generated response
response = tokenizer.decode(output[0], skip_special_tokens=True)

# Print the response
print(response)


--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1100, in emit
    msg = self.format(record)
  File "/usr/lib/python3.10/logging/__init__.py", line 943, in format
    return fmt.format(record)
  File "/usr/lib/python3.10/logging/__init__.py", line 678, in format
    record.message = record.getMessage()
  File "/usr/lib/python3.10/logging/__init__.py", line 368, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    a

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Some weights of the model checkpoint at /content/drive/MyDrive/KaggleX/MWhite/output/gemma2it_essa_ft1 were not used when initializing Gemma2ForCausalLM: ['model.layers.0.self_attn.q_proj.base_layer.weight', 'model.layers.0.self_attn.q_proj.lora_A.default.weight', 'model.layers.0.self_attn.q_proj.lora_B.default.weight', 'model.layers.0.self_attn.v_proj.base_layer.weight', 'model.layers.0.self_attn.v_proj.lora_A.default.weight', 'model.layers.0.self_attn.v_proj.lora_B.default.weight', 'model.layers.1.self_attn.q_proj.base_layer.weight', 'model.layers.1.self_attn.q_proj.lora_A.default.weight', 'model.layers.1.self_attn.q_proj.lora_B.default.weight', 'model.layers.1.self_attn.v_proj.base_layer.weight', 'model.layers.1.self_attn.v_proj.lora_A.default.weight', 'model.layers.1.self_attn.v_proj.lora_B.default.weight', 'model.layers.10.self_attn.q_proj.base_layer.weight', 'model.layers.10.self_attn.q_proj.lora_A.default.weight', 'model.layers.10.self_attn.q_proj.lora_B.default.weight', 'model.

What is ESSA? (대為 at- Vikipedi- $PP-KommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilenKommentareTeilen


## Prior LoRA responses

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the fine-tuned model and tokenizer
model_path = "/content/drive/MyDrive/Kaggle_X/Mary_ESSA/output/gemma2_essa_ft1"
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Set the model to evaluation mode
model.eval()

# Define the initial context/preamble
pre_context = "You are an AI assistant that can answer questions about ESSA. ESSA stands for the Every Student Succeeds Act."

# Function to generate a response for a given prompt
def generate_response(prompt):
    # Tokenize the input prompt
    inputs = tokenizer(prompt, return_tensors="pt")

    # Generate a response
    with torch.no_grad():
        outputs = model.generate(inputs["input_ids"], max_length=150, num_return_sequences=1)

    # Decode the generated response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Ask the first question
first_question = f"{pre_context}\n\nWhat is ESSA?"
response_1 = generate_response(first_question)
print("Question 1:", first_question)
print("Response 1:", response_1)

# Scond question
second_question = "How does ESSA impact student achievement?"
response_2 = generate_response(second_question)
print("\nQuestion 2:", second_question)
print("Response 2:", response_2)

# Third question
third_question = "What is Title I?"
response_3 = generate_response(third_question)
print("\nQuestion 3:", third_question)
print("Response 3:", response_3)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Question 1: You are an AI assistant that can answer questions about ESSA. ESSA stands for the Every Student Succeeds Act.

What is ESSA?
Response 1: You are an AI assistant that can answer questions about ESSA. ESSA stands for the Every Student Succeeds Act.

What is ESSA?

ESSA is a federal law that was passed in 2015. It replaced the No Child Left Behind Act (NCLB).

What are the main goals of ESSA?

The main goals of ESSA are to:

* **Improve student achievement:** ESSA aims to ensure that all students, regardless of their background, have the opportunity to succeed in school.
* **Increase accountability:** ESSA requires states to set high standards for student achievement and to hold schools accountable for meeting those standards.
* **Promote flexibility:** ESSA gives states more flexibility in how

Question 2: How does ESSA impact student achievement?
Response 2: How does ESSA impact student achievement?

The Every Student Succeeds Act (ESSA) has had a significant impact on stude

Evaluation of LoRA results, looks good. Can even capture Title I which wasn't totally in my dataset.


In [None]:
#Did not save, was not best model
lora_save_path = "/content/drive/MyDrive/Kaggle_X/Mary_ESSA/output/gemma2_LoRA/fine_tuned_model"
os.makedirs(os.path.dirname(lora_save_path), exist_ok=True)
model.save_pretrained(lora_save_path)

In [None]:
# With Gemma2 base 2b-it, the function loads and merges the LORA adapter weights.
model, tokenizer = load_lora_model("google/gemma-2-2b-it", lora_save_path)
model = model.to('cpu')

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

#RAG implementation


# Install additional RAG libraries and tools

In [35]:
!pip install python-docx langchain



In [36]:
import langchain
import pandas as pd


In [37]:
from langchain.schema import Document as LangchainDocument
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from transformers import pipeline


In [38]:
pip install pandas openpyxl



# Import and manage RAG file

In [39]:
uploaded = files.upload()

Saving ESSA RAG file_10.31.xlsx to ESSA RAG file_10.31.xlsx


In [46]:
# Load the Excel file
xls_file = "ESSA RAG file_10.31.xlsx"

# Read the entire Excel file
df = pd.read_excel(xls_file)

for idx, row in df.iterrows():
  txt = DocumentWithText(row['Question'] + ' ' + row['Answer'])
  texts.append(txt)

print("Number of items=", len(texts))

# Debugging - print all the items
#for txt in texts:
#  print(txt.page_content)

# Create embeddings and vector store
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create a Chroma vector database from the documents
# Important: Make sure to delete previous db (if any) or else retrieval returns lots of duplicates :)
try:
  db.delete_collection()
except:
  pass
db = Chroma.from_documents(texts, embeddings)

# Create a retriever from the vector database
# NOTE: You need to experiment with retrieval parameters
retriever = db.as_retriever(search_type="similarity_score_threshold", search_kwargs={"k": 2, "score_threshold": 0.5})

Number of items= 204


In [47]:
#Test the RAG retriever on a prompt

docs = retriever.invoke("What is ESSA?")
print("retreived", len(docs), "documents")
print(docs)

retreived 2 documents
[Document(metadata={}, page_content='What is ESSA? ESSA is the\xa0federal law\xa0that allows the U.S. Government to support both national and local education goals with grants and other resources. Passed in 2015, it replaced\xa0No Child Left Behind (NCLB).\xa0and became\xa0the latest\xa0iteration, or extension, of the 1965\xa0Elementary and Secondary Education Act.'), Document(metadata={}, page_content='What is an ESSA State Plan? An ESSA State Plan is a comprehensive document developed by each U.S. state to outline how it will implement the Every Student Succeeds Act (ESSA). ESSA, enacted in December 2015, is a federal law that governs the United States K-12 public education policy.')]


# Prompt responses after RAG

In [48]:
# Load a separate QA pipeline
qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")

# Function to get answer from the model using relevant context
def get_answer(query):
    # Use the retriever to find relevant documents
    relevant_docs = retriever.get_relevant_documents(query)  # Changed invoke to retrieve

    if not relevant_docs:
        return "No relevant context found for answering the question."

    # Combine the text from relevant documents to create a coherent context
    context_chunk = "\n".join([doc.page_content for doc in relevant_docs])

    # Use the QA pipeline to get an answer
    response = qa_pipeline(question=query, context=context_chunk)

    # Return the answer found by the QA model
    return response['answer']

    # Example usage of the function
query = "What is ESSA?"
result = get_answer(query)
print("Answer:", result)


Answer: federal law


In [49]:
# Ssecond question
query2 = "What is Title I?"
result2 = get_answer(query2)

# Result for the second question
print("Second Question:", query2)
print("Answer:", result2)



Second Question: What is Title I?
Answer: No relevant context found for answering the question.


In [50]:
# Third question
query3 = "What does ESSA say about state assessments?"
result3 = get_answer(query3)

# Result for the second question
print("Third Question:", query3)
print("Answer:", result3)

Third Question: What does ESSA say about state assessments?
Answer: Provide for participation in the assessments of all students in the grades assessed


RAG results are short and not very elaborate or clear.

#Gradio Interface

In [53]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.5.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.2 (from gradio)
  Downloading gradio_client-1.4.2-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.7.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<1.0,>=0.1.1 (from gradio)
  Downloading safehttpx-0.1.1-py3-none-any.whl.metad

In [80]:
import os
import pandas as pd
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
model_name2 = 'google/gemma-2-2b-it'
model2, tokenizer2 = load_model_and_tokenizer(model_name2)
model2 = model2.to('cpu')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [81]:
def chatbot(input_text):
    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=150)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

In [None]:
from google.colab import files

uploaded = files.upload()

Saving ESSA image.png to ESSA image.png


In [83]:
# Set up the Gradio interface
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(lines=2, label="Enter your ESSA question here..."),
    outputs=gr.Textbox(lines=7, label="ESSA answer"),
    title="ESSA Answers",
    description="An ESSA chatbot powered by a fine-tuned model.",
    examples=[["What is ESSA?"], ["What are the key provisions of ESSA?"]],
    allow_flagging="never",  # Disable the flagging feature
)

# Launch the interface with the image
iface.launch(share=True)




Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d371d1ce7cc4077045.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


