In [1]:
!pip install accelerate peft bitsandbytes transformers trl tqdm pandas torch transformers



In [None]:
import pandas as pd
import torch
import pandas as pd
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
from tqdm import tqdm

In [15]:
from transformers import (
    pipeline,
)
def format_prompt_baseline(essay, assignment):
    text = (
        "You are my English teacher. Read my essay and assignment:\n\n"
        f"Essay: '''{essay}'''\n\n"
        f"Assignment: '''{assignment}'''\n\n"
        "Give me feedback to help me revise. Extract three to five very short excerpts from my essay and give me feedback on those. Keep your feedback very short. List the excerpts and feedback like this:\n"
        "*[excerpt]---[feedback]\n"
        "*[excerpt]---[feedback]\n"
        "*[excerpt]---[feedback]\n"
    )
    return text

def generate(input, model, tokenizer):
    pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=2048)
    result = pipe(f"<s>[INST] {input} [/INST]")
    return result[0]['generated_text']

In [16]:
# Load dataset
df = pd.read_csv('eval_data_6_30.csv')
df['input'] = df.apply(lambda row: format_prompt_baseline(row['essay'], row['prompt']), axis=1)

In [13]:
df.head()

Unnamed: 0,essay,prompt,input
0,"Dear Principal,\n\nI have heard you are having...",Some of your friends perform community service...,You are my English teacher. Read my essay and ...
1,Asking for more than one person for advice can...,"When people ask for advice, they sometimes tal...",You are my English teacher. Read my essay and ...
2,"Dear Principal,\n\nConsidering a policy change...",Your principal is considering changing school ...,You are my English teacher. Read my essay and ...
3,"2/1/11\n\nDear Mr. Principal,\n\nI believe tha...",Some of your friends perform community service...,You are my English teacher. Read my essay and ...
4,The New school policy\n\nI do not like the new...,Your principal is considering changing school ...,You are my English teacher. Read my essay and ...


In [3]:
df.shape

(174, 3)

In [4]:
# Set up model config
base_model = "NousResearch/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
compute_dtype = getattr(torch, "float16")
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map={"": 0}
)
model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards: 100%|██████████| 2/2 [01:21<00:00, 40.87s/it]


In [17]:
#try just 1
res = generate(df['input'][0], model, tokenizer)
print(res)

<s>[INST] You are my English teacher. Read my essay and assignment:

Essay: '''Dear Principal,

I have heard you are having trouble making a decision on whether students should or shouldn't be required to do community service. I know many people who do community service including myself. From personal experience of community service i can tell you that it can help the way you think and act around certain types of people and ethnic groups. I am not say there is a different way to act around others, although it would be unprofessional to act like you do with your friends around the president of the United States. Therefore all students should be required to perform community service acts.

One of the main reasons community service should be essential is because students now a days have more time to do these things and it would greatly impact our communities. For example if one student did community service by picking up trash in a near by park we could prevent people from overseeing thes

In [18]:
tqdm.pandas()
df['predictions'] = df.progress_apply(lambda row: generate(row['input'], model, tokenizer), axis=1)
df['predictions'][0]

100%|██████████| 174/174 [32:55<00:00, 11.36s/it]


'<s>[INST] You are my English teacher. Read my essay and assignment:\n\nEssay: \'\'\'Dear Principal,\n\nI have heard you are having trouble making a decision on whether students should or shouldn\'t be required to do community service. I know many people who do community service including myself. From personal experience of community service i can tell you that it can help the way you think and act around certain types of people and ethnic groups. I am not say there is a different way to act around others, although it would be unprofessional to act like you do with your friends around the president of the United States. Therefore all students should be required to perform community service acts.\n\nOne of the main reasons community service should be essential is because students now a days have more time to do these things and it would greatly impact our communities. For example if one student did community service by picking up trash in a near by park we could prevent people from over

In [19]:
import re
def extract_text_after_tag(text, tag='[/INST]'):
    pattern = re.compile(f"{re.escape(tag)}(.*)", re.DOTALL)
    match = pattern.search(text)
    if match:
        return match.group(1).strip()
    return ""

def extract_feedback_sections(text):
    # Split the text by asterisks to separate sections
    sections = text.split('*')[1:]  # Ignore the part before the first asterisk
    
    result = []
    for section in sections:
        # Use regex to extract the text between * and ---
        excerpt_match = re.search(r'"(.*?)"---', section, re.DOTALL)
        excerpt = excerpt_match.group(1).strip() if excerpt_match else ''

        # Extract the feedback text after --- and before \n
        feedback_match = re.search(r'---(.*?)(\n|$)', section, re.DOTALL)
        #feedback_match = re.search(r'---(.*)', section, re.DOTALL)
        feedback = feedback_match.group(1).strip() if feedback_match else ''

        if len(excerpt) > 0 and len(feedback) > 0:
            result.append({'excerpt': excerpt, 'feedback': feedback})
    
    return result

def expand_pred_df(df, input_column):
    new_rows = []
    for idx, row in df.iterrows():
        input_text = row[input_column]
        sections = extract_feedback_sections(input_text)
        for section in sections:
            new_row = row.to_dict()
            new_row['excerpt'] = section['excerpt']
            new_row['feedback'] = section['feedback']
            new_rows.append(new_row)
    
    expanded_df = pd.DataFrame(new_rows)
    return expanded_df

In [20]:
df['extracted_content'] = df['predictions'].apply(extract_text_after_tag)

In [21]:
df.head()

Unnamed: 0,essay,prompt,input,predictions,extracted_content
0,"Dear Principal,\n\nI have heard you are having...",Some of your friends perform community service...,You are my English teacher. Read my essay and ...,<s>[INST] You are my English teacher. Read my ...,"Sure, I'd be happy to help you revise your ess..."
1,Asking for more than one person for advice can...,"When people ask for advice, they sometimes tal...",You are my English teacher. Read my essay and ...,<s>[INST] You are my English teacher. Read my ...,"Sure, I'd be happy to help you revise your ess..."
2,"Dear Principal,\n\nConsidering a policy change...",Your principal is considering changing school ...,You are my English teacher. Read my essay and ...,<s>[INST] You are my English teacher. Read my ...,Of course! Here are some excerpts from your es...
3,"2/1/11\n\nDear Mr. Principal,\n\nI believe tha...",Some of your friends perform community service...,You are my English teacher. Read my essay and ...,<s>[INST] You are my English teacher. Read my ...,Of course! Here are some excerpts from your es...
4,The New school policy\n\nI do not like the new...,Your principal is considering changing school ...,You are my English teacher. Read my essay and ...,<s>[INST] You are my English teacher. Read my ...,"Sure, I'd be happy to help you revise your ess..."


In [22]:
res_df = expand_pred_df(df, 'extracted_content')

In [23]:
res_df.head()

Unnamed: 0,essay,prompt,input,predictions,extracted_content,excerpt,feedback
0,"Dear Principal,\n\nI have heard you are having...",Some of your friends perform community service...,You are my English teacher. Read my essay and ...,<s>[INST] You are my English teacher. Read my ...,"Sure, I'd be happy to help you revise your ess...",I know many people who do community service in...,Feedback: Consider providing specific examples...
1,"Dear Principal,\n\nI have heard you are having...",Some of your friends perform community service...,You are my English teacher. Read my essay and ...,<s>[INST] You are my English teacher. Read my ...,"Sure, I'd be happy to help you revise your ess...",Doing such a little thing to a community could...,Feedback: Emphasize the long-term benefits of ...
2,"Dear Principal,\n\nI have heard you are having...",Some of your friends perform community service...,You are my English teacher. Read my essay and ...,<s>[INST] You are my English teacher. Read my ...,"Sure, I'd be happy to help you revise your ess...",I know it may sound funny but picking up trash...,Feedback: Highlight the various ways in which ...
3,Asking for more than one person for advice can...,"When people ask for advice, they sometimes tal...",You are my English teacher. Read my essay and ...,<s>[INST] You are my English teacher. Read my ...,"Sure, I'd be happy to help you revise your ess...","Asking advice from others, because it could gi...",This sentence could be rephrased for clarity. ...
4,Asking for more than one person for advice can...,"When people ask for advice, they sometimes tal...",You are my English teacher. Read my essay and ...,<s>[INST] You are my English teacher. Read my ...,"Sure, I'd be happy to help you revise your ess...",Their opinion could have been something you ha...,"This sentence is a good point, but it could be..."


In [24]:
res_df.shape

(667, 7)

In [25]:
res_df.to_csv('baseline_res.csv', index=False)