In [None]:
%pip install nltk pandas
%pip install textstat
%pip install transformers torch scikit-learn
import nltk
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
from nltk import pos_tag
# Fetch the NLTK data that sent_tokenize/word_tokenize and pos_tag need.
# Newer NLTK releases look these resources up under new ids ('punkt_tab',
# 'averaged_perceptron_tagger_eng') while older releases use the original ids,
# and the install above is unpinned, so request both sets. nltk.download()
# reports an id it does not know and returns False instead of raising, so the
# extra ids are harmless on an older release.
for nltk_resource in (
    'averaged_perceptron_tagger',
    'punkt',
    'averaged_perceptron_tagger_eng',
    'punkt_tab',
):
    nltk.download(nltk_resource)

In [9]:
%pip install language-tool-python
import language_tool_python

Note: you may need to restart the kernel to use updated packages.


In [61]:
import pandas as pd
# Load the input table; later cells read its 'feedback' and 'excerpt' columns.
df = pd.read_csv('finetune_res_asap.csv')

# Structure

## Basic Lengths

In [62]:
# Normalise both text columns to str before any feature is computed. The
# uptake and cosine-similarity cells call string/tokenizer methods on
# 'excerpt', so a non-string value there (e.g. a missing cell) would raise.
df['feedback'] = df['feedback'].astype(str)
df['excerpt'] = df['excerpt'].astype(str)

# feedback word count (whitespace-delimited tokens)
df['wc'] = df['feedback'].apply(lambda x: len(x.split()))

In [63]:
# feedback sentence count
def count_sentences(feedback):
    """Return how many sentences sent_tokenize finds in `feedback`."""
    return len(sent_tokenize(feedback))


# One sentence count per feedback comment.
df['sc'] = df['feedback'].apply(count_sentences)

## Complexity

In [64]:
# Readability of each feedback comment, computed with textstat.flesch_reading_ease.
# NOTE(review): the 'fk' column name suggests Flesch-Kincaid, but the function
# applied here is the Flesch Reading Ease score — confirm which metric is intended.
import pandas as pd
import textstat
df['fk'] = df['feedback'].apply(textstat.flesch_reading_ease)

In [65]:
# TTR
import nltk
def calculate_ttr(feedback):
    """Type-token ratio of `feedback`: distinct tokens divided by all tokens.

    Tokens come from word_tokenize and are compared as-is (no case folding).
    Returns 0 when there are no tokens.
    """
    tokens = word_tokenize(feedback)
    if not tokens:
        return 0
    return len(set(tokens)) / len(tokens)


df['ttr'] = df['feedback'].apply(calculate_ttr)

## Fluency

In [66]:
# inverse perplexity
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load pre-trained GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Function to calculate inverse perplexity
def calculate_inverse_perplexity(feedback):
    """Return 1 / perplexity of `feedback` under the language model loaded above.

    Perplexity is exp(loss), where loss is what the model returns when the
    input ids are also passed as labels.

    Inputs longer than the tokenizer's maximum length are truncated rather than
    overflowing the model's context window. With fewer than two tokens there is
    no next-token prediction to score, so the result is NaN (undefined) instead
    of an error from the model.
    """
    inputs = tokenizer(feedback, return_tensors='pt', truncation=True)
    if inputs['input_ids'].shape[-1] < 2:
        return float('nan')
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs['input_ids'])
    # exp() applied directly to the loss tensor; no float -> tensor round trip.
    perplexity = torch.exp(outputs.loss).item()
    return 1 / perplexity if perplexity != 0 else 0

df['inverse_perplexity'] = df['feedback'].apply(calculate_inverse_perplexity)



In [67]:
# grammatical errors

# Create the LanguageTool checker with the 'en-US' language code.
tool = language_tool_python.LanguageTool('en-US')


def count_grammatical_errors(text):
    """Return the number of matches `tool.check` reports for `text`."""
    return len(tool.check(text))


# Raw match count per comment, then the count divided by the comment's word count.
df['gr_count'] = df['feedback'].apply(count_grammatical_errors)
df['gr_rate'] = df['gr_count'].div(df['wc'])

In [68]:
# Preview the first rows, including the feature columns added so far.
df.head()

Unnamed: 0,essay_id,prompt,essay,grade_level,domain1_score,type,input,predictions,extracted_content,excerpt,feedback,wc,sc,fk,ttr,inverse_perplexity,gr_count,gr_rate
0,1206,"More and more people use computers, but not ev...","Dear editor @ORGANIZATION2 the source, @CAPS1,...",8,11,persuasive,### INSTRUCTIONS: You are my English teacher. ...,### INSTRUCTIONS: You are my English teacher. ...,"***""safety is on bigger issue""---I agree, but ...",safety is on bigger issue,"I agree, but what can we do to address the saf...",12,1,84.68,1.0,0.04449,0,0.0
1,1206,"More and more people use computers, but not ev...","Dear editor @ORGANIZATION2 the source, @CAPS1,...",8,11,persuasive,### INSTRUCTIONS: You are my English teacher. ...,### INSTRUCTIONS: You are my English teacher. ...,"***""safety is on bigger issue""---I agree, but ...",The massive amounts @ORGANIZATION2 typing on t...,Good job providing evidence!,4,1,33.58,1.0,0.000393,0,0.0
2,1206,"More and more people use computers, but not ev...","Dear editor @ORGANIZATION2 the source, @CAPS1,...",8,11,persuasive,### INSTRUCTIONS: You are my English teacher. ...,### INSTRUCTIONS: You are my English teacher. ...,"***""safety is on bigger issue""---I agree, but ...","My bro, @PERSON2, had on assignment to do @DAT...","Brother, how could we strengthen our argument?...",19,2,44.91,0.875,0.01681,0,0.0
3,1206,"More and more people use computers, but not ev...","Dear editor @ORGANIZATION2 the source, @CAPS1,...",8,11,persuasive,### INSTRUCTIONS: You are my English teacher. ...,### INSTRUCTIONS: You are my English teacher. ...,"***""safety is on bigger issue""---I agree, but ...","In closing, overusing your computer can cause ...","Excellent summary! Again, I would make sure to...",20,2,59.64,0.913043,0.035278,0,0.0
4,12191,"Read ""Narciso Rodriguez"" by Narciso Rodriguez....",The mood of the memoir is appreciation. I thin...,8,3,litanalysis,### INSTRUCTIONS: You are my English teacher. ...,### INSTRUCTIONS: You are my English teacher. ...,"***"" The mood of the memoir is appreciation. I...",The mood of the memoir is appreciation. I thin...,Good job identifying the mood of the memoir. R...,21,2,69.28,0.875,0.012918,0,0.0


# Specificity

## Content Word Density

In [69]:
import nltk
def calculate_content_word_density(feedback):
    """Share of the tokens in `feedback` tagged as noun, verb, adjective or adverb.

    Returns 0 when `feedback` yields no tokens.
    """
    tokens = word_tokenize(feedback)
    # Tag prefixes that mark nouns, verbs, adjectives and adverbs.
    content_prefixes = ('NN', 'VB', 'JJ', 'RB')
    n_content = sum(1 for _, tag in pos_tag(tokens) if tag.startswith(content_prefixes))
    return n_content / len(tokens) if tokens else 0


df['content_word_density'] = df['feedback'].apply(calculate_content_word_density)

## Uptake

In [70]:
# def. uptake as overlap
def calculate_uptake(excerpt, feedback):
    """Fraction of the excerpt's distinct lower-cased tokens that also occur in the feedback.

    Both texts are lower-cased and tokenized with word_tokenize, then reduced
    to sets. Returns 0 when the excerpt yields no tokens.
    """
    excerpt_vocab = set(word_tokenize(excerpt.lower()))
    feedback_vocab = set(word_tokenize(feedback.lower()))
    if not excerpt_vocab:
        return 0
    return len(excerpt_vocab & feedback_vocab) / len(excerpt_vocab)


# Uptake for every (excerpt, feedback) row.
df['uptake'] = df.apply(lambda row: calculate_uptake(row['excerpt'], row['feedback']), axis=1)

## Context Relevance

In [71]:
# cosine similarity of embeddings
import torch
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity

# BERT-specific names: the globals `tokenizer` and `model` are read by
# calculate_inverse_perplexity, and rebinding them here would make any later
# call to that function silently run BERT instead of GPT-2.
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')

def get_embeddings(text):
    """Embed `text` as the mean over tokens of BERT's last hidden state.

    The input is truncated to at most 512 tokens. The pooled tensor is
    squeezed and converted to a NumPy array before being returned.
    """
    inputs = bert_tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = bert_model(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
    return embeddings

# Function to calculate cosine similarity between embeddings
def calculate_cosine_similarity(excerpt, feedback):
    """Return the cosine similarity between the embeddings of `excerpt` and `feedback`."""
    excerpt_embedding = get_embeddings(excerpt)
    feedback_embedding = get_embeddings(feedback)
    # cosine_similarity works on 2-D inputs; wrap each vector and unwrap the single cell.
    similarity = cosine_similarity([excerpt_embedding], [feedback_embedding])[0][0]
    return similarity

# Apply the function to calculate cosine similarity for each row
df['cosine_similarity'] = df.apply(lambda row: calculate_cosine_similarity(row['excerpt'], row['feedback']), axis=1)



# Semantic Focus

## Pronouns

In [72]:
# Lower-case pronoun inventories matched against lower-cased feedback tokens.
first_person_singular = {'i', 'me', 'my', 'mine'}
second_person = {'you', 'your', 'yours'}
first_person_plural = {'we', 'us', 'our', 'ours'}


def count_pronouns(feedback, pronoun_set):
    """Count the tokens of lower-cased `feedback` that are members of `pronoun_set`."""
    return sum(token in pronoun_set for token in word_tokenize(feedback.lower()))


# One count column per pronoun inventory.
df['pronoun_fps'] = df['feedback'].apply(lambda text: count_pronouns(text, first_person_singular))
df['pronoun_sp'] = df['feedback'].apply(lambda text: count_pronouns(text, second_person))
df['pronoun_fpp'] = df['feedback'].apply(lambda text: count_pronouns(text, first_person_plural))


## Questions

In [73]:
wh_words = {'what', 'where', 'when', 'which', 'who', 'whom', 'whose', 'why', 'how'}
auxiliary_verbs = {'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'shall', 'should', 'can', 'could', 'may', 'might', 'must'}
# NOTE(review): is_question only consults this smaller set, so a sentence
# without '?' that starts with e.g. 'could' is not treated as a question even
# though 'could' is in auxiliary_verbs — confirm the two sets are meant to differ.
helping_verbs = {"is", "am", "can", "are", "do", "does"}

def is_question(sentence):
    """Heuristically decide whether `sentence` is a question.

    True when the stripped sentence ends with '?', or when its first token
    (compared case-insensitively, since the word sets are lower-case) is in
    `wh_words` or `helping_verbs`. Always returns a bool, including for input
    that yields no tokens.
    """
    if sentence.strip().endswith('?'):
        return True
    tokens = word_tokenize(sentence)
    if not tokens:
        return False
    first_token = tokens[0].lower()
    return first_token in wh_words or first_token in helping_verbs

def _count_questions_starting_with(feedback, starters):
    """Count sentences of lower-cased `feedback` that is_question accepts and that start with a token in `starters`."""
    count = 0
    for sentence in sent_tokenize(feedback.lower()):
        if not is_question(sentence):
            continue
        tokens = word_tokenize(sentence)
        if tokens and tokens[0] in starters:
            count += 1
    return count

def count_wh_questions(feedback):
    """Number of question sentences in `feedback` whose first token is a wh-word."""
    return _count_questions_starting_with(feedback, wh_words)

def count_yes_no_questions(feedback):
    """Number of question sentences in `feedback` whose first token is in `auxiliary_verbs`."""
    return _count_questions_starting_with(feedback, auxiliary_verbs)

df['wh_question_count'] = df['feedback'].apply(count_wh_questions)
df['yes_no_question_count'] = df['feedback'].apply(count_yes_no_questions)


In [45]:
# NOTE(review): this cell's execution count (45) is lower than the cells around
# it, which suggests it is left over from an earlier run. df.head() is not the
# last expression, so the cell displays nothing; its only effect is writing the
# current df to 'human_meta_pt1.csv'. On a top-to-bottom run that df is the
# frame loaded from 'finetune_res_asap.csv' — confirm that writing it under
# this file name is intended.
df.head()
df.to_csv('human_meta_pt1.csv', index=False)

In [44]:
# NOTE(review): stale cell (execution count 44). The output recorded for it has
# columns such as 'tid' and 'commentid' that are absent from the earlier
# df.head() output, so it appears to come from a different input file.
df.head()

Unnamed: 0,tid,grade,prompt,essay,essayid,excerpt,feedback,startidx,endidx,commentid,...,ttr,inverse_perplexity,content_word_density,uptake,cosine_similarity,pronoun_fps,pronoun_sp,pronoun_fpp,wh_question_count,yes_no_question_count
0,rfQv3LPBikeoFIE1NyA3,ms,Some of your friends perform community service...,"Dear Principal,\n\nI have heard you are having...",1CE6C10B9683,i,Capital and introductory phrase,255,258,1718398386391,...,1.0,8.9e-05,0.75,0.0,0.507323,0,0,0,0,0
1,rfQv3LPBikeoFIE1NyA3,ms,Some of your friends perform community service...,"Dear Principal,\n\nI have heard you are having...",1CE6C10B9683,e a,Introductory phrase,548,551,1718398439880,...,1.0,5.3e-05,1.0,0.0,0.654442,0,0,0,0,0
2,rfQv3LPBikeoFIE1NyA3,ms,Some of your friends perform community service...,"Dear Principal,\n\nI have heard you are having...",1CE6C10B9683,dyer,wrong word,1137,1141,1718398530582,...,1.0,0.00017,1.0,0.0,0.683321,0,0,0,0,0
3,rfQv3LPBikeoFIE1NyA3,ms,Some of your friends perform community service...,"Dear Principal,\n\nI have heard you are having...",1CE6C10B9683,know it may sound funny but picking up trash ...,run-on sentence,1267,1487,1718398574934,...,1.0,0.00078,1.0,0.0,0.426005,0,0,0,0,0
4,rfQv3LPBikeoFIE1NyA3,ms,Some of your friends perform community service...,"Dear Principal,\n\nI have heard you are having...",1CE6C10B9683,or example if one student did community servic...,Good example,791,875,1718398626223,...,1.0,0.000219,1.0,0.0625,0.460753,0,0,0,0,0


In [16]:
import pandas as pd
from tqdm import tqdm
# Replace df with the contents of 'human_meta_pt1.csv'.
# NOTE(review): execution count 16 is out of sequence with the neighbouring
# cells; on a top-to-bottom run this discards the in-memory frame in favour of
# the CSV written two cells earlier — confirm this reload is wanted.
df = pd.read_csv('human_meta_pt1.csv')

In [74]:
# Make sure both text columns hold str values before they reach the classifiers.
for text_column in ('feedback', 'excerpt'):
    df[text_column] = df[text_column].astype(str)

## Non-Dialogic

In [75]:
from tqdm import tqdm
from transformers import pipeline
# Text-classification pipeline for the non-dialogic model, placed on device 0.
pipe = pipeline("text-classification", model="meiflwr/nondialogic_feedback", device=0)


def get_dro(excerpt, feedback):
    """Classify one (feedback, excerpt) pair with the current global `pipe`.

    Returns a one-element list: [1] when the predicted label is 'LABEL_1',
    otherwise [0].
    NOTE(review): because a list is returned, the resulting column holds
    lists rather than ints — confirm downstream code expects that.
    """
    prediction = pipe([{'text': feedback, 'text_pair': excerpt}])[0]
    return [1] if prediction['label'] == 'LABEL_1' else [0]


# Register progress_apply on pandas objects, then label every row.
tqdm.pandas()
df['ND'] = df.progress_apply(lambda row: get_dro(row['excerpt'], row['feedback']), axis=1)

100%|██████████| 200/200 [00:01<00:00, 134.35it/s]


## Non-Revision-Oriented

In [76]:
from transformers import pipeline
# Rebind `pipe` to the non-revision-oriented classifier.
pipe = pipeline("text-classification", model="meiflwr/nonrevisionoriented_feedback")


def get_dro(excerpt, feedback):
    """Classify one (feedback, excerpt) pair with the current global `pipe`.

    Returns [1] when the predicted label is 'LABEL_1' and [0] otherwise
    (a one-element list, not a bare int).
    """
    predicted_label = pipe([{'text': feedback, 'text_pair': excerpt}])[0]['label']
    flag = 1 if predicted_label == 'LABEL_1' else 0
    return [flag]


# Register progress_apply on pandas objects, then label every row.
tqdm.pandas()
df['NRO'] = df.progress_apply(lambda row: get_dro(row['excerpt'], row['feedback']), axis=1)

100%|██████████| 200/200 [00:05<00:00, 33.37it/s]


## Pure Praise

In [77]:
from transformers import pipeline
# Rebind `pipe` to the praise classifier.
pipe = pipeline("text-classification", model="meiflwr/praise_feedback")


def get_dro(excerpt, feedback):
    """Classify one (feedback, excerpt) pair with the current global `pipe`.

    Returns [1] when the predicted label is 'LABEL_1' and [0] otherwise
    (a one-element list, not a bare int).
    """
    pair = {'text': feedback, 'text_pair': excerpt}
    top = pipe([pair])[0]
    if top['label'] == 'LABEL_1':
        return [1]
    return [0]


# Register progress_apply on pandas objects, then label every row.
tqdm.pandas()
df['PP'] = df.progress_apply(lambda row: get_dro(row['excerpt'], row['feedback']), axis=1)

100%|██████████| 200/200 [00:06<00:00, 31.66it/s]


## Power-Affirming

In [78]:
from tqdm import tqdm
# Use a pipeline as a high-level helper
from transformers import pipeline
# Rebind `pipe` to the power-affirming model, placed on device 0.
pipe = pipeline("text-classification", model="meiflwr/power-affirming-feedback", device=0)


def get_pa(excerpt, feedback):
    """Run the global `pipe` on one (feedback, excerpt) pair and return the 'score' of its first result.

    NOTE(review): this is the score attached to whatever label the pipeline
    returns first; if the model has more than one label it is not necessarily
    the probability of one fixed class — confirm against the model card.
    """
    first_result = pipe([{'text': feedback, 'text_pair': excerpt}])[0]
    return first_result['score']


# Register progress_apply on pandas objects, then score every row.
tqdm.pandas()
df['pa_mean'] = df.progress_apply(lambda row: get_pa(row['excerpt'], row['feedback']), axis=1)

100%|██████████| 200/200 [00:01<00:00, 134.35it/s]


In [79]:
# Average of the per-row 'pa_mean' scores over the whole frame.
df['pa_mean'].mean()

0.5849784334003926

In [80]:
# Use a pipeline as a high-level helper
from transformers import pipeline
# Three separately loaded text-classification pipelines; each one scores every row below.
# NOTE(review): the second model id is spelled 'feedbak', unlike its siblings.
# The progress bars recorded for this cell show all three passes completing, so
# the id presumably matches the published model name — verify before "fixing" it.
pipe1 = pipeline("text-classification", model="meiflwr/poweraffirming_feedback_skew1")
pipe2 = pipeline("text-classification", model="meiflwr/poweraffirming_feedbak_skew2")
pipe3 = pipeline("text-classification", model="meiflwr/poweraffirming_feedback_skew3")

def get_pa(excerpt, feedback, pipeline):
    """Run `pipeline` on one (feedback, excerpt) pair and return the 'score' of its first result.

    `pipeline` is called with a list holding a single
    {'text': feedback, 'text_pair': excerpt} dict. Inside this function the
    parameter shadows the imported `pipeline` factory, and this definition
    replaces the earlier two-argument get_pa.
    """
    pair = {'text': feedback, 'text_pair': excerpt}
    results = pipeline([pair])
    return results[0]['score']

tqdm.pandas()

# Score every row with each of the three pipelines, one column per pipeline.
for pa_column, scorer in (('pa1', pipe1), ('pa2', pipe2), ('pa3', pipe3)):
    df[pa_column] = df.progress_apply(
        lambda row: get_pa(row['excerpt'], row['feedback'], scorer), axis=1
    )

# Row-wise variance and mean across the three score columns.
ensemble_scores = df[['pa1', 'pa2', 'pa3']]
df['pa_var'] = ensemble_scores.var(axis=1)
df['pa_mean2'] = ensemble_scores.mean(axis=1)

100%|██████████| 200/200 [00:06<00:00, 32.01it/s]
100%|██████████| 200/200 [00:06<00:00, 31.95it/s]
100%|██████████| 200/200 [00:05<00:00, 33.54it/s]


In [81]:
# Column means for each pipeline's scores, then for their row-wise average.
for summary_column in ('pa1', 'pa2', 'pa3', 'pa_mean2'):
    print(df[summary_column].mean())

0.5847966998815537
0.584222854077816
0.5793448173999787
0.5827881237864494


# Save

In [82]:
# Write the frame, with every column added above, to CSV without the index.
df.to_csv('finetune_res_asap_meta.csv', index = False)