In [2]:
%pip install nltk pandas
%pip install textstat
%pip install transformers torch scikit-learn
import nltk
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
from nltk import pos_tag
# Fetch the NLTK resources used later in the notebook: the averaged-perceptron
# POS-tagger model and the Punkt tokenizer data.
nltk.download('averaged_perceptron_tagger')
nltk.download('punkt')

Collecting nltk
  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 29.3 MB/s eta 0:00:01
Collecting joblib
  Downloading joblib-1.4.2-py3-none-any.whl (301 kB)
[K     |████████████████████████████████| 301 kB 86.3 MB/s eta 0:00:01
Installing collected packages: joblib, nltk
Successfully installed joblib-1.4.2 nltk-3.8.1
Note: you may need to restart the kernel to use updated packages.
Collecting textstat
  Downloading textstat-0.7.3-py3-none-any.whl (105 kB)
[K     |████████████████████████████████| 105 kB 35.1 MB/s eta 0:00:01
[?25hCollecting pyphen
  Downloading pyphen-0.15.0-py3-none-any.whl (2.1 MB)
[K     |████████████████████████████████| 2.1 MB 90.3 MB/s eta 0:00:01
[?25hInstalling collected packages: pyphen, textstat
Successfully installed pyphen-0.15.0 textstat-0.7.3
Note: you may need to restart the kernel to use updated packages.
Collecting scikit-learn
  Downloading scikit_learn-1.3.2-cp38-cp38-manylinux_2_17_x86_64.man

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/mxtan/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package punkt to /home/mxtan/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [32]:
import pandas as pd
# Load the human feedback records; later cells read the 'feedback' and
# 'excerpt' columns of this frame.
df = pd.read_csv('human_res.csv')

# Structure

## Basic Lengths

In [34]:
# Word count per feedback comment (whitespace-delimited tokens).
# The column is cast to str first so every cell can be split.
df['feedback'] = df['feedback'].astype(str)


def _word_count(text):
    """Return the number of whitespace-separated tokens in `text`."""
    return len(text.split())


df['wc'] = df['feedback'].map(_word_count)

In [35]:
# Sentence count per feedback comment.
def count_sentences(feedback):
    """Return how many sentences `sent_tokenize` finds in `feedback`."""
    return len(sent_tokenize(feedback))


df['sc'] = df['feedback'].map(count_sentences)

## Complexity

In [36]:
# Flesch Reading Ease score per feedback comment.
# NOTE: the column is named 'fk', but the value is textstat's
# flesch_reading_ease, not a Flesch-Kincaid grade level.
import pandas as pd
import textstat
df['fk'] = df['feedback'].map(textstat.flesch_reading_ease)

In [37]:
# TTR
import nltk
def calculate_ttr(feedback):
    """Return the type-token ratio of `feedback`.

    The ratio is the number of distinct tokens divided by the total number of
    tokens produced by `word_tokenize`. Tokens are compared as-is (no case
    folding). Returns 0 when the text yields no tokens.
    """
    words = word_tokenize(feedback)
    if not words:
        return 0
    return len(set(words)) / len(words)


df['ttr'] = df['feedback'].map(calculate_ttr)

## Fluency

In [38]:
# inverse perplexity
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load pre-trained GPT-2 model and tokenizer.
# Dedicated names keep this scorer independent of any other cell that binds
# generic `tokenizer` / `model` globals for a different network.
gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2')


# Function to calculate inverse perplexity
def calculate_inverse_perplexity(feedback):
    """Return 1 / perplexity of `feedback` under GPT-2.

    Perplexity is exp(mean next-token loss), so higher return values mean the
    text is more predictable to the language model.

    Args:
        feedback: The text to score (a str).

    Returns:
        A float: 1 / perplexity, 0 if the perplexity evaluates to 0, or NaN
        when the text tokenizes to fewer than two tokens (there is then no
        next-token prediction to score).
    """
    # Truncate to the model's position limit so that very long comments do not
    # index past the position-embedding table.
    inputs = gpt2_tokenizer(
        feedback,
        return_tensors='pt',
        truncation=True,
        max_length=gpt2_model.config.n_positions,
    )

    # With zero tokens the forward pass fails, and with one token the shifted
    # language-model loss has nothing to average over; report NaN explicitly.
    if inputs['input_ids'].shape[-1] < 2:
        return float('nan')

    with torch.no_grad():
        outputs = gpt2_model(**inputs, labels=inputs['input_ids'])

    perplexity = torch.exp(outputs.loss).item()
    return 1 / perplexity if perplexity != 0 else 0


df['inverse_perplexity'] = df['feedback'].apply(calculate_inverse_perplexity)

# Specificity

## Content Word Density

In [39]:
import nltk
def calculate_content_word_density(feedback):
    """Return the share of tokens in `feedback` that are content words.

    A token counts as a content word when its POS tag starts with 'NN', 'VB',
    'JJ' or 'RB' (nouns, verbs, adjectives, adverbs). Returns 0 when the text
    yields no tokens.
    """
    words = word_tokenize(feedback)
    if not words:
        return 0
    content_tag_prefixes = ('NN', 'VB', 'JJ', 'RB')
    n_content = sum(
        1 for _, tag in pos_tag(words) if tag.startswith(content_tag_prefixes)
    )
    return n_content / len(words)


df['content_word_density'] = df['feedback'].map(calculate_content_word_density)

## Uptake

In [40]:
# Uptake is defined here as lexical overlap between excerpt and feedback.
def calculate_uptake(excerpt, feedback):
    """Return the fraction of distinct excerpt tokens that recur in the feedback.

    Both texts are lower-cased and tokenized with `word_tokenize`; the result
    is |excerpt tokens ∩ feedback tokens| / |excerpt tokens|, or 0 when the
    excerpt yields no tokens.
    """
    excerpt_vocab = set(word_tokenize(excerpt.lower()))
    feedback_vocab = set(word_tokenize(feedback.lower()))
    if not excerpt_vocab:
        return 0
    shared = excerpt_vocab & feedback_vocab
    return len(shared) / len(excerpt_vocab)


# Row-wise: each row supplies its own excerpt / feedback pair.
df['uptake'] = df.apply(
    lambda rec: calculate_uptake(rec['excerpt'], rec['feedback']),
    axis=1,
)

## Context Relevance

In [41]:
# cosine similarity of embeddings
import torch
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity

# Dedicated names so that loading BERT does not rebind the generic
# `tokenizer` / `model` globals that other cells may have bound to a
# different network.
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')


def get_embeddings(text):
    """Return a mean-pooled BERT embedding of `text` as a numpy array.

    The text is tokenized (truncated to at most 512 tokens), run through BERT
    without gradient tracking, and the final hidden states are averaged over
    dim 1 before singleton dimensions are squeezed away.
    """
    inputs = bert_tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = bert_model(**inputs)
    # Average over dim 1 (presumably the token axis — confirm against the
    # model's output layout), then drop size-1 dimensions.
    embeddings = outputs.last_hidden_state.mean(dim=1).squeeze().numpy()
    return embeddings

# Cosine similarity between the excerpt embedding and the feedback embedding.
def calculate_cosine_similarity(excerpt, feedback):
    """Return the cosine similarity of the two texts' `get_embeddings` vectors."""
    vectors = [get_embeddings(text) for text in (excerpt, feedback)]
    # cosine_similarity expects 2-D inputs, hence the one-row slices.
    pairwise = cosine_similarity(vectors[:1], vectors[1:])
    return pairwise[0][0]


# Row-wise: each row supplies its own excerpt / feedback pair.
df['cosine_similarity'] = df.apply(
    lambda rec: calculate_cosine_similarity(rec['excerpt'], rec['feedback']),
    axis=1,
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


# Semantic Focus

## Pronouns

In [42]:
first_person_singular = {'i', 'me', 'my', 'mine'}
second_person = {'you', 'your', 'yours'}
first_person_plural = {'we', 'us', 'our', 'ours'}


def count_pronouns(feedback, pronoun_set):
    """Return how many tokens of lower-cased `feedback` are in `pronoun_set`."""
    lowered_tokens = word_tokenize(feedback.lower())
    return len([tok for tok in lowered_tokens if tok in pronoun_set])


# One count column per pronoun group.
pronoun_columns = {
    'pronoun_fps': first_person_singular,
    'pronoun_sp': second_person,
    'pronoun_fpp': first_person_plural,
}
for column, pronouns in pronoun_columns.items():
    df[column] = df['feedback'].apply(count_pronouns, args=(pronouns,))


## Questions

In [43]:
wh_words = {'what', 'where', 'when', 'which', 'who', 'whom', 'whose', 'why', 'how'}
auxiliary_verbs = {
    'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
    'have', 'has', 'had', 'do', 'does', 'did',
    'will', 'would', 'shall', 'should', 'can', 'could', 'may', 'might', 'must',
}
helping_verbs = {"is", "am", "can", "are", "do", "does"}


def is_question(sentence):
    """Heuristically decide whether `sentence` is a question.

    Truthy when the sentence ends with '?' or when its first token is a
    wh-word or one of `helping_verbs`. The word sets are lower-case, so
    callers are expected to pass lower-cased text.
    """
    if sentence.strip().endswith('?'):
        return True
    tokens = word_tokenize(sentence)
    return tokens and (tokens[0] in wh_words or tokens[0] in helping_verbs)


def _count_questions_led_by(feedback, lead_words):
    """Count question sentences in `feedback` whose first token is in `lead_words`."""
    total = 0
    for sentence in sent_tokenize(feedback.lower()):
        if is_question(sentence) and word_tokenize(sentence)[0] in lead_words:
            total += 1
    return total


def count_wh_questions(feedback):
    """Return the number of question sentences that open with a wh-word."""
    return _count_questions_led_by(feedback, wh_words)


def count_yes_no_questions(feedback):
    """Return the number of question sentences that open with an auxiliary verb."""
    return _count_questions_led_by(feedback, auxiliary_verbs)


df['wh_question_count'] = df['feedback'].map(count_wh_questions)
df['yes_no_question_count'] = df['feedback'].map(count_yes_no_questions)


In [45]:
# NOTE: this head() call displays nothing, because it is not the last
# expression of the cell.
df.head()
# Checkpoint the frame with the text-metric columns; a later cell reloads
# this file before running the classifier pipelines.
df.to_csv('human_meta_pt1.csv', index=False)

In [44]:
# Preview the first rows of the frame, including the metric columns added above.
df.head()

Unnamed: 0,tid,grade,prompt,essay,essayid,excerpt,feedback,startidx,endidx,commentid,...,ttr,inverse_perplexity,content_word_density,uptake,cosine_similarity,pronoun_fps,pronoun_sp,pronoun_fpp,wh_question_count,yes_no_question_count
0,rfQv3LPBikeoFIE1NyA3,ms,Some of your friends perform community service...,"Dear Principal,\n\nI have heard you are having...",1CE6C10B9683,i,Capital and introductory phrase,255,258,1718398386391,...,1.0,8.9e-05,0.75,0.0,0.507323,0,0,0,0,0
1,rfQv3LPBikeoFIE1NyA3,ms,Some of your friends perform community service...,"Dear Principal,\n\nI have heard you are having...",1CE6C10B9683,e a,Introductory phrase,548,551,1718398439880,...,1.0,5.3e-05,1.0,0.0,0.654442,0,0,0,0,0
2,rfQv3LPBikeoFIE1NyA3,ms,Some of your friends perform community service...,"Dear Principal,\n\nI have heard you are having...",1CE6C10B9683,dyer,wrong word,1137,1141,1718398530582,...,1.0,0.00017,1.0,0.0,0.683321,0,0,0,0,0
3,rfQv3LPBikeoFIE1NyA3,ms,Some of your friends perform community service...,"Dear Principal,\n\nI have heard you are having...",1CE6C10B9683,know it may sound funny but picking up trash ...,run-on sentence,1267,1487,1718398574934,...,1.0,0.00078,1.0,0.0,0.426005,0,0,0,0,0
4,rfQv3LPBikeoFIE1NyA3,ms,Some of your friends perform community service...,"Dear Principal,\n\nI have heard you are having...",1CE6C10B9683,or example if one student did community servic...,Good example,791,875,1718398626223,...,1.0,0.000219,1.0,0.0625,0.460753,0,0,0,0,0


In [16]:
import pandas as pd
from tqdm import tqdm
# Reload the checkpoint written after the text-metric cells.
df = pd.read_csv('human_meta_pt1.csv')

In [18]:
# Both text columns are cast to str before being passed to the classifiers.
for text_column in ('feedback', 'excerpt'):
    df[text_column] = df[text_column].astype(str)

## Non-Dialogic

In [19]:
from transformers import pipeline
# Non-dialogic classifier; device=0 selects accelerator index 0.
pipe = pipeline("text-classification", model="meiflwr/nondialogic_feedback", device=0)


def get_dro(excerpt, feedback):
    """Classify one (feedback, excerpt) pair with the module-level `pipe`.

    Returns a one-element list: [1] when the predicted label is 'LABEL_1',
    otherwise [0]. The resulting column therefore holds lists, not plain ints.
    NOTE(review): presumably 'LABEL_1' is the positive class — confirm against
    the model card.
    """
    prediction = pipe([{'text': feedback, 'text_pair': excerpt}])[0]
    return [1] if prediction['label'] == 'LABEL_1' else [0]


tqdm.pandas()
df['ND'] = df.progress_apply(
    lambda rec: get_dro(rec['excerpt'], rec['feedback']),
    axis=1,
)

100%|██████████| 1163/1163 [00:09<00:00, 129.08it/s]


## Non-Revision-Oriented

In [20]:
from transformers import pipeline
# Non-revision-oriented classifier (rebinds the module-level `pipe`).
pipe = pipeline("text-classification", model="meiflwr/nonrevisionoriented_feedback")


def get_dro(excerpt, feedback):
    """Classify one (feedback, excerpt) pair with the module-level `pipe`.

    Returns a one-element list: [1] when the predicted label is 'LABEL_1',
    otherwise [0]. The resulting column therefore holds lists, not plain ints.
    NOTE(review): presumably 'LABEL_1' is the positive class — confirm against
    the model card.
    """
    prediction = pipe([{'text': feedback, 'text_pair': excerpt}])[0]
    return [1] if prediction['label'] == 'LABEL_1' else [0]


tqdm.pandas()
df['NRO'] = df.progress_apply(
    lambda rec: get_dro(rec['excerpt'], rec['feedback']),
    axis=1,
)

100%|██████████| 1163/1163 [00:36<00:00, 31.61it/s]


## Pure Praise

In [21]:
from transformers import pipeline
# Pure-praise classifier (rebinds the module-level `pipe`).
pipe = pipeline("text-classification", model="meiflwr/praise_feedback")


def get_dro(excerpt, feedback):
    """Classify one (feedback, excerpt) pair with the module-level `pipe`.

    Returns a one-element list: [1] when the predicted label is 'LABEL_1',
    otherwise [0]. The resulting column therefore holds lists, not plain ints.
    NOTE(review): presumably 'LABEL_1' is the positive class — confirm against
    the model card.
    """
    prediction = pipe([{'text': feedback, 'text_pair': excerpt}])[0]
    return [1] if prediction['label'] == 'LABEL_1' else [0]


tqdm.pandas()
df['PP'] = df.progress_apply(
    lambda rec: get_dro(rec['excerpt'], rec['feedback']),
    axis=1,
)

100%|██████████| 1163/1163 [00:38<00:00, 30.58it/s]


## Power-Affirming

In [22]:
# Use a pipeline as a high-level helper
from transformers import pipeline
# Three text-classification pipelines; their per-row scores are stored as
# pa1/pa2/pa3 and then summarized by variance and mean.
pipe1 = pipeline("text-classification", model="meiflwr/poweraffirming_feedback_skew1")
# NOTE(review): this repo id is spelled 'feedbak', unlike the other two —
# confirm it matches the published model name.
pipe2 = pipeline("text-classification", model="meiflwr/poweraffirming_feedbak_skew2")
pipe3 = pipeline("text-classification", model="meiflwr/poweraffirming_feedback_skew3")

def get_pa(excerpt, feedback, pipeline):
    """Score one (feedback, excerpt) pair with the given classifier.

    Args:
        excerpt: Text passed to the classifier as 'text_pair'.
        feedback: Text passed to the classifier as 'text'.
        pipeline: Callable taking a list of input dicts and returning a list
            of result dicts that each carry a 'score' entry.

    Returns:
        The 'score' of the first result.
        NOTE(review): the accompanying label is ignored, so this is the score
        of whichever label the classifier reports — confirm that is intended.
    """
    pair = {'text': feedback, 'text_pair': excerpt}
    predictions = pipeline([pair])
    return predictions[0]['score']

tqdm.pandas()
# Score every row with each of the three pipelines in turn.
for score_column, scorer in (('pa1', pipe1), ('pa2', pipe2), ('pa3', pipe3)):
    df[score_column] = df.progress_apply(
        lambda rec, scorer=scorer: get_pa(rec['excerpt'], rec['feedback'], scorer),
        axis=1,
    )

# Row-wise variance and mean across the three pa-scores.
pa_scores = df[['pa1', 'pa2', 'pa3']]
df['pa_var'] = pa_scores.var(axis=1)
df['pa_mean'] = pa_scores.mean(axis=1)

100%|██████████| 1163/1163 [00:35<00:00, 32.97it/s]
100%|██████████| 1163/1163 [00:37<00:00, 30.66it/s]
100%|██████████| 1163/1163 [00:36<00:00, 31.86it/s]


# Save

In [23]:
# Write the final frame, including the classifier-derived columns, without
# the index column.
df.to_csv('human_res_meta.csv', index = False)