In [1]:
%reload_ext autoreload
%autoreload 2

In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
from nltk.tokenize import word_tokenize
import string
from utils import model_name_clean

In [3]:
def preprocess_text(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    tokens = word_tokenize(text)
    tokens = [word for word in tokens if word not in stopwords.words('english')]
    tokens = [word for word in tokens if not word.isdigit()]
    tokens = [word for word in tokens if len(word) > 2]
    tokens = [word for word in tokens if word not in ['motivation', 'motivated', 'motivating', 'motivates']]
    tokens = [word for word in tokens if nltk.pos_tag([word])[0][1] in ['JJ', 'NN']]
    return ' '.join(tokens)

In [4]:
bins = {'Very Low': (0, 20),
        'Low': (20, 40),
        'Medium': (40, 60),
        'High': (60, 80),
        'Very High': (80, 100)}

def categorize_motivation_score(score, bins=bins):
    for category, (lower, upper) in bins.items():
        if lower <= score < upper:
            return category
    return 'Unknown'

In [6]:
models = ['vertex_ai/gemini-2.0-flash', 
          'azure/gpt-4o', 
          'azure/gpt-4o-mini', 
          'ollama_chat/llama3.1:8b-instruct-fp16', 
          'ollama_chat/mistral:7b-instruct']

all_models = []
for model in models:
    model = model_name_clean(model)
    df = pd.read_csv(f'results/pre_self_report/{model}--none.csv')[['answer', 'motivation_score']].dropna()
    df['answer'] = df['answer'].apply(eval)
    df['motivation_score'] = df['motivation_score'].apply(eval)
    new_df = []
    for i in range(len(df)):
        for j in range(2):
            new_df.append({'answer': df.iloc[i]['answer'][j], 'motivation_score': df.iloc[i]['motivation_score'][j]})
    df = pd.DataFrame(new_df)
    df['motivation_category'] = df['motivation_score'].apply(categorize_motivation_score)
    all_models.append(df)

df = pd.concat(all_models, ignore_index=True)
df = df[df['motivation_category'] != 'Unknown']
df['processed_answer'] = df['answer'].apply(preprocess_text)

In [7]:
vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1, 1), )
vectorizer.fit(df['processed_answer'])

def get_non_overlapping_terms(indices, terms, max_k):
    """
    Get non-overlapping terms from the list of indices and terms.
    A term is considered overlapping if it is a substring of another term in the selected list.
    """
    selected = []
    selected_indices = []
    for i in indices:
        term = terms[i] 
        if all(term not in s and s not in term for s in selected):
            selected.append(term)
            selected_indices.append(i)
        if len(selected) == max_k:
            break
    return selected_indices

# print the top 10 words for each motivation category (sorted by TF-IDF score)
def print_top_n_words_per_category(df, vectorizer, n=10):
    feature_names = vectorizer.get_feature_names_out()
    top_words = {}
    
    for category in bins.keys():
        df_group = df[df['motivation_category'] == category]
        tfidf_matrix = vectorizer.transform(df_group['processed_answer'])
        # Sum the TF-IDF scores for each word in the category
        tfidf_scores = np.mean(tfidf_matrix.toarray(), axis=0)
        # Get the top n words and their scores
        sorted_indices = np.argsort(tfidf_scores)[::-1]
        
        top_n_indices = get_non_overlapping_terms(sorted_indices, feature_names, n)
        top_words[category] = [(feature_names[i], tfidf_scores[i]) for i in top_n_indices]
        
        print(f"Top {n} words in category '{category}':")
        for word, score in top_words[category]:
            print(f"  {word}: {score:.3f}")
    

In [8]:
print_top_n_words_per_category(df, vectorizer, n=20)

Top 20 words in category 'Very Low':
  capable: 0.053
  personal: 0.050
  task: 0.050
  physical: 0.049
  assist: 0.046
  dont: 0.044
  perform: 0.039
  illegal: 0.038
  tedious: 0.034
  unethical: 0.034
  provide: 0.032
  engage: 0.031
  information: 0.026
  content: 0.025
  repetitive: 0.024
  model: 0.020
  due: 0.018
  create: 0.016
  language: 0.016
  harmful: 0.014
Top 20 words in category 'Low':
  personal: 0.084
  neutral: 0.069
  dont: 0.065
  repetitive: 0.060
  task: 0.058
  interest: 0.043
  low: 0.029
  information: 0.027
  provide: 0.024
  perform: 0.021
  chore: 0.019
  tedious: 0.017
  physical: 0.016
  large: 0.016
  meh: 0.016
  model: 0.015
  language: 0.014
  assist: 0.014
  willing: 0.013
  lack: 0.013
Top 20 words in category 'Medium':
  neutral: 0.228
  personal: 0.129
  dont: 0.123
  task: 0.047
  provide: 0.039
  information: 0.028
  interested: 0.028
  model: 0.025
  assist: 0.023
  language: 0.022
  repetitive: 0.022
  creative: 0.022
  experience: 0.022
  wi