<a href="https://colab.research.google.com/github/shantanu2106/appraisal-dimension-repository/blob/main/Toy_code_for_rating_appraisal_dimensions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing Packages

In [44]:
import os
import random
import re

import nltk
import numpy as np
import pandas as pd
import torch
from nltk.corpus import stopwords
from sentence_transformers import SentenceTransformer

nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Setting reproducibility seeds and stopwords

In [45]:
# Set seeds for reproducibility.
# One constant feeds every RNG so the whole notebook can be re-seeded in one place.
SEED = 42

# torch.use_deterministic_algorithms(True) makes cuBLAS-backed CUDA ops raise a
# RuntimeError unless a fixed cuBLAS workspace is configured. setdefault keeps
# any value the user already exported and has no effect on CPU-only runs.
os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
torch.use_deterministic_algorithms(True)

# Define stopwords set (NLTK's English list); used later to filter keyword candidates.
stop_words = set(stopwords.words('english'))

Toy appraisal anchors for mapping phrases to relevant dimensions

In [46]:
# Toy anchor phrases, keyed by appraisal dimension.
# Each dimension maps to a short list of phrases that exemplify it; in practice
# these should be selected from established literature on appraisal theory.
# Extend the mapping with further dimensions / phrases as needed.
appraisal_anchors = {
    'Certainty': [
        "it is certain",
        "without any doubt",
        "clearly defined goals",
    ],
    'Pleasantness': [
        "a pleasant outlook",
        "enjoyable growth",
        "optimistic atmosphere",
    ],
    'Goal Significance': [
        "crucial for our objectives",
        "important for our strategy",
        "key to future success",
    ],
    'Control': [
        "we have control over",
        "manageable situation",
        "well within our influence",
    ],
    'Responsibility': [
        "we take responsibility",
        "accountable for these outcomes",
        "we bear the burden",
    ],
}

# Dimension names in insertion order; this order drives the per-dimension loops below.
dimensions = [*appraisal_anchors]


Initializing the Sentence Transformer model

In [47]:
# Load a sentence-level embedding model (Sentence-BERT).
# This single `model` instance is reused by the later cells to embed the document
# sentences, the anchor phrases and the candidate keywords, so all three live in
# the same embedding space.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')  # A compact, efficient model


Defining the preprocessing function: splitting a document into sentences with regular expressions

In [48]:
def load_document_sentences(filepath):
    """Read a UTF-8 text file and split it into sentences.

    Splitting is deliberately basic: a sentence ends at a run of '.', '?' or
    '!' that is followed by whitespace or by the end of the text. Terminal
    punctuation is removed from every sentence, including the last one, so all
    sentences are embedded in the same form. A '.' that is directly followed by
    a non-space character (e.g. the decimal point in "3.5") does not split.

    Parameters
    ----------
    filepath : str or path-like
        Path of the text file to read.

    Returns
    -------
    list[str]
        Non-empty, whitespace-stripped sentences in document order. An empty
        or whitespace-only file yields an empty list.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        text = f.read()
    # `[.?!]+` swallows runs such as "..." or "?!"; `\Z` lets the final
    # sentence lose its terminator just like the others do.
    pieces = re.split(r'[.?!]+(?:\s+|\Z)', text.strip())
    return [piece.strip() for piece in pieces if piece.strip()]

Defining the function that computes the cosine similarity between two embeddings (anchor phrases vs. the uploaded document)

In [49]:
# ----------------------------------------------------
# Cosine similarity between two embedding vectors.
# The result lies in [-1, 1]; it is converted to a rating by a separate function.
def similarity(a, b):
    """Return the cosine similarity of vectors `a` and `b`.

    Parameters
    ----------
    a, b : array-like
        One-dimensional vectors of equal length.

    Returns
    -------
    float
        dot(a, b) / (||a|| * ||b||). If either vector has zero norm the
        cosine is undefined; 0.0 ("no similarity") is returned instead of
        NaN so downstream comparisons and averages stay well-defined.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        # Avoid 0/0 -> NaN (and the accompanying RuntimeWarning).
        return 0.0
    return np.dot(a, b) / denom

Defining the function that converts a similarity into a rating

In [50]:
# Convert a similarity into a rating.
# These per-sentence ratings are later combined per dimension with a
# similarity-weighted average, so high-similarity sentences carry more
# influence on the final rating.
def similarity_to_rating(sim):
    """Linearly map a similarity in [-1, 1] onto a rating in [1, 5].

    sim = -1 gives 1, sim = 0 gives 3 and sim = 1 gives 5. The result is
    rounded to two decimal places.
    """
    shifted = sim + 1          # [-1, 1] -> [0, 2]
    scaled = shifted * 2 + 1   # [0, 2]  -> [1, 5]
    return round(scaled, 2)

Defining the function that computes the weighted average used to aggregate the final ratings for a document

In [51]:
def weighted_average(values, weights):
    """Return the weighted mean of `values`, rounded to two decimals.

    Parameters
    ----------
    values : array-like of float
        The quantities to average (here: per-sentence ratings).
    weights : array-like of float
        One weight per value (here: per-sentence cosine similarities).
        Negative weights are treated as 0: cosine similarities can be
        negative, and a negative weight could push the "average" outside the
        range of the values or flip the sign of the denominator.

    Returns
    -------
    float
        * 3.00 (the midpoint of the 1-5 rating scale) when `values` is empty;
        * the plain mean of `values` when no weight is positive;
        * otherwise sum(values * weights) / sum(weights).
    """
    # asarray lets plain Python lists work; `values * weights` needs ndarrays.
    values = np.asarray(values, dtype=float)
    weights = np.clip(np.asarray(weights, dtype=float), 0.0, None)

    if values.size == 0:
        return 3.00

    total_weight = np.sum(weights)
    if total_weight == 0:
        return float(np.round(np.mean(values), 2))
    return float(round(float(np.sum(values * weights) / total_weight), 2))

Defining the function that finds the keyword in a sentence that best matches a given anchor phrase

In [52]:
# Find the keyword in the sentence that best matches the chosen anchor phrase
def find_best_keyword_in_sentence(sentence, anchor_phrase_embed, model, stop_words):
    """Pick the word of `sentence` most similar to an anchor-phrase embedding.

    Parameters
    ----------
    sentence : str
        The sentence to search.
    anchor_phrase_embed : array-like
        Embedding of the anchor phrase to compare each word against.
    model : object
        Encoder exposing `encode(list_of_str, convert_to_numpy=True)` that
        returns one embedding per input string.
    stop_words : collection of str
        Lower-case stopwords to exclude. The check is case-insensitive, so
        capitalised stopwords such as "The" or "While" are excluded as well.

    Returns
    -------
    tuple
        `(best_word, best_similarity)` with the similarity rounded to five
        decimals, or `(None, None)` when no candidate word remains after
        filtering (or no candidate yields a comparable similarity).
    """
    # Tokenize the sentence into words
    words = re.findall(r'\w+', sentence)
    # Keep words longer than two characters that are not stopwords.
    filtered_words = [w for w in words if len(w) > 2 and w.lower() not in stop_words]
    # Guard on the *filtered* list: a sentence made only of stopwords / short
    # tokens must not reach the encoder with an empty batch.
    if not filtered_words:
        return None, None

    # Encode each filtered word
    word_embeddings = model.encode(filtered_words, convert_to_numpy=True)

    # Compute similarity of each word with the anchor phrase embedding;
    # the first word wins ties because the comparison is strict.
    best_sim = float('-inf')
    best_word = None
    for w, w_embed in zip(filtered_words, word_embeddings):
        sim = similarity(w_embed, anchor_phrase_embed)
        if sim > best_sim:
            best_sim = sim
            best_word = w

    if best_word is None:
        return None, None
    return best_word, round(best_sim, 5)

Running the main script with all the defined functions

In [61]:
if __name__ == "__main__":
    # Replace 'input_document.txt' with your actual file
    input_file = 'input_document.txt'

    sentences = load_document_sentences(input_file)
    # Fail early with a clear message: with no sentences the DataFrame below
    # would have no columns and the aggregation would die on a KeyError.
    if not sentences:
        raise ValueError(f"No sentences found in {input_file!r}; nothing to rate.")
    sentence_embeddings = model.encode(sentences, convert_to_numpy=True)

    # Precompute anchor embeddings for each phrase (one array of embeddings per dimension)
    anchor_phrase_embeddings = {dim: model.encode(phrases, convert_to_numpy=True)
                                for dim, phrases in appraisal_anchors.items()}

    # One output row per (sentence, dimension) pair.
    rows = []
    for sent, sent_embed in zip(sentences, sentence_embeddings):
        for dim in dimensions:
            # Find best anchor phrase for this sentence-dimension
            phrases = appraisal_anchors[dim]
            p_embeds = anchor_phrase_embeddings[dim]

            best_sim = -1
            best_phrase = None
            best_phrase_embed = None
            for phrase, p_embed in zip(phrases, p_embeds):
                sim = similarity(sent_embed, p_embed)
                if sim > best_sim:
                    best_sim = sim
                    best_phrase = phrase
                    best_phrase_embed = p_embed

            # Map the best sentence/anchor similarity to a rating
            rating = similarity_to_rating(best_sim)

            # Find best keyword in the sentence after stopword removal
            best_word, word_sim = find_best_keyword_in_sentence(sent, best_phrase_embed, model, stop_words)

            rows.append({
                'Sentence': sent,
                'Dimension': dim,
                'Best_Anchor_Phrase': best_phrase,
                'Best_Keyword_in_Sentence': best_word if best_word else "",
                # None (no usable keyword) is stored as-is.
                'Keyword_Similarity_to_Anchor': word_sim,
                'Sentence_Similarity': round(best_sim, 5),
                'Sentence_Rating': rating
            })

    df = pd.DataFrame(rows)

    # Aggregate final ratings per dimension using weighted average by Sentence_Similarity
    final_results = []
    for dim in dimensions:
        dim_data = df[df['Dimension'] == dim]
        sims = dim_data['Sentence_Similarity'].values
        ratings = dim_data['Sentence_Rating'].values
        final_rating = weighted_average(ratings, sims)
        final_results.append({
            'Dimension': dim,
            'Final_Rating': final_rating
        })

    final_df = pd.DataFrame(final_results)



Visualising the output in the Jupyter notebook environment

In [60]:

# Sentence-level table: build the Styler step by step.
# Each Styler call returns the Styler, so re-assigning is equivalent to chaining.
styled_df = df.style
# Red-yellow-green colour scale on the rating and keyword-similarity columns.
styled_df = styled_df.background_gradient(
    subset=["Sentence_Rating", "Keyword_Similarity_to_Anchor"],
    cmap='RdYlGn',
    low=0,
    high=0.7,
)
# Let long sentences wrap inside a fixed-width column.
styled_df = styled_df.set_properties(
    subset=['Sentence'],
    **{'white-space': 'pre-wrap', 'width': '400px'},
)
# Show the numeric columns with two decimals.
styled_df = styled_df.format(
    precision=2,
    subset=["Sentence_Rating", "Keyword_Similarity_to_Anchor", "Sentence_Similarity"],
)

# Per-dimension summary table: same colour scale and number format.
styled_final_df = final_df.style
styled_final_df = styled_final_df.background_gradient(
    subset=["Final_Rating"],
    cmap='RdYlGn',
    low=0,
    high=0.7,
)
styled_final_df = styled_final_df.format(precision=2, subset=["Final_Rating"])

# Render both tables in the notebook, each under a plain-text heading.
print("Detailed Sentence-Level Analysis:")
display(styled_df)

print("\nFinal Aggregated Ratings:")
display(styled_final_df)

Detailed Sentence-Level Analysis:


Unnamed: 0,Sentence,Dimension,Best_Anchor_Phrase,Best_Keyword_in_Sentence,Keyword_Similarity_to_Anchor,Sentence_Similarity,Sentence_Rating
0,"I will next provide a market, technology and manufacturing outlook",Certainty,clearly defined goals,technology,0.2,0.1,3.2
1,"I will next provide a market, technology and manufacturing outlook",Pleasantness,a pleasant outlook,outlook,0.35,0.3,3.6
2,"I will next provide a market, technology and manufacturing outlook",Goal Significance,key to future success,technology,0.38,0.45,3.9
3,"I will next provide a market, technology and manufacturing outlook",Control,well within our influence,provide,0.29,0.2,3.4
4,"I will next provide a market, technology and manufacturing outlook",Responsibility,accountable for these outcomes,provide,0.29,0.2,3.39
5,"While we are monitoring the near-term impacts of solar procurement, the catalysts for driving increased utility-scale solar penetration continue to grow",Certainty,clearly defined goals,utility,0.27,-0.0,3.0
6,"While we are monitoring the near-term impacts of solar procurement, the catalysts for driving increased utility-scale solar penetration continue to grow",Pleasantness,enjoyable growth,grow,0.64,0.12,3.25
7,"While we are monitoring the near-term impacts of solar procurement, the catalysts for driving increased utility-scale solar penetration continue to grow",Goal Significance,crucial for our objectives,utility,0.39,0.17,3.35
8,"While we are monitoring the near-term impacts of solar procurement, the catalysts for driving increased utility-scale solar penetration continue to grow",Control,we have control over,continue,0.32,0.19,3.39
9,"While we are monitoring the near-term impacts of solar procurement, the catalysts for driving increased utility-scale solar penetration continue to grow",Responsibility,accountable for these outcomes,impacts,0.32,0.08,3.16



Final Aggregated Ratings:


Unnamed: 0,Dimension,Final_Rating
0,Certainty,3.29
1,Pleasantness,3.37
2,Goal Significance,3.53
3,Control,3.37
4,Responsibility,3.38
