In [20]:
from bert_score import BERTScorer

# Quick sanity check: score one candidate phrase against two different reference
# phrases and print the raw tuple returned by BERTScorer.score for each.
scorer = BERTScorer(model_type="bert-base-uncased")
for reference_phrase in ('personal habit', 'convenience'):
    print(scorer.score(cands=['Perceived Convenience'], refs=[reference_phrase]))


(tensor([0.6988]), tensor([0.6988]), tensor([0.6988]))
(tensor([0.6045]), tensor([0.7501]), tensor([0.6695]))


In [None]:
import numpy as np
from bert_score import BERTScorer
from tabulate import tabulate

from models.response_models import ScopeComponent


def display(pair_item):
    """Format an item as ``"<text> (<component name>)"``.

    ``pair_item[0]`` is the text and ``pair_item[1]`` is the component key,
    which is translated with ``ScopeComponent.get_component_name_from_key``.

    NOTE(review): in an IPython/Jupyter kernel this name shadows the
    interactive ``display`` helper -- consider renaming; confirm no other
    cell relies on the IPython version.
    """
    text = pair_item[0]
    component_name = ScopeComponent.get_component_name_from_key(pair_item[1])
    return "{} ({})".format(text, component_name)

def _collect_code_items(codes, compare_key):
    """Flatten ``{component_key: [entry, ...]}`` into ``(entry[compare_key], component_key)`` tuples.

    The ``"file"`` key is skipped because it is not a component.
    """
    return [
        (entry[compare_key], component_key)
        for component_key, entries in codes.items()
        if component_key != "file"
        for entry in entries
    ]


def calculate_best_code_pairs(reference_codes, candidate_codes, compare_key, scorer=None, threshold=0.75):
    """Pair every reference item with its most similar candidate item and print a report.

    For each reference item, only candidate items filed under the same
    component key are considered. Similarity is the BERTScore F1 value; the
    candidate with the highest F1 strictly greater than 0 wins (first one on
    ties).

    Args:
        reference_codes: Mapping of component key -> list of entries. Each
            entry is indexed with ``compare_key``. A ``"file"`` key, if
            present, is ignored.
        candidate_codes: Same structure as ``reference_codes``.
        compare_key: Key of the text field to compare inside each entry.
        scorer: Optional object with a ``score(cands=..., refs=...)`` method
            returning ``(P, R, F1)``. Pass an already loaded ``BERTScorer`` to
            avoid reloading the model on every call. When omitted, a
            ``bert-base-uncased`` scorer is created.
        threshold: Minimum score for a pair to be counted as "similar" in the
            printed summary.

    Returns:
        A list of ``[reference_item, best_candidate_item, score]`` where both
        items are ``(text, component_key)`` tuples and ``score`` is rounded to
        four decimals. ``best_candidate_item`` is ``None`` (with score
        ``0.0``) when no candidate shares the reference item's component.
    """
    if scorer is None:
        scorer = BERTScorer(model_type="bert-base-uncased")

    reference_items = _collect_code_items(reference_codes, compare_key)
    candidate_items = _collect_code_items(candidate_codes, compare_key)

    best_pairs = []
    for reference_item in reference_items:
        same_component = [
            candidate_item
            for candidate_item in candidate_items
            if candidate_item[1] == reference_item[1]
        ]

        best_score = 0
        best_can_item = None
        if same_component:
            # One batched call per reference item instead of one model call per pair.
            # The reference text goes in `cands` and the candidate texts in `refs`;
            # only the F1 component of the result is used.
            _, _, f1_scores = scorer.score(
                cands=[reference_item[0]] * len(same_component),
                refs=[candidate_item[0] for candidate_item in same_component],
            )
            for candidate_item, f1 in zip(same_component, f1_scores):
                f1 = float(f1)
                if f1 > best_score:
                    best_score = f1
                    best_can_item = candidate_item

        best_pairs.append([reference_item, best_can_item, round(float(best_score), 4)])

    # Column 1 is the reference item, column 2 the candidate matched to it.
    # An unmatched reference item is shown with "-" instead of crashing in display().
    table = [
        (
            display(reference_item),
            display(best_can_item) if best_can_item is not None else "-",
            score,
        )
        for reference_item, best_can_item, score in best_pairs
    ]

    print(
        tabulate(
            table,
            headers=["Reference Code", "Best Candidate Code", "Best Score"],
            tablefmt="rst",
        )
    )
    print()

    scores = [pair[2] for pair in best_pairs]
    similar_scores = [score for score in scores if score >= threshold]
    # np.mean([]) would emit a RuntimeWarning; report NaN explicitly instead.
    mean_score = float(np.mean(scores)) if scores else float("nan")

    print(f'Total codes: {len(scores)}')
    print(f'Total similar codes (score >= {threshold}): {len(similar_scores)}')
    print(f'Mean score: {mean_score}')
    print()

    return best_pairs


In [22]:
import json

# Semantic similarity (BERTScore) of thematic-analysis codes: the "_human" file
# supplies the reference codes, the other file the candidate codes.
# Each file holds JSON objects separated by a blank line; entries are paired by position.
with open("./travel/02_thematic_analysis_codes_human.txt") as f:
    reference_codes_raw = f.read().strip().split("\n\n")

with open("./travel/02_thematic_analysis_codes.txt") as f:
    candidate_codes_raw = f.read().strip().split("\n\n")

# Fail early with a clear message instead of an IndexError halfway through the loop.
if len(reference_codes_raw) < len(candidate_codes_raw):
    raise ValueError(
        f"Found {len(candidate_codes_raw)} candidate entries but only "
        f"{len(reference_codes_raw)} reference entries; they are paired by position."
    )

best_pairs = []
for reference_raw, candidate_raw in zip(reference_codes_raw, candidate_codes_raw):
    reference_codes: dict = json.loads(reference_raw)
    candidate_codes: dict = json.loads(candidate_raw)

    # Double quotes outside, single inside: reusing the same quote type within an
    # f-string is only valid syntax from Python 3.12 onwards.
    print(f"File: {candidate_codes['file']}")
    best_pairs += calculate_best_code_pairs(reference_codes, candidate_codes, "code")




# Overall aspect (data from all documents)
pairs = [pair[2] for pair in best_pairs]
similar_pair = [pair for pair in pairs if pair >= 0.75]

print("-"*50)
print(f'Overall Total Code: {len(pairs)}')
print(f'Overall Total similar codes (score >= 0.75): {len(similar_pair)}')
print(f'Overall Mean Similarity: {np.mean(pairs):.4f}')
print()

# Scores grouped by component key (pair[0] is the (text, component_key) reference item).
score_dict = {}
for pair in best_pairs:
    score_dict.setdefault(pair[0][1], []).append(pair[2])

for key in ScopeComponent.get_component_keys():
    component_name = ScopeComponent.get_component_name_from_key(key)
    component_scores = score_dict.get(key)
    if not component_scores:
        # No pair was scored for this component; avoid a KeyError and say so.
        print('{:<25}: n/a'.format(component_name))
        continue
    print('{:<25}: {:.4f} ± {:.4f}'.format(component_name, np.mean(component_scores), np.std(component_scores)))
print()

File: data/travel_scope_txt/Stage3_Crediton St Lwrence.txt


Candidate Code                                         Best Reference Code                                    Best Score
cyclists (Actors)                                      Residents Commuters (Actors)                               0.5275
tax payers  (Actors)                                   Residents Commuters (Actors)                               0.5725
residents  (Actors)                                    Residents Commuters (Actors)                               0.7156
pedestrians  (Actors)                                  Residents Commuters (Actors)                               0.571
cars (Physical Components)                             Alternative Vehicles (Physical Components)                 0.6933
public transport (Physical Components)                 Transportation Infrastructure (Physical Components)        0.6671
trains (Physical Components)                           Alternative Vehicles (Physical Components)                 0.5293
cycle ways (Physical Components) 

In [24]:
import json

# EABSS scope comparison: only the first blank-line-separated JSON object of
# each file is used. The "_human" file is the reference, the other the candidate.
with open("./travel/02_eabss_scope_human.txt") as f:
    reference_codes_raw = f.read().strip().partition("\n\n")[0]

with open("./travel/02_eabss_scope.txt") as f:
    candidate_codes_raw = f.read().strip().partition("\n\n")[0]

reference_codes: dict = json.loads(reference_codes_raw)
candidate_codes: dict = json.loads(candidate_codes_raw)

# Entries are compared on their "element" field.
best_pairs = calculate_best_code_pairs(
    reference_codes,
    candidate_codes,
    "element",
)

Candidate Code                                                      Best Reference Code                                         Best Score
Residents (Actors)                                                  Commuters (Actors)                                              0.6247
Walking (Physical Components)                                       Cycling (Physical Components)                                   0.6143
Cycling (Physical Components)                                       Cycling (Physical Components)                                   1
Cars (Physical Components)                                          Cars (Physical Components)                                      1
Public transport (Physical Components)                              Public Transport (Physical Components)                          1
Car dependency (Social Aspect)                                      Car Dependency Norms (Social Aspect)                            0.72
Reduce carbon emission (Social Aspect)      

In [25]:
from bert_score import BERTScorer

# Score one longer candidate phrase against two short reference phrases and
# print the raw tuple returned by BERTScorer.score for each.
scorer = BERTScorer(model_type="bert-base-uncased")
for reference_phrase in ('Frustration with Public Transport', 'Perceived Convenience'):
    print(scorer.score(cands=['Choose best transport mode on personal view'], refs=[reference_phrase]))


(tensor([0.4496]), tensor([0.4953]), tensor([0.4713]))
(tensor([0.3963]), tensor([0.4622]), tensor([0.4267]))


In [78]:
from bert_score import BERTScorer
import numpy as np

# BERTScore explanation and examples
# Ref: https://medium.com/@abonia/bertscore-explained-in-5-minutes-0b98553bfb71


def calculate_score_full(candidate_text: str, reference_text: str, scorer=None) -> float:
    """Return the BERTScore F1 between two texts, each scored as one whole string.

    Args:
        candidate_text: Text to evaluate; surrounding whitespace is stripped.
        reference_text: Text to compare against; surrounding whitespace is stripped.
        scorer: Optional object with a ``score(cands=..., refs=...)`` method
            returning ``(P, R, F1)``. Pass an already loaded ``BERTScorer`` to
            avoid reloading the model on every call. When omitted, a
            ``bert-base-uncased`` scorer is created.

    Returns:
        The F1 value of the single candidate/reference pair as a Python float.
    """
    if scorer is None:
        scorer = BERTScorer(model_type="bert-base-uncased")

    # Precision and recall are returned as well, but only F1 is reported.
    _, _, f1 = scorer.score(cands=[candidate_text.strip()], refs=[reference_text.strip()])
    return float(f1[0])

def _split_sentences(text: str):
    """Split ``text`` on ``"."`` and return the non-empty, whitespace-stripped pieces."""
    return [piece.strip() for piece in text.strip().split(".") if piece.strip() != ""]


def calculate_score_all_to_all(candidate_text: str, reference_text: str, scorer=None) -> float:
    """Mean, over candidate sentences, of the best BERTScore F1 against any reference sentence.

    Both texts are split into sentences on ``"."`` (a plain split, so decimal
    points and abbreviations also act as separators). Every candidate sentence
    is scored against every reference sentence and keeps its highest F1, with
    0 as the floor; the per-sentence maxima are then averaged.

    Args:
        candidate_text: Text whose sentences are evaluated.
        reference_text: Text whose sentences serve as references.
        scorer: Optional object with a ``score(cands=..., refs=...)`` method
            returning ``(P, R, F1)``. Pass an already loaded ``BERTScorer`` to
            avoid reloading the model on every call. When omitted, a
            ``bert-base-uncased`` scorer is created.

    Returns:
        The mean best-F1 as a Python float. ``nan`` when the candidate text
        has no sentences; ``0.0`` when the reference text has none.
    """
    if scorer is None:
        scorer = BERTScorer(model_type="bert-base-uncased")

    candidate_sentences = _split_sentences(candidate_text)
    reference_sentences = _split_sentences(reference_text)

    if not candidate_sentences:
        # Nothing to average; np.mean([]) would return NaN with a RuntimeWarning.
        return float("nan")

    scores = []
    for candidate in candidate_sentences:
        best_score = 0.0
        if reference_sentences:
            # One batched call per candidate sentence instead of one model call
            # per (candidate, reference) pair.
            _, _, f1_scores = scorer.score(
                cands=[candidate] * len(reference_sentences),
                refs=reference_sentences,
            )
            # Convert to plain floats so the list handed to np.mean is homogeneous.
            best_score = max(best_score, max(float(f1) for f1 in f1_scores))
        scores.append(best_score)

    return float(np.mean(scores))

In [79]:
import json
import numpy as np

# Load the reference profiles (the "_human" file) and the candidate profiles.
# Each file holds JSON objects separated by a blank line; only "summary" is compared.
with open("./travel/05_profiles_human.txt") as f:
    reference_profiles_raw = f.read().strip().split("\n\n")
    reference_profiles = list(map(json.loads, reference_profiles_raw))
    reference_summaries = [entry["summary"] for entry in reference_profiles]

with open("./travel/05_profiles.txt") as f:
    candidate_profiles_raw = f.read().strip().split("\n\n")
    candidate_profiles = list(map(json.loads, candidate_profiles_raw))
    candidate_summaries = [entry["summary"] for entry in candidate_profiles]

print("Sentiment analysis between Human and LLM profile summary results")
human_scores, llm_scores, human_llm_scores = [], [], []

for i, candidate_summary in enumerate(candidate_summaries):
    # The candidate profile's "file" field is the path of the source transcript.
    file = candidate_profiles[i]['file']
    print(f"File: {file}")

    with open(file) as f:
        transcript = f.read().strip()

    reference_summary = reference_summaries[i]

    # Reference summary scored against the transcript.
    human_vs_original = calculate_score_all_to_all(reference_summary, transcript)
    human_scores.append(human_vs_original)
    print(f"Mean F1 score Human - Original: {human_vs_original:.4f}")

    # Candidate summary scored against the transcript.
    llm_vs_original = calculate_score_all_to_all(candidate_summary, transcript)
    llm_scores.append(llm_vs_original)
    print(f"Mean F1 score LLM - Original: {llm_vs_original:.4f}")

    # Candidate summary scored against the reference summary.
    llm_vs_human = calculate_score_all_to_all(candidate_summary, reference_summary)
    human_llm_scores.append(llm_vs_human)
    print(f"Mean F1 score Human - LLM: {llm_vs_human:.4f}")

overall_human = np.mean(human_scores)
overall_llm = np.mean(llm_scores)
overall_human_llm = np.mean(human_llm_scores)
print(f"Overall Human - Original Mean F1 score: {overall_human:.4f} ")
print(f"Overall LLM -  Original Mean F1 score: {overall_llm:.4f} ")
print(f"Overall Human - LLM Mean F1 score: {overall_human_llm:.4f} ")
print()

Sentiment analysis between Human and LLM profile summary results
File: data/travel_profile_txt/CreditonStLawrence.txt


  return forward_call(*args, **kwargs)


KeyboardInterrupt: 

In [None]:
from bert_score import BERTScorer

# Worked example: sentence-level comparison of two profile summaries.
# No BERTScorer is built here because calculate_score_all_to_all creates the
# scorer it needs; a separate instance in this cell would load the model and never be used.
cands="He is traveling mostly by car but strongly tend to use public transport.He does not care much about cost of travel. he prioritises the experiences more.He needs to go to work at Halsworthy which has no public transport available.He thinks people will change their habit when it not practical anymore.He likes scenario one (improving public transport).but he thinks the quality of busses is more important than frequency and we should stop keep using old buses.He does not like penalty tax policy because the concerns about its transparency of where taxes spent."
refs="The interviewee is a retired economics teacher who travels frequently, both locally and internationally. Cost influences his travel choices, especially for longer journeys where he prefers public transport. He values the convenience of car travel for local trips and work-related travel, despite the cost. He acknowledges the need for change in travel behavior due to congestion and environmental problems but believes change will be driven by necessity rather than personal conviction. He is skeptical of current transport policies and advocates for improved public transport quality and efficiency in cargo transport."

calculate_score_all_to_all(cands, refs)