This notebook allows you to run a word alignment assessment and view all the results in their entirety, to help with debugging.

In [1]:
from pathlib import Path
from typing import Optional, Literal

import modal
from pydantic import BaseModel
import pandas as pd

from IPython.display import display, HTML
# Widen elements with the `.container` CSS class to the full page width so
# that wide result tables are easier to read.
full_width_css = "<style>.container { width:100% !important; }</style>"
display(HTML(full_width_css))

# Let pandas render up to 500 rows before truncating a displayed frame.
pd.options.display.max_rows = 500

# The import below is left disabled; the next cell declares an `Assessment`
# model locally instead.
# from app import Assessment

In [2]:
class Assessment(BaseModel):
    """Pydantic model describing one word-alignment assessment request.

    NOTE(review): this looks like a local copy of ``app.Assessment`` (the
    import of that class is commented out in the first cell, and
    ``get_results`` imports it from ``app``) — confirm the two definitions
    stay in sync.
    """
    # Optional integer, defaults to None.
    # Presumably the ID of an existing assessment record — verify against app.
    assessment: Optional[int] = None
    # Required integer; this notebook passes the ID of the draft being assessed.
    revision: int
    # Required integer; this notebook passes the ID of the text compared against.
    reference: int
    # Only the literal string "word-alignment" is accepted.
    type: Literal["word-alignment"]

In [9]:
! pip install ipywidgets

Collecting ipywidgets
  Downloading ipywidgets-8.0.4-py3-none-any.whl (137 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.8/137.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m[31m3.4 MB/s[0m eta [36m0:00:01[0m
[?25hCollecting widgetsnbextension~=4.0
  Downloading widgetsnbextension-4.0.5-py3-none-any.whl (2.0 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
[?25hCollecting ipykernel>=4.5.1
  Downloading ipykernel-6.21.2-py3-none-any.whl (149 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.7/149.7 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m
[?25hCollecting traitlets>=4.3.1
  Downloading traitlets-5.9.0-py3-none-any.whl (117 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.4/117.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[

In [29]:
# Modal stub for this debugging session. Its container image installs the
# dependencies listed below and copies the local fixtures directory to
# /root/fixtures.
stub = modal.Stub(
    name="run-word-alignment-debug",
    image=modal.Image.debian_slim().pip_install(
        "pandas==1.4.3",
        "machine==0.0.1",
        "sil-machine[thot]>=0.8.3",
        # "asyncio" is deliberately not installed: it ships with the standard
        # library, and the PyPI package of that name is an obsolete backport
        # that is only relevant for Python 3.3.
    ).copy(
        mount=modal.Mount.from_local_dir(
            local_path=Path("../../fixtures/"), remote_path=Path("/root/fixtures")
        ),
    ),
)

In [30]:
# Look up the "assess" function of the "word-alignment-test" Modal app by name
# and attach it to this stub; get_results reaches it through
# modal.container_app.run_word_alignment.
# NOTE(review): presumably that app must already be deployed — confirm.
stub.run_word_alignment = modal.Function.from_name("word-alignment-test", "assess")

In [31]:
@stub.function(
    timeout=3600,
    mounts=[
        *modal.create_package_mounts(["app", "word_alignment_steps"]),
        modal.Mount.from_local_dir(local_path="./", remote_path="/"),
    ],
)
def get_results(config):
    """Call the word-alignment assessment and return its full results.

    Registered on ``stub`` with a 3600 second timeout and with the ``app`` and
    ``word_alignment_steps`` packages plus the local directory mounted.

    Args:
        config: Mapping that provides the ``'revision'`` and ``'reference'``
            keys used to build the ``Assessment`` request.

    Returns:
        Whatever ``run_word_alignment.call`` returns when invoked with
        ``return_all_results=True``.
    """
    # Imported inside the function body rather than at notebook level;
    # presumably so it resolves from the mounted `app` package — confirm.
    from app import Assessment

    request = Assessment(
        revision=config['revision'],
        reference=config['reference'],
        type='word-alignment',
    )
    return modal.container_app.run_word_alignment.call(request, return_all_results=True)

In [32]:
def assess_draft(revision, reference):
    """Run ``get_results`` for one revision/reference pair.

    Args:
        revision: Value stored under the ``'revision'`` key of the config.
        reference: Value stored under the ``'reference'`` key of the config.

    Returns:
        The value returned by ``get_results.call``.
    """
    payload = {'revision': revision, 'reference': reference}

    # The call is made while the stub's run() context is active.
    with stub.run():
        all_results = get_results.call(config=payload)

    return all_results

In [62]:
# Inputs for this debugging run; both are passed to assess_draft below.
reference = 10  # Greek Lemma
revision = 138  # Ngoreme

In [63]:
# Run the assessment for the selected pair and keep everything it returns.
results = assess_draft(revision=revision, reference=reference)

In [64]:
# (rows, columns) of the returned results, as a quick sanity check.
results.shape

(743807, 12)

In [75]:
# Show every scored source/target pair for a single verse.
luke_1_3_mask = results['vref'] == 'LUK 1:3'
results.loc[luke_1_3_mask]

Unnamed: 0,vref,source,target,alignment_score,alignment_count,co-occurrence_count,translation_score,avg_aligned,match_score,count,embedding_score,total_score
385,LUK 1:3,δοκέω,na,0.0,0.0,15.0,0.0,0.0,0.009185,8.0,0.124695,0.026776
386,LUK 1:3,δοκέω,eni,0.313232,1.0,4.0,0.012306,0.25,0.027397,4.0,0.148926,0.150372
387,LUK 1:3,δοκέω,ghwiki,0.0,0.0,2.0,0.0,0.0,0.0125,2.0,0.127319,0.027964
388,LUK 1:3,δοκέω,",",0.0,0.0,59.0,0.149902,0.0,0.008687,18.0,0.190186,0.069755
389,LUK 1:3,δοκέω,꞉,0.0,0.0,2.0,0.0,0.0,0.013889,1.0,-0.027725,-0.002767
390,LUK 1:3,δοκέω,nintamboneriire,0.0,0.0,1.0,0.0,0.0,0.055556,1.0,-0.09198,-0.007285
391,LUK 1:3,δοκέω,amangʼana,0.0,0.0,5.0,0.0,0.0,0.012618,4.0,0.174683,0.03746
392,LUK 1:3,δοκέω,ghayo,0.0,0.0,4.0,0.0,0.0,0.009615,2.0,0.171875,0.036298
393,LUK 1:3,δοκέω,ghansi,0.0,0.0,2.0,0.0,0.0,0.018868,2.0,0.203369,0.044447
394,LUK 1:3,δοκέω,.,0.0,0.0,28.0,0.01944,0.0,0.009081,17.0,0.168945,0.039493


In [66]:
# Keep only the columns needed to rank target candidates per source word.
score_columns = ['vref', 'source', 'target', 'total_score']
total_scores_df = results[score_columns]

# Missing values become 0 before ranking.
filled_scores_df = total_scores_df.fillna(0)

# For each (vref, source) pair, find the row label with the highest
# total_score, preserving the original group order.
best_row_labels = (
    filled_scores_df
    .groupby(['vref', 'source'], sort=False)['total_score']
    .idxmax()
)

# One row per (vref, source): its top-scoring target, re-indexed from 0.
top_source_scores_df = filled_scores_df.loc[best_row_labels, :].reset_index(drop=True)

In [67]:
# Preview the first 100 best-match rows.
top_source_scores_df.head(100)

Unnamed: 0,vref,source,target,total_score
0,LUK 1:1,ἐπειδήπερ,omutanke,0.802344
1,LUK 1:1,πολύς,abhaaru,0.226329
2,LUK 1:1,ἐπιχειρέω,teofiro,0.254667
3,LUK 1:1,ἀνατάσσομαι,mbaikongʼiri,0.625415
4,LUK 1:1,διήγησις,mbaikongʼiri,0.658838
5,LUK 1:1,περί,amangʼana,0.241853
6,LUK 1:1,ὁ,",",0.306412
7,LUK 1:1,πληροφορέω,ghaakooru,0.582715
8,LUK 1:1,ἐν,haare,0.139036
9,LUK 1:1,ἐγώ,itu,0.453019


In [68]:
# Average the best-match total_score of all source words within each verse,
# keeping verses in their original order.
# total_score is selected explicitly: 'source' and 'target' hold strings, and
# relying on pandas to silently drop non-numeric columns from mean() is
# deprecated in pandas 1.x and raises a TypeError from pandas 2.0 onwards.
verse_scores_df = (
    top_source_scores_df
    .groupby('vref', as_index=False, sort=False)['total_score']
    .mean()
    .fillna(0)
)

In [72]:
# Factor used to rescale the per-verse mean total_score into the 'score' column.
# NOTE(review): the rationale for 16 is not documented in this notebook — confirm.
SCORE_SCALE = 16

# Vectorised multiplication replaces the per-row lambda; assign() returns a new
# frame with the extra 'score' column instead of writing into the existing one.
verse_scores_df = verse_scores_df.assign(
    score=verse_scores_df['total_score'] * SCORE_SCALE
)

# Mean total_score across all verses. Placed last so the notebook displays it;
# as the first line of the cell its value was computed and then discarded.
verse_scores_df['total_score'].mean()

In [74]:
# Preview the first 200 per-verse scores.
verse_scores_df.head(200)

Unnamed: 0,vref,total_score,score
0,LUK 1:1,0.40815,6.530402
1,LUK 1:2,0.318111,5.089768
2,LUK 1:3,0.300181,4.802903
3,LUK 1:4,0.359249,5.747992
4,LUK 1:5,0.345238,5.523811
5,LUK 1:6,0.318009,5.088149
6,LUK 1:7,0.347334,5.557349
7,LUK 1:8,0.296049,4.736777
8,LUK 1:9,0.3151,5.041592
9,LUK 1:10,0.259552,4.152824
