In [17]:
import numpy as np
import plotly.graph_objects as go

In [18]:
class edgecnt:
    """Edge counters for one part-of-speech category of one language pair.

    Field meanings below are taken from how the rest of this notebook
    labels them when printing/plotting:

    * PE -- size of the predicted set; getRaw() derives it as sum(EE) + CE.
    * CE -- edges counted as common to predicted and original data.
    * OE -- number of translations in the original (Apertium bidix) data.
    * EE -- four counts of predicted-but-not-original edges, one per
            "common vertices" bucket (0 = none, 1 = lang1, 2 = lang2, 3 = both).
    * ME -- four counts of original-but-not-predicted edges, same buckets.
    """

    # Class-level defaults are kept so that attribute access on the class
    # itself keeps working exactly as before.
    PE, CE, OE, EE, ME = 0, 0, 0, [0, 0, 0, 0], [0, 0, 0, 0]

    def __init__(self):
        # Give every instance its own counters. Without this, EE and ME would
        # be the single list objects stored on the class, so an in-place
        # update through one instance would leak into every other instance.
        self.PE = 0
        self.CE = 0
        self.OE = 0
        self.EE = [0, 0, 0, 0]
        self.ME = [0, 0, 0, 0]
    
# Categories reported by every table and figure, in display order.
# "overall" comes first, followed by the individual parts of speech.
ppos_req = [
    "overall",
    "noun",
    "verb",
    "properNoun",
    "adjective",
    "adverb",
    "determiner",
    "numeral",
    "pronoun",
    "preposition",
]

def getRaw(lang):
    """Load the raw per-category edge counts for one language pair.

    Reads ``Results/RemLang/Analysis/<lang>/POS_RAW.txt`` and parses it as a
    sequence of five-line records:

        line 1: category name (used as the dictionary key)
        line 2: OE, an integer
        line 3: CE, an integer
        line 4: whitespace-separated integers, the first four become EE
        line 5: whitespace-separated integers, the first four become ME

    PE is not stored in the file; it is derived as ``sum(EE) + CE``.

    Args:
        lang: Language-pair name used as the directory name, e.g. "en-es".

    Returns:
        dict mapping each category name found in the file to a populated
        ``edgecnt`` instance.

    Raises:
        OSError: if the file cannot be opened.
        IndexError: if the last record is truncated or a count line has
            fewer than four fields.
        ValueError: if a count is not a valid integer.
    """
    metrics = {}
    print("Language: " + lang)
    path = "Results/RemLang/Analysis/" + lang + "/POS_RAW.txt"
    # The context manager closes the handle even when parsing below raises;
    # previously the file object was opened and never closed.
    with open(path, "r") as inp_file:
        inp = inp_file.read().splitlines()

    # Walk the file one five-line record at a time.
    for it in range(0, len(inp), 5):
        temp = edgecnt()
        pos = inp[it]
        temp.OE = int(inp[it + 1])
        temp.CE = int(inp[it + 2])
        temp.EE = inp[it + 3].split()
        temp.ME = inp[it + 4].split()

        # Convert the four bucket counts from text to int in place.
        for i in range(4):
            temp.EE[i] = int(temp.EE[i])
            temp.ME[i] = int(temp.ME[i])

        # Predicted total = extra edges (all buckets) + common edges.
        temp.PE = sum(temp.EE) + temp.CE
        metrics[pos] = temp

    return metrics
    
def compute(metrics):
    """Turn raw edge counts into the percentage series used for plotting.

    Args:
        metrics: dict mapping category name to an object exposing the
            counters CE, PE, OE (numbers) and EE, ME (indexable, 4 entries).
            Every name in ``ppos_req`` must be present as a key.

    Returns:
        A 4-tuple ``(bvP, bvR, ipnoco, ionpci)``:

        * bvP -- per category, CE as a percentage of CE + EE[3].
        * bvR -- per category, CE as a percentage of CE + ME[3].
        * ipnoco -- five lists; lists 1..4 hold EE[0..3] as a percentage of
          PE, list 0 holds 100 minus the sum of those four percentages.
        * ionpci -- same layout built from ME and OE.

        Every inner list has one entry per category, in ``ppos_req`` order.
        All denominators are clamped to at least 1.
    """
    both_vertex_precision = []
    both_vertex_recall = []
    extra_share = [[] for _ in range(5)]
    missing_share = [[] for _ in range(5)]

    for category in ppos_req:
        counts = metrics[category]

        both_vertex_precision.append(
            counts.CE * 100.00 / max(1, (counts.CE + counts.EE[3]))
        )
        both_vertex_recall.append(
            counts.CE * 100.00 / max(1, (counts.CE + counts.ME[3]))
        )

        # Running totals are accumulated one term at a time (rather than
        # with sum()) so the floating-point result is bit-for-bit stable.
        extra_total = 0
        missing_total = 0
        for bucket in range(4):
            extra_pct = counts.EE[bucket] * 100.00 / max(1, counts.PE)
            missing_pct = counts.ME[bucket] * 100.00 / max(1, counts.OE)
            extra_total += extra_pct
            missing_total += missing_pct
            extra_share[bucket + 1].append(extra_pct)
            missing_share[bucket + 1].append(missing_pct)

        # Slot 0 is whatever is left after the four buckets.
        extra_share[0].append(100 - extra_total)
        missing_share[0].append(100 - missing_total)

    return both_vertex_precision, both_vertex_recall, extra_share, missing_share

def dataprinter(bvP, bvR, ipnoco, ionpci):
    """Print the computed percentages for every category as plain text.

    Args:
        bvP, bvR: sequences with one value per category in ``ppos_req``.
        ipnoco, ionpci: sequences indexed ``[bucket + 1][category_index]``;
            only entries 1..4 are printed (entry 0 is not shown here).
    """
    for idx, pos in enumerate(ppos_req):
        print("Part of Speech: ", pos, sep="")
        print("Precision when both vertices common in Predicted and Original: ", bvP[idx], sep="")
        print("Recall when both vertices commin in Original and Data: ", bvR[idx], "\n", sep="")
        print("Common vertices legend: 0 = No vertex, 1 = lang1, 2 = lang2, 3 = both vertices")

        print("In Predicted, Not in Original, Classified by Original, as percentage of Predicted (Precision')")
        for bucket in range(4):
            print(bucket, " - ", ipnoco[bucket + 1][idx], sep="")

        print("In Original, Not in Predicted, Classified by Input, as percentage of Original (Recall)")
        for bucket in range(4):
            print(bucket, " - ", ionpci[bucket + 1][idx], sep="")

        # Blank line between categories.
        print("")

In [19]:
def printdatacount(metrics):
    """Print, per category, the original translation count and predicted share.

    For every category in ``ppos_req`` one line is printed containing OE
    (labelled "Translations in Apertium Bidix") and PE expressed as a
    percentage of OE, formatted with two decimals.

    Args:
        metrics: dict mapping category name to an object exposing the
            numeric counters PE and OE. Every name in ``ppos_req`` must be
            present as a key.
    """
    for pos in ppos_req:
        counts = metrics[pos]
        # Clamp the denominator to 1, as compute() does for the same
        # quantity, so a category with OE == 0 is reported instead of
        # aborting the whole report with ZeroDivisionError.
        ratio = counts.PE * 100 / max(1, counts.OE)
        print(pos + " - Translations in Apertium Bidix: " + str(counts.OE) +
                  ", Percentage of Translations Predicted: %.2f%%" % ratio
             )
    
def prefixSum(arr):
    """Return the running column-wise totals of a list of rows.

    Row ``i`` of the result is the element-wise sum of rows ``0..i`` of
    ``arr``. The input is not modified.

    Args:
        arr: sequence of equally long sequences of numbers.

    Returns:
        A new list of lists with the same shape as ``arr``.
    """
    totals = []
    for row in arr:
        if not totals:
            # First row has nothing above it: copy it as-is.
            totals.append(list(row))
            continue
        above = totals[-1]
        totals.append([value + above[col] for col, value in enumerate(row)])
    return totals

def plot4in1(bvP, bvR, ipnoco, ionpci):
    """Show all four bar series per category in a single plotly figure.

    Per category (x-axis = ``ppos_req``) four bar slots of width 0.2 are
    drawn side by side: both-vertex precision, both-vertex recall, the
    cumulative ``ipnoco`` breakdown and the cumulative ``ionpci`` breakdown.

    Args:
        bvP, bvR: one value per category.
        ipnoco, ionpci: five rows of per-category values each; they are
            passed through ``prefixSum`` before plotting.
    """
    bar_width = 0.2
    fig = go.Figure()

    simple_series = (
        ("Both Vertex Precision", bvP, -0.4),
        ("Both Vertex Recall", bvR, -0.2),
    )
    for label, values, shift in simple_series:
        fig.add_trace(go.Bar(name=label, x=ppos_req, y=values, base=0, offset=shift, width=bar_width))

    breakdown_series = (
        ("iPnOcO", "Overall Precision", prefixSum(ipnoco), 0),
        ("iOnPci", "Overall Recall", prefixSum(ionpci), 0.2),
    )
    for prefix, overall_label, cumulative, shift in breakdown_series:
        # Cumulative rows are added from the largest running total down to
        # row 0, all at the same offset and base, so the bars overlap
        # (presumably so the shorter ones end up drawn on top).
        for level in (4, 3, 2, 1):
            fig.add_trace(go.Bar(name=prefix + str(level - 1), x=ppos_req, y=cumulative[level],
                                 base=0, offset=shift, width=bar_width))
        fig.add_trace(go.Bar(name=overall_label, x=ppos_req, y=cumulative[0],
                             base=0, offset=shift, width=bar_width))

    fig.update_layout(barmode='group', width=1000, height=800, xaxis_tickangle=45)
    fig.show()
    
def plot2in2(bvP, bvR, ipnoco, ionpci):
    """Show the results as two plotly figures with two bar slots each.

    The first figure holds both-vertex precision and recall. The second
    holds the cumulative ``ipnoco`` and ``ionpci`` breakdowns. Both use
    ``ppos_req`` as the x-axis and bars of width 0.4.

    Args:
        bvP, bvR: one value per category.
        ipnoco, ionpci: five rows of per-category values each; they are
            passed through ``prefixSum`` before plotting.
    """
    bar_width = 0.4
    left_slot, right_slot = -0.4, 0

    both_vertex_fig = go.Figure()
    breakdown_fig = go.Figure()

    both_vertex_fig.add_trace(go.Bar(name="Both Vertex Precision", x=ppos_req, y=bvP,
                                     base=0, offset=left_slot, width=bar_width))
    both_vertex_fig.add_trace(go.Bar(name="Both Vertex Recall", x=ppos_req, y=bvR,
                                     base=0, offset=right_slot, width=bar_width))

    breakdown_series = (
        ("iPnOcO", "Overall Precision", prefixSum(ipnoco), left_slot),
        ("iOnPci", "Overall Recall", prefixSum(ionpci), right_slot),
    )
    for prefix, overall_label, cumulative, shift in breakdown_series:
        # Cumulative rows are added from the largest running total down to
        # row 0, all at the same offset and base, so the bars overlap
        # (presumably so the shorter ones end up drawn on top).
        for level in (4, 3, 2, 1):
            breakdown_fig.add_trace(go.Bar(name=prefix + str(level - 1), x=ppos_req, y=cumulative[level],
                                           base=0, offset=shift, width=bar_width))
        breakdown_fig.add_trace(go.Bar(name=overall_label, x=ppos_req, y=cumulative[0],
                                       base=0, offset=shift, width=bar_width))

    shared_layout = dict(barmode='group', width=1000, height=800, xaxis_tickangle=45)
    both_vertex_fig.update_layout(**shared_layout)
    both_vertex_fig.show()
    breakdown_fig.update_layout(**shared_layout)
    breakdown_fig.show()

<h2> Hyperparameters Used </h2>
Context Depth - 3 <br>
Maximum Cycle Length - 7 <br>
Source Language Repetition - Not allowed <br>
Cutoff for being 'Large Context' - 5 adjacent vertices to source <br>
Minimum Cycle Length in Large Contexts - 5 <br>
Minimum Cycle Length in Small Contexts - 4 <br>
Confidence Multiplier if Target has degree > 2 - 1.4 <br>
Confidence Threshold for predictions - 0.7 <br>

The graphs have been plotted using plotly, which allows great interactivity: you can hover over bars to see values, crop, zoom in, zoom out etc. (tools available on hovering a little above the top right of each graph).
<h2> Understanding the Graphs </h2>
Data is visualized for 10 categories. The first 'category' shows overall results. This is followed by 9 part of speech categories roughly in decreasing order of priority.<br> 
The first graph contains 2 bars for each category:
<ol>
   <li> The blue bar shows precision when both words of the predicted translation are in the apertium bilingual dictionary for the language pair. This partially resolves data insufficiency issues (language-pairs are not exhaustive with all translations). It is still possible that the actual precision is higher as the predicted translations are not wrong, just not present in the language-pair. In some sense, a minimum bound on precision is thus provided.
   <li> The red bar shows recall when both words of an original apertium translation are in the data used (10 other language-pairs) but have not been predicted as a translation. This partially resolves data-insufficiency issues (other language-pairs not having data to infer this translation). 
</ol>
The second graph contains 2 bars for each category, each bar split into 5 parts:
<ol>
    <li> The first bar compares Predicted translations to Original (Apertium bidix) translations. The lowest part (in orange) shows percentage of predicted translations that were in original data. This is annotated overall precision which may be misleading as Apertium data can be incomplete (refer to graph 1 for realistic numbers). The extra translations not in original data are further classified into 4 categories based on Original data. Note iPnOcO stands for In Predicted, Not in Original, Classified by Original.
    <ol>
        <li> Category iPnOcO 0: Neither of the words in the predicted translation are in Original data.
        <li> Category iPnOcO 1: Language 1 (english in "en-es" pair) word of translation is in Original data.
        <li> Category iPnOcO 2: Language 2 (spanish in "en-es" pair) word of translation is in Original data.
        <li> Category iPnOcO 3: Both words of the predicted translation are in Original Data.
    </ol>
    <br>
    <li> The second bar compares Original translations to Predicted translations. The lowest part (in yellow) shows percentage of original translations that were in the predicted data. This is annotated overall recall. The recall shown in Graph 1 is slightly different, in that it ignores translations which could not possibly have been inferred as either one or both words don't exist in Input data (other 10 language-pairs). The translations in Original that are not in Predicted are classified in 4 categories based on Input data. Note iOnPcI stands for In Original, Not in Predicted, Classified by Input.
    <ol>
        <li> Category iOnPcI 0: Neither of the words in the original translation are in input data.
        <li> Category iOnPcI 1: Language 1 (english in "en-es" pair) word of translation is in input data.
        <li> Category iOnPcI 2: Language 2 (spanish in "en-es" pair) word of translation is in input data.
        <li> Category iOnPcI 3: Both words of the original translation are in input data.
    </ol>
</ol>

<h4> Overall Results </h4>

In [32]:
# Average every metric over all language pairs, giving each pair equal weight
# regardless of its size, then print the averaged prediction ratios and plot.
langlist = ["en-es", "en-ca", "fr-es", "fr-ca", "eo-fr", "eo-ca", "eo-en", "eo-es", "oc-ca", "oc-es", "oc-fr"]

# Sizes are derived instead of hard-coded so the accumulators cannot fall out
# of step with ppos_req. NUM_BUCKETS matches the five rows compute() returns
# for ipnoco/ionpci.
NUM_CATEGORIES = len(ppos_req)
NUM_BUCKETS = 5

avgpredratio = {pos: 0 for pos in ppos_req}
abvP = [0] * NUM_CATEGORIES
abvR = [0] * NUM_CATEGORIES
aipnoco = [[0] * NUM_CATEGORIES for _ in range(NUM_BUCKETS)]
aionpci = [[0] * NUM_CATEGORIES for _ in range(NUM_BUCKETS)]

# Accumulate the per-pair values...
for lang in langlist:
    metrics = getRaw(lang)
    for pos in ppos_req:
        avgpredratio[pos] += metrics[pos].PE*100/metrics[pos].OE
    bvP, bvR, ipnoco, ionpci = compute(metrics)
    for i in range(len(bvP)):
        abvP[i] += bvP[i]
        abvR[i] += bvR[i]
        for j in range(NUM_BUCKETS):
            aipnoco[j][i] += ipnoco[j][i]
            aionpci[j][i] += ionpci[j][i]

# ...then divide by the number of pairs to get plain (unweighted) means.
for i in range(NUM_CATEGORIES):
    abvP[i] /= len(langlist)
    abvR[i] /= len(langlist)
    for j in range(NUM_BUCKETS):
        aipnoco[j][i] /= len(langlist)
        aionpci[j][i] /= len(langlist)
for pos in avgpredratio:
    avgpredratio[pos] /= len(langlist)

for pos in ppos_req:
    print(pos + " - Percentage of Translations Predicted: %.2f%%" % avgpredratio[pos] )
plot2in2(abvP, abvR, aipnoco, aionpci)



Language: en-es
Language: en-ca
Language: fr-es
Language: fr-ca
Language: eo-fr
Language: eo-ca
Language: eo-en
Language: eo-es
Language: oc-ca
Language: oc-es
Language: oc-fr
overall - Percentage of Translations Predicted: 25.63%
noun - Percentage of Translations Predicted: 68.65%
verb - Percentage of Translations Predicted: 36.09%
properNoun - Percentage of Translations Predicted: 55.01%
adjective - Percentage of Translations Predicted: 27.80%
adverb - Percentage of Translations Predicted: 30.88%
determiner - Percentage of Translations Predicted: 68.45%
numeral - Percentage of Translations Predicted: 20.57%
pronoun - Percentage of Translations Predicted: 78.62%
preposition - Percentage of Translations Predicted: 56.01%


<h4> Analysis </h4>
In the experiment, 11 language pairs are taken. Each language pair is removed and generated using the other 10. The results are averaged (divided by 11) across POS categories so that all language-pairs (large and small in data) are given equal weightage. The percentages presented here-on are averages. <br>
The prediction set size is 25.63% of Apertium bidix. This ratio is very high (78%) for pronouns, high (68%) for nouns and determiners and also good for properNouns (55%). Notice that the overall average is lower than all categories except numerals (20%).
<details>
    <summary> Is this an error? No. </summary>
    This is not because of some error, but rather because the overall is an average over language-pairs, which in turn are a weighted average of the categories. The category-wise %ages are instead direct averages across language-pairs. In a particular language-pair, it is intuitive that when the predicted percentage is high for a category, that category is probably going to have fewer translations originally (which is used for weighting). Thus the weighting for language-pair overall computation brings the overall average down.
</details>

<h5> Graph 1 - Both vertices common </h5>
Precision is fairly high at 88%. Particularly good for properNouns (99.5%) and numerals (97.8%). Low for pronouns (67%), prepositions (75%) and slightly for determiners and adverbs too (~81-82%). Note however that these 4 have the highest recalls (Determiners 62%, Prepositions 52%, Pronouns 43%, Adverbs 38%). This is high when compared to an overall average of 24%. ProperNouns have particularly low recall (14%). This is a little concerning as both vertices are present, but also expected because many language pairs may not have the same set of properNouns leading to a lack of cycles.
<h5> Graph 2 - Overall classification </h5>
Overall precision is 60% and recall 13%. The significant drop in both compared to Graph 1 just shows the incomplete-ness of Apertium data. Notice that the differences between the 2 sub-categories of one-vertex prec/recall in this graph don't mean much because the choice of lang1 and lang2 was arbitrary for a pair, and trends comparing these 2 are purely co-incidental. 
<b> Left bar </b> (% wrt predicted data compared to apertium lang-pair (original) ): <br>
Most 'precision-misses' occur because neither vertex is present (22%, 55% of extra predictions). One vertex is missing in 13% cases whereas both in only 8%. This is good. Neither vertex is particularly prevalent in Nouns (23%), ProperNouns (37%). Both vertices missing happen prevalently in adverbs (14%), determiners (17%), pronouns (22%), and prepositions (21%). 1-vertex-missing is the major cause of 'extra' predictions only in pronouns (27%) and perhaps significant in determiners and adverbs. <br>
<b> Right bar </b> (% wrt apertium lang-pair data (original) compared to input data): <br>
Overall recall-misses are least due to neither vertex being there in the original data (12%, 15% of misses). 1 word missing (35%) and both present (40%) are the significant categories. Overall recall has trends quite similar to recall as in Graph 1. Neither vertex misses are significant mainly in ProperNouns (16%) and adverbs (17%). Both vertex recall misses are roughly similar across categories (30%-50%), with verbs (50%) on the higher side and properNouns/prepositions (both 32%) on the lower. 1-vertex-recall is lower for pronouns (20%) and particularly high mainly for properNouns (46%) and numerals (37%). <br>
It may be worth-while to divide the both-vertex data into have-common-bicomp or not for further breakdown/analysis. This is because vertices that share a biconnected component have a cycle through both of them.

In [21]:
# Per-pair report for "en-es": load the raw counts, derive the percentage
# series, print the per-category sizes, then draw both figures.
metrics = getRaw("en-es")
bvP, bvR, ipnoco, ionpci = compute(metrics)
printdatacount(metrics)
plot2in2(bvP, bvR, ipnoco, ionpci)

Language: en-es
overall - Translations in Apertium Bidix: 32036, Percentage of Translations Predicted: 7.18%
noun - Translations in Apertium Bidix: 33315, Percentage of Translations Predicted: 3.56%
verb - Translations in Apertium Bidix: 7331, Percentage of Translations Predicted: 7.07%
properNoun - Translations in Apertium Bidix: 7992, Percentage of Translations Predicted: 0.35%
adjective - Translations in Apertium Bidix: 9497, Percentage of Translations Predicted: 14.34%
adverb - Translations in Apertium Bidix: 4443, Percentage of Translations Predicted: 23.70%
determiner - Translations in Apertium Bidix: 210, Percentage of Translations Predicted: 18.10%
numeral - Translations in Apertium Bidix: 100, Percentage of Translations Predicted: 6.00%
pronoun - Translations in Apertium Bidix: 406, Percentage of Translations Predicted: 37.68%
preposition - Translations in Apertium Bidix: 372, Percentage of Translations Predicted: 52.69%


<h4> Analysis </h4>
The language-pair is medium sized with 32k edges. The prediction set size is 7.18% of Apertium bidix. This ratio is very very low for nouns (3.56%) and especially properNouns (0.35%).
<h5> Graph 1 - Both vertices common </h5>
The precision is fairly high, ~86%. Recall is however low at ~12%. As evident for all future language pairs, properNouns have near 100% precision. Numerals too. Here Verbs, Adjectives and Determiners have 90-95% Precision. As evident for all language-pairs, pronouns perform poorly. Recall is decent for the last 4 categories, but low earlier, lowest for properNouns (~3%). 

<h5> Graph 2 - Overall classification </h5>
<b> Left bar </b> (% wrt predicted data compared to apertium lang-pair (original) ): <br>
The overall precision is very high in Determiners and Numerals. In properNouns, all extra predictions have neither word in the original data which shows great structure. 13% of predictions overall are extra but with both vertices present in original data. This number is particularly high for pronouns, prepositions, adverbs and nouns. The english equivalent of 6% verbs and 12% adjectives (predicted translations) is not there in Apertium data. <br>
<b> Right bar </b> (% wrt apertium lang-pair data (original) compared to input data): <br>
Overall recall is low (5.3%), especially in properNouns(0.2%) and nouns (2.9%). It is higher in adverbs, determiners and especially prepositions (39%). Overall, 41% of original data translations have both words in input data, which is unfortunately high. In Verbs, it is as high as 59%. On the contrary, in 23% of original translations, neither word is there in input data. This is especially high in properNouns (36%), determiners (39%) and pronouns (34%). There are suggestions of Spanish input data being lower compared to this language pair for adverbs (37%), verbs (17%), numerals (58%) and determiners. On the other hand more English data is missing for properNouns (61%), nouns (20%).

In [22]:
# Per-pair report for "en-ca": load the raw counts, derive the percentage
# series, print the per-category sizes, then draw both figures.
metrics = getRaw("en-ca")
bvP, bvR, ipnoco, ionpci = compute(metrics)
printdatacount(metrics)
plot2in2(bvP, bvR, ipnoco, ionpci)

Language: en-ca
overall - Translations in Apertium Bidix: 13159, Percentage of Translations Predicted: 47.70%
noun - Translations in Apertium Bidix: 1514, Percentage of Translations Predicted: 483.49%
verb - Translations in Apertium Bidix: 7648, Percentage of Translations Predicted: 19.67%
properNoun - Translations in Apertium Bidix: 1080, Percentage of Translations Predicted: 35.37%
adjective - Translations in Apertium Bidix: 6952, Percentage of Translations Predicted: 29.34%
adverb - Translations in Apertium Bidix: 8202, Percentage of Translations Predicted: 11.53%
determiner - Translations in Apertium Bidix: 130, Percentage of Translations Predicted: 38.46%
numeral - Translations in Apertium Bidix: 98, Percentage of Translations Predicted: 12.24%
pronoun - Translations in Apertium Bidix: 238, Percentage of Translations Predicted: 37.39%
preposition - Translations in Apertium Bidix: 332, Percentage of Translations Predicted: 52.41%


<h4> Analysis </h4>
The language-pair is small sized with 13k edges. The prediction set size is (47%) of Apertium bidix (quite high ratio). It is especially high for nouns (483% of original size) which suggests the data has very little information on nouns. 
<h5> Graph 1 </h5>
The precision is fairly high, ~91%. Recall is however low at ~12%. As evident for all future language pairs, properNouns have near 100% precision. Numerals too. Here Verbs, Adjectives and Determiners have 90-95% Precision. As evident for all language-pairs, pronouns perform poorly. Recall is decent for the last 4 categories, but low earlier, lowest for properNouns (~3%). 

<h5> Graph 2 </h5>
<b> Left bar </b> (% wrt predicted data compared to apertium lang-pair (original) ): <br>
The overall precision is very high in Determiners and Numerals. In properNouns, all extra predictions have neither word in the original data which shows great structure. 13% of predictions overall are extra but with both vertices present in original data. This number is particularly high for pronouns, prepositions, adverbs and nouns. The english equivalent of 6% verbs and 12% adjectives (predicted translations) is not there in Apertium data. <br>
<b> Right bar </b> (% wrt apertium lang-pair data (original) compared to input data): <br>
Overall recall is low (5.3%), especially in properNouns(0.2%) and nouns (2.9%). It is higher in adverbs, determiners and especially prepositions (39%). Overall, 41% of original data translations have both words in input data, which is unfortunately high. In Verbs, it is as high as 59%. On the contrary, in 23% of original translations, neither word is there in input data. This is especially high in properNouns (36%), determiners (39%) and pronouns (34%). There are suggestions of Spanish input data being lower compared to this language pair for adverbs (37%), verbs (17%), numerals (58%) and determiners. On the other hand more English data is missing for properNouns (61%), nouns (20%).

In [23]:
# Per-pair report for "fr-es": load the raw counts, derive the percentage
# series, print the per-category sizes, then draw both figures.
metrics = getRaw("fr-es")
bvP, bvR, ipnoco, ionpci = compute(metrics)
printdatacount(metrics)
plot2in2(bvP, bvR, ipnoco, ionpci)

Language: fr-es
overall - Translations in Apertium Bidix: 22330, Percentage of Translations Predicted: 45.10%
noun - Translations in Apertium Bidix: 16436, Percentage of Translations Predicted: 59.03%
verb - Translations in Apertium Bidix: 5080, Percentage of Translations Predicted: 35.67%
properNoun - Translations in Apertium Bidix: 15650, Percentage of Translations Predicted: 24.89%
adjective - Translations in Apertium Bidix: 4860, Percentage of Translations Predicted: 58.81%
adverb - Translations in Apertium Bidix: 1733, Percentage of Translations Predicted: 64.11%
determiner - Translations in Apertium Bidix: 62, Percentage of Translations Predicted: 70.97%
numeral - Translations in Apertium Bidix: 270, Percentage of Translations Predicted: 26.67%
pronoun - Translations in Apertium Bidix: 136, Percentage of Translations Predicted: 211.76%
preposition - Translations in Apertium Bidix: 326, Percentage of Translations Predicted: 85.28%


In [24]:
# Per-pair report for "fr-ca": load the raw counts, derive the percentage
# series, print the per-category sizes, then draw both figures.
metrics = getRaw("fr-ca")
bvP, bvR, ipnoco, ionpci = compute(metrics)
printdatacount(metrics)
plot2in2(bvP, bvR, ipnoco, ionpci)

Language: fr-ca
overall - Translations in Apertium Bidix: 126730, Percentage of Translations Predicted: 5.04%
noun - Translations in Apertium Bidix: 121569, Percentage of Translations Predicted: 4.77%
verb - Translations in Apertium Bidix: 9602, Percentage of Translations Predicted: 21.45%
properNoun - Translations in Apertium Bidix: 98894, Percentage of Translations Predicted: 1.42%
adjective - Translations in Apertium Bidix: 18146, Percentage of Translations Predicted: 10.47%
adverb - Translations in Apertium Bidix: 4152, Percentage of Translations Predicted: 24.88%
determiner - Translations in Apertium Bidix: 56, Percentage of Translations Predicted: 116.07%
numeral - Translations in Apertium Bidix: 142, Percentage of Translations Predicted: 54.93%
pronoun - Translations in Apertium Bidix: 259, Percentage of Translations Predicted: 71.43%
preposition - Translations in Apertium Bidix: 484, Percentage of Translations Predicted: 38.84%


In [25]:
# Per-pair report for "eo-fr": load the raw counts, derive the percentage
# series, print the per-category sizes, then draw both figures.
metrics = getRaw("eo-fr")
bvP, bvR, ipnoco, ionpci = compute(metrics)
printdatacount(metrics)
plot2in2(bvP, bvR, ipnoco, ionpci)

Language: eo-fr
overall - Translations in Apertium Bidix: 36518, Percentage of Translations Predicted: 29.39%
noun - Translations in Apertium Bidix: 43184, Percentage of Translations Predicted: 23.43%
verb - Translations in Apertium Bidix: 4232, Percentage of Translations Predicted: 51.98%
properNoun - Translations in Apertium Bidix: 5834, Percentage of Translations Predicted: 96.02%
adjective - Translations in Apertium Bidix: 13216, Percentage of Translations Predicted: 18.17%
adverb - Translations in Apertium Bidix: 5189, Percentage of Translations Predicted: 14.95%
determiner - Translations in Apertium Bidix: 80, Percentage of Translations Predicted: 65.00%
numeral - Translations in Apertium Bidix: 406, Percentage of Translations Predicted: 5.91%
pronoun - Translations in Apertium Bidix: 199, Percentage of Translations Predicted: 41.71%
preposition - Translations in Apertium Bidix: 562, Percentage of Translations Predicted: 27.05%


In [26]:
# Per-pair report for "eo-ca": load the raw counts, derive the percentage
# series, print the per-category sizes, then draw both figures.
metrics = getRaw("eo-ca")
bvP, bvR, ipnoco, ionpci = compute(metrics)
printdatacount(metrics)
plot2in2(bvP, bvR, ipnoco, ionpci)

Language: eo-ca
overall - Translations in Apertium Bidix: 51032, Percentage of Translations Predicted: 12.50%
noun - Translations in Apertium Bidix: 33094, Percentage of Translations Predicted: 20.11%
verb - Translations in Apertium Bidix: 6624, Percentage of Translations Predicted: 18.42%
properNoun - Translations in Apertium Bidix: 50486, Percentage of Translations Predicted: 0.85%
adjective - Translations in Apertium Bidix: 7010, Percentage of Translations Predicted: 41.16%
adverb - Translations in Apertium Bidix: 4344, Percentage of Translations Predicted: 26.04%
determiner - Translations in Apertium Bidix: 106, Percentage of Translations Predicted: 67.92%
numeral - Translations in Apertium Bidix: 30, Percentage of Translations Predicted: 40.00%
pronoun - Translations in Apertium Bidix: 88, Percentage of Translations Predicted: 148.86%
preposition - Translations in Apertium Bidix: 234, Percentage of Translations Predicted: 73.50%


In [27]:
# Per-pair report for "eo-en": load the raw counts, derive the percentage
# series, print the per-category sizes, then draw both figures.
metrics = getRaw("eo-en")
bvP, bvR, ipnoco, ionpci = compute(metrics)
printdatacount(metrics)
plot2in2(bvP, bvR, ipnoco, ionpci)

Language: eo-en
overall - Translations in Apertium Bidix: 32062, Percentage of Translations Predicted: 15.51%
noun - Translations in Apertium Bidix: 32586, Percentage of Translations Predicted: 10.02%
verb - Translations in Apertium Bidix: 12920, Percentage of Translations Predicted: 6.99%
properNoun - Translations in Apertium Bidix: 766, Percentage of Translations Predicted: 347.13%
adjective - Translations in Apertium Bidix: 12532, Percentage of Translations Predicted: 11.80%
adverb - Translations in Apertium Bidix: 4366, Percentage of Translations Predicted: 27.51%
determiner - Translations in Apertium Bidix: 112, Percentage of Translations Predicted: 64.29%
numeral - Translations in Apertium Bidix: 340, Percentage of Translations Predicted: 2.94%
pronoun - Translations in Apertium Bidix: 149, Percentage of Translations Predicted: 71.14%
preposition - Translations in Apertium Bidix: 212, Percentage of Translations Predicted: 78.30%


In [28]:
# Per-pair report for "eo-es": load the raw counts, derive the percentage
# series, print the per-category sizes, then draw both figures.
metrics = getRaw("eo-es")
bvP, bvR, ipnoco, ionpci = compute(metrics)
printdatacount(metrics)
plot2in2(bvP, bvR, ipnoco, ionpci)

Language: eo-es
overall - Translations in Apertium Bidix: 75255, Percentage of Translations Predicted: 10.78%
noun - Translations in Apertium Bidix: 69283, Percentage of Translations Predicted: 13.70%
verb - Translations in Apertium Bidix: 1704, Percentage of Translations Predicted: 128.70%
properNoun - Translations in Apertium Bidix: 62511, Percentage of Translations Predicted: 0.86%
adjective - Translations in Apertium Bidix: 9914, Percentage of Translations Predicted: 27.13%
adverb - Translations in Apertium Bidix: 6184, Percentage of Translations Predicted: 14.04%
determiner - Translations in Apertium Bidix: 84, Percentage of Translations Predicted: 69.05%
numeral - Translations in Apertium Bidix: 36, Percentage of Translations Predicted: 66.67%
pronoun - Translations in Apertium Bidix: 158, Percentage of Translations Predicted: 79.11%
preposition - Translations in Apertium Bidix: 468, Percentage of Translations Predicted: 34.19%


In [29]:
# Per-pair report for "oc-ca": load the raw counts, derive the percentage
# series, print the per-category sizes, then draw both figures.
metrics = getRaw("oc-ca")
bvP, bvR, ipnoco, ionpci = compute(metrics)
printdatacount(metrics)
plot2in2(bvP, bvR, ipnoco, ionpci)

Language: oc-ca
overall - Translations in Apertium Bidix: 15876, Percentage of Translations Predicted: 69.68%
noun - Translations in Apertium Bidix: 12462, Percentage of Translations Predicted: 88.57%
verb - Translations in Apertium Bidix: 4584, Percentage of Translations Predicted: 57.33%
properNoun - Translations in Apertium Bidix: 6636, Percentage of Translations Predicted: 85.49%
adjective - Translations in Apertium Bidix: 4452, Percentage of Translations Predicted: 33.74%
adverb - Translations in Apertium Bidix: 2291, Percentage of Translations Predicted: 35.40%
determiner - Translations in Apertium Bidix: 133, Percentage of Translations Predicted: 51.13%
numeral - Translations in Apertium Bidix: 362, Percentage of Translations Predicted: 7.18%
pronoun - Translations in Apertium Bidix: 236, Percentage of Translations Predicted: 28.39%
preposition - Translations in Apertium Bidix: 416, Percentage of Translations Predicted: 69.71%


In [30]:
# Per-pair report for "oc-es": load the raw counts, derive the percentage
# series, print the per-category sizes, then draw both figures.
metrics = getRaw("oc-es")
bvP, bvR, ipnoco, ionpci = compute(metrics)
printdatacount(metrics)
plot2in2(bvP, bvR, ipnoco, ionpci)

Language: oc-es
overall - Translations in Apertium Bidix: 16352, Percentage of Translations Predicted: 30.04%
noun - Translations in Apertium Bidix: 13878, Percentage of Translations Predicted: 38.46%
verb - Translations in Apertium Bidix: 4540, Percentage of Translations Predicted: 25.33%
properNoun - Translations in Apertium Bidix: 6578, Percentage of Translations Predicted: 11.95%
adjective - Translations in Apertium Bidix: 4220, Percentage of Translations Predicted: 32.04%
adverb - Translations in Apertium Bidix: 2153, Percentage of Translations Predicted: 35.35%
determiner - Translations in Apertium Bidix: 100, Percentage of Translations Predicted: 50.00%
numeral - Translations in Apertium Bidix: 446, Percentage of Translations Predicted: 0.45%
pronoun - Translations in Apertium Bidix: 238, Percentage of Translations Predicted: 39.08%
preposition - Translations in Apertium Bidix: 398, Percentage of Translations Predicted: 61.31%


In [31]:
# Per-pair report for "oc-fr": load the raw counts, derive the percentage
# series, print the per-category sizes, then draw both figures.
metrics = getRaw("oc-fr")
bvP, bvR, ipnoco, ionpci = compute(metrics)
printdatacount(metrics)
plot2in2(bvP, bvR, ipnoco, ionpci)

Language: oc-fr
overall - Translations in Apertium Bidix: 82087, Percentage of Translations Predicted: 9.04%
noun - Translations in Apertium Bidix: 81189, Percentage of Translations Predicted: 10.03%
verb - Translations in Apertium Bidix: 6530, Percentage of Translations Predicted: 24.38%
properNoun - Translations in Apertium Bidix: 63946, Percentage of Translations Predicted: 0.82%
adjective - Translations in Apertium Bidix: 8714, Percentage of Translations Predicted: 28.80%
adverb - Translations in Apertium Bidix: 2323, Percentage of Translations Predicted: 62.20%
determiner - Translations in Apertium Bidix: 62, Percentage of Translations Predicted: 141.94%
numeral - Translations in Apertium Bidix: 430, Percentage of Translations Predicted: 3.26%
pronoun - Translations in Apertium Bidix: 177, Percentage of Translations Predicted: 98.31%
preposition - Translations in Apertium Bidix: 710, Percentage of Translations Predicted: 42.82%
