In [1]:
import pandas as pd
from sklearn.metrics import classification_report
import re

import pathlib

In [2]:
class RulesAspectClassifier:
    """Rule-based classifier that assigns an aspect label to a word form.

    The rules are tried in a fixed order and the first one that fires wins:

    1. the lower-cased word starts with one of the prefixes in
       ``perf_suf_tmpl``                                  -> ``"perf"``
    2. the word ends with one of the suffixes in
       ``imp_suf_tmpl``                                   -> ``"imp"``
    3. the word ends with ``"ti"``                        -> ``"imp"``
    4. nothing matched                                    -> ``self.biase``

    Attributes:
        biase: Fallback label returned when no rule fires. It is returned
            verbatim, so any string (e.g. ``"imp"``, ``"perf"``, ``"both"``)
            can be used.
    """

    # NOTE: despite "suf" in the name, this pattern is anchored with `^` and is
    # applied with `.match`, so it tests the *beginning* of the word.
    # The single-letter alternatives ("s", "u") fire for every word that
    # starts with that letter.
    perf_suf_tmpl = re.compile("^(po|pro|na|iz|za|s|u|do|raz|od)")
    # Anchored with `$`: tests the *end* of the word.
    imp_suf_tmpl  = re.compile("(iva|ava|ova|uva)$")

    def __init__(self, biase:str="imp"):
        """Store the fallback label.

        Args:
            biase: Label returned by the classifier when no rule matches.
        """
        self.biase = biase

    def predict_(self, x: str) -> str:
        """Classify a single word form.

        Args:
            x: The word form; it is lower-cased before the rules are applied.

        Returns:
            ``"perf"``, ``"imp"``, or the configured fallback ``self.biase``.
        """

        x = x.lower()

        # The prefix rule has priority over both suffix rules.
        if self.perf_suf_tmpl.match(x):
            return "perf"

        elif self.imp_suf_tmpl.search(x):
            return "imp"

        elif x.endswith("ti"):
            return "imp"

        # Nothing matched: fall back to the configured bias.
        else:
            return self.biase

    def predict(self, X:list[str]) -> list[str]:
        """Classify every word form in ``X``.

        Args:
            X: Iterable of word forms (it is only iterated over).

        Returns:
            A list with one label per element of ``X``, in the same order.
        """
        return [
            self.predict_(x)
            for x
            in X
        ]

In [3]:
# Dataset location, resolved against the current working directory.
data_path = pathlib.Path.cwd().joinpath("data", "forms2sents.csv")

In [4]:
# Read the tab-separated file; its first column is used as the index.
df = pd.read_csv(
    data_path,
    index_col=0,
    sep="\t",
)
# Preview the first two rows (last expression, so the notebook displays it).
df.head(2)

Unnamed: 0,lemma,word,aspect,disambig,db_id
0,bacati,baca,imp,imp,set-s2762
1,bacati,bacaju,imp,imp,set-s693


In [5]:
# Inputs come from the "word" column, targets from the "aspect" column.
X, y = df["word"], df["aspect"]

In [6]:
# One classifier per fallback label: "imp", "perf" and "both".
rules_model_imp_biase, rules_model_perf_biase, rules_model_both_biase = (
    RulesAspectClassifier(biase=fallback)
    for fallback in ("imp", "perf", "both")
)

In [7]:
# Label every element of X with each of the three classifiers, in order.
y_imp_pred, y_perf_pred, y_both_pred = (
    model.predict(X)
    for model in (
        rules_model_imp_biase,
        rules_model_perf_biase,
        rules_model_both_biase,
    )
)

In [8]:
# Per-class metrics for the predictions made with the "imp" fallback.
# zero_division=0.0: metrics that would divide by zero are reported as 0.0.
print(
    classification_report(
        y_true=y,
        y_pred=y_imp_pred,
        zero_division=0.0,
    )
)

              precision    recall  f1-score   support

        both       0.00      0.00      0.00       212
         imp       0.60      0.66      0.63      2315
        perf       0.66      0.66      0.66      2432

    accuracy                           0.63      4959
   macro avg       0.42      0.44      0.43      4959
weighted avg       0.60      0.63      0.62      4959



In [9]:
# Per-class metrics for the predictions made with the "perf" fallback.
# zero_division=0.0: metrics that would divide by zero are reported as 0.0.
print(
    classification_report(
        y_true=y,
        y_pred=y_perf_pred,
        zero_division=0.0,
    )
)

              precision    recall  f1-score   support

        both       0.00      0.00      0.00       212
         imp       0.56      0.05      0.09      2315
        perf       0.49      0.97      0.65      2432

    accuracy                           0.50      4959
   macro avg       0.35      0.34      0.25      4959
weighted avg       0.50      0.50      0.36      4959



In [10]:
# Per-class metrics for the predictions made with the "both" fallback.
# zero_division=0.0: metrics that would divide by zero are reported as 0.0.
print(
    classification_report(
        y_true=y,
        y_pred=y_both_pred,
        zero_division=0.0,
    )
)

              precision    recall  f1-score   support

        both       0.08      0.89      0.15       212
         imp       0.56      0.05      0.09      2315
        perf       0.66      0.66      0.66      2432

    accuracy                           0.38      4959
   macro avg       0.43      0.53      0.30      4959
weighted avg       0.59      0.38      0.37      4959

