# Generate KVETA predictions

In [1]:
%load_ext dotenv
%dotenv
import os

# Project root, read from the WORKING_DIR environment variable
# (typically provided through the .env file loaded by the %dotenv magic).
base_dir = os.getenv("WORKING_DIR")
if not base_dir:
    # os.getenv returns None for a missing variable and os.chdir(None) would
    # fail with an unrelated-looking TypeError; fail early with a clear message.
    raise RuntimeError(
        "WORKING_DIR is not set. Define it in the environment or in the .env "
        "file so that it points to the project root."
    )
# Make the project root the working directory.
# NOTE(review): presumably needed so that `src.*` imports and relative paths
# resolve from the project root — confirm.
os.chdir(base_dir)

In [2]:
from src.data_loader_and_saver import JSONDataLoaderAndSaver

# Loader for the shared data sets under src/data.
data_loader_and_saver = JSONDataLoaderAndSaver(
    base_dir,
    input_data_dir="src/data",
)

# Loader/saver for KVETA-specific files: reads from and writes to src/kveta/data.
kveta_data_loader_and_saver = JSONDataLoaderAndSaver(
    base_dir,
    input_data_dir="src/kveta/data",
    output_data_dir="src/kveta/data",
)

In [3]:
from src.kveta.sampa_syllable_parser import SampaSyllableParser
from src.kveta.syllable_class_parser import SyllableClassParser
from src.kveta.metre_generator import MetreGenerator
from src.kveta.metre_assigner import MetreAssigner
from src.kveta.metre_predictor import MetrePredictor

# Components shared by both scenarios: built once here and passed to each
# MetrePredictor created later in the notebook.
sampa_parser = SampaSyllableParser()
syllable_class_parser = SyllableClassParser()
metre_generator = MetreGenerator()

## All poems have just one metre; no unknown metres

In [4]:
# File-name suffix identifying this scenario; it is appended to the names of
# the data sets loaded and saved in this section.
extension = "_one_metre_all_metres_recognized"

In [5]:
# Load the probabilities stored for this scenario from the KVETA data directory.
probabilities_name = f"kveta_probabilities{extension}"
kveta_probabilities = kveta_data_loader_and_saver.load_data(probabilities_name)

kveta_probabilities_one_metre_all_metres_recognized.json: loaded 1 records.


In [6]:
# The assigner is built from the "metrical_pos_given_syll_cls_proba" entry.
# NOTE(review): presumably P(metrical position | syllable class) — confirm.
metrical_position_probabilities = kveta_probabilities["metrical_pos_given_syll_cls_proba"]
metre_assigner = MetreAssigner(metrical_position_probabilities)

### Load testing data

In [7]:
# Test inputs are read through the shared loader (src/data), not the KVETA one.
test_inputs_name = f"test_X{extension}"
test_X = data_loader_and_saver.load_data(test_inputs_name)

test_X_one_metre_all_metres_recognized.json: loaded 8601 records.


### Predict

In [8]:
# Combine the shared parsers and generator with this scenario's assigner.
metre_predictor = MetrePredictor(
    sampa_parser,
    syllable_class_parser,
    metre_generator,
    metre_assigner,
)

In [9]:
# Run the predictor on the loaded test data; predict() writes progress
# messages and per-poem warnings to the cell output.
predictions = metre_predictor.predict(test_X)

Analyzing poem 0...
Analyzing poem 1000...
Analyzing poem 2000...
Analyzing poem 3000...
Poem 3469: Too many syllables, no patterns generated, assigning error pattern...
Analyzing poem 4000...
Analyzing poem 5000...
Analyzing poem 6000...
Analyzing poem 7000...
Analyzing poem 8000...


In [10]:
# Store the predictions via the KVETA loader/saver, tagged with the scenario suffix.
predictions_name = f"kveta_predictions{extension}"
kveta_data_loader_and_saver.save_data(predictions, predictions_name)

Data saved to kveta_predictions_one_metre_all_metres_recognized.json


## All lines have just one metre; no unknown metres

In [11]:
# File-name suffix identifying this scenario; it is appended to the names of
# the data sets loaded and saved in this section.
extension = "_one_metre_line_all_metres_recognized"

In [12]:
# Load the probabilities stored for this scenario from the KVETA data directory.
probabilities_name = f"kveta_probabilities{extension}"
kveta_probabilities = kveta_data_loader_and_saver.load_data(probabilities_name)

kveta_probabilities_one_metre_line_all_metres_recognized.json: loaded 1 records.


In [13]:
# The assigner is built from the "metrical_pos_given_syll_cls_proba" entry.
# NOTE(review): presumably P(metrical position | syllable class) — confirm.
metrical_position_probabilities = kveta_probabilities["metrical_pos_given_syll_cls_proba"]
metre_assigner = MetreAssigner(metrical_position_probabilities)

### Load testing data

In [14]:
# Test inputs are read through the shared loader (src/data), not the KVETA one.
test_inputs_name = f"test_X{extension}"
test_X = data_loader_and_saver.load_data(test_inputs_name)

test_X_one_metre_line_all_metres_recognized.json: loaded 8950 records.


In [15]:
# Sanity check: show the number of loaded test records as the cell output.
len(test_X)

8950

### Predict

In [16]:
# Combine the shared parsers and generator with this scenario's assigner.
metre_predictor = MetrePredictor(
    sampa_parser,
    syllable_class_parser,
    metre_generator,
    metre_assigner,
)

In [17]:
# Run the predictor on the loaded test data; predict() writes progress
# messages and per-poem warnings to the cell output.
predictions = metre_predictor.predict(test_X)

Analyzing poem 0...
Analyzing poem 1000...
Analyzing poem 2000...
Analyzing poem 3000...
Analyzing poem 4000...
Analyzing poem 5000...
Analyzing poem 6000...
Analyzing poem 7000...
Poem 7295: Too many syllables, no patterns generated, assigning error pattern...
Analyzing poem 8000...


In [19]:
# Store the predictions via the KVETA loader/saver, tagged with the scenario suffix.
predictions_name = f"kveta_predictions{extension}"
kveta_data_loader_and_saver.save_data(predictions, predictions_name)

Data saved to kveta_predictions_one_metre_line_all_metres_recognized.json
