In [1]:
from hyphenate import hyphenate_word
from model import sp_syllabler
import pickle
from nltk.metrics.distance import edit_distance
import pandas as pd
from hyphenate import hyphenate_word
from tensorflow.keras.preprocessing.sequence import pad_sequences
import time
import random
import numpy as np
import syllables
import pyphen

In [4]:
# 1
# prepping and calling functions

def get_probability(y_val):
    num_pos = 0
    num_neg = 0
    total = 0
    for word in y_val:
        for c in word:
            if c == 2:
                num_pos += 1
                total += 1
            elif c == 1:
                num_neg += 1
                total += 1
    return float(num_pos)/float(total)

def calc_brier(attempted, probability):
    total = 0
    sum_brier = 0
    for word in attempted:
        for c in attempted:
            total += 1
            if c == 2:
                sum_brier += (probability - 1)**2
            elif c == 1:
                sum_brier += (probability - 0)**2
    return (1./total)*(sum_brier)

def calc_f1(attempted, true):
    true_pos = 0
    true_neg = 0
    false_pos = 0
    false_neg = 0
    correct_num_char = 0
    total_checked = 0
    for i in range(0, len(attempted)):
        total_checked += 1
        if (len(attempted[i]) == len(true[i])):
            correct_num_char += 1
            for j in range(0, len(attempted[i])):
                if(attempted[i][j] == true[i][j]):
                    if true[i][j] == 1:
                        true_neg += 1
                    elif true[i][j] == 2:
                        true_pos += 1
                else:
                    if true[i][j] == 1:
                        false_pos += 1
                    elif true[i][j] == 2:
                        false_neg += 1
    
    precision = true_pos/(true_pos+false_pos)
    recall = true_pos/(true_pos + false_neg)
    f_one = 2/((1/precision)+(1/recall))
    
    return total_checked, correct_num_char, true_pos, true_neg, false_pos, false_neg, precision, recall, f_one

def convert_to_hot(syl_word):
    hot = []
    i = 0
    while  i < len(syl_word):
        if i == len(syl_word) - 1:
            hot += [1]
            return hot
        if syl_word[i+1] == '-':
            hot += [2]
            i += 2
        else:
            hot += [1]
            i += 1
    return hot

def to_categorical(sequences):
        cat_sequences = []
        for s in sequences:
            cats = []
            for item in s:
                cats.append(np.zeros(3))
                cats[-1][item] = 1.0
            cat_sequences.append(cats)
        return np.array(cat_sequences)

def data_prep(random_seed):
    random.seed(random_seed)
    training_data_size = 20000
    e2i_vocab_ortho = []

    x_tr_ortho = []
    y_tr = []

    x_val_ortho = []
    y_val = []

    orig_file = open('data/post_clean.txt')

    orig_data = orig_file.readlines()
    orig_file.close()
    orig_data = [line.strip('\n') for line in orig_data]
    random.shuffle(orig_data)
    data_eng = [line.split(';')[0].lower() for line in orig_data]
    data_syl = [line.split(';')[1].lower() for line in orig_data]
    y_tr = []

    for line in data_eng:
        for c in line:
            if c not in e2i_vocab_ortho:
                e2i_vocab_ortho += [c]

    e2i_ortho = dict((a,i) for i,a in enumerate(e2i_vocab_ortho, 1))

    for line in data_eng[:training_data_size]:
        converted = []
        for c in line:
            converted += [e2i_ortho[c]]
        x_tr_ortho += [converted]

    x_tr_ortho = pad_sequences(x_tr_ortho, maxlen=20, padding='post')

    for line in data_syl[:training_data_size]:
        y_tr += [convert_to_hot(line)]

    y_tr = pad_sequences(y_tr, maxlen=20, padding='post')
    
    x_val_ortho = []
    y_val = []

    for line in data_eng[training_data_size:]:
        converted = []
        for c in line:
            converted += [e2i_ortho[c]]
        x_val_ortho += [converted]

    x_val_ortho = pad_sequences(x_val_ortho, maxlen=20, padding='post')

    for line in data_syl[training_data_size:]:
        y_val += [convert_to_hot(line)]

    y_val = pad_sequences(y_val, maxlen=20, padding='post')
    return x_tr_ortho, y_tr, x_val_ortho, y_val, e2i_ortho

def training_split(x_tr_ortho, y_tr):
    y_tr = to_categorical(y_tr)
    split_index = int(.8 * len(x_tr_ortho))

    x_test_ortho = x_tr_ortho[split_index:]
    y_test = y_tr[split_index:]

    x_tr_ortho = x_tr_ortho[:split_index]
    y_tr = y_tr[:split_index]
    return x_tr_ortho, x_test_ortho, y_tr, y_test

def train_model(sp, x_tr_ortho, y_tr, x_test_ortho, y_test, run_id):
    sp.fit(x_tr_ortho, y_tr, x_test_ortho, y_test, ep=70, batch_size=128, save_filename="eval_runs/%i_single_pen_best_weights.h5"%run_id, verbose=0)
    
def sp_attempts(sp, x_val_ortho):
    attempts = []
    for i in range(0, len(x_val_ortho)):
        attempts += [sp.raw_syllabify(x_val_ortho[i])]
        print(i, end='\r')
    attempts_stripped = []
    for x in attempts:
        attempts_stripped += [[i for i in x if i !=0]]
    return attempts_stripped

def back_to_eng(x_val_ortho, e2i_ortho):
    converted_back_to_eng = []
    for x in x_val_ortho:
        real_word = ""
        for i in x:
                if i != 0:
                    real_word += list(e2i_ortho.keys())[list(e2i_ortho.values()).index(i)]
        converted_back_to_eng += [real_word]
    return converted_back_to_eng

def insert_and_rehot(sp, attempts, converted_back_to_eng):
    eng_conv_attempts = []
    for i in range(0, len(attempts)):
        eng_conv_attempts += [sp.insert_syl(converted_back_to_eng[i], attempts[i])]
    rehot_attempts = []
    for word in eng_conv_attempts:
        rehot_attempts += [convert_to_hot(word)]
    return rehot_attempts

def hyphenator_run(converted_back_to_eng):
    liang_attempts = []
    for word in converted_back_to_eng:
        liang_attempts += ['-'.join(hyphenate_word(word))]

    liang_attempts_hot_encoded = [convert_to_hot(word) for word in liang_attempts]
    return liang_attempts_hot_encoded

def pyph_run(converted_back_to_eng):
    Pyphenator = pyphen.Pyphen(lang='en_US', left=1, right=1)
    pyph_attempts = []
    for word in converted_back_to_eng:
        pyph_attempts += [Pyphenator.inserted(word)]
    pyph_attempts_hot =  [convert_to_hot(word) for word in pyph_attempts]
    return pyph_attempts_hot

def inconsistency_grab(sp, attempts, reals, converted_back_to_eng, run_id, sp_hyph):
    filename = 'final_evaluation/incorrect_syls/'+ sp_hyph + '_run_%i_incorrect_syls.txt'%run_id
    sum_lev = 0
    incorrect_counter = 0
    incorrect_syl_count = 0
    file = open(filename, 'w+', encoding='utf-8')
    file.write('Attempt' + '\t' + 'Real' + '\n')
    for i in range(0, len(attempts)):
        if attempts[i] != reals[i]:
            incorrect_counter += 1
            a = sp.insert_syl(converted_back_to_eng[i], attempts[i])
            r = sp.insert_syl(converted_back_to_eng[i], reals[i])
            if len(a.split('-')) != len(r.split('-')):
                incorrect_syl_count += 1
            sum_lev += edit_distance(a,r,substitution_cost=1, transpositions=True)
            file.write(a + '\t' + r + '\n')
            
    syllable_accuracy = ((len(attempts) - incorrect_syl_count)/len(attempts))
    av_lev_dist = (sum_lev/incorrect_counter)
    file.write("Words with errors: %i"%incorrect_counter +'\n')
    file.write("Words with incorrect number of syllables: %i"%incorrect_syl_count +'\n')
    file.write("Total evluated: %i"%len(attempts) +'\n')
    file.write("Perfect accuracy: %.2f"%((len(attempts) - incorrect_counter)/len(attempts)) +'\n')
    file.write("Number of syllables accuracy: %.2f"%((len(attempts) - incorrect_syl_count)/len(attempts)) +'\n')
    file.write("Average Levenshtein Distance(across incorrect words): %.2f"%(sum_lev/incorrect_counter) +'\n')
    file.close()
    return syllable_accuracy, av_lev_dist

def strip_y_val(y_val):
    reals = []
    for x in y_val:
        reals += [[i for i in x if i !=0]]
    return reals

def store_run_elements(sp, back_to_eng, attempts, reals, run_id, model):
    a = sp.insert_syl(back_to_eng[0],attempts[0])
    r = sp.insert_syl(back_to_eng[0],reals[0])
    curr_df = pd.DataFrame(columns=['given_word', 'real_syllabification', 'attempted_syllabification', 'lev_dist', 'true_syl_count', 'attempted_syl_count'])
    for i in range(1, len(attempts)):
        word_stats = {'given_word':back_to_eng[i], 'real_syllabification':r, 'attempted_syllabification':a, 'lev_dist':'null', 'true_syl_count':'null', 'attempted_syl_count':'null'}
        word_stats['lev_dist'] = edit_distance(a,r,substitution_cost=1, transpositions=True)
        word_stats['attempted_syl_count'] = len(a.split('-'))
        word_stats['true_syl_count'] = len(r.split('-'))
        curr_df = curr_df.append(word_stats,ignore_index=True)
    curr_df.to_csv("final_evaluation/individual_elements/run_%i_%s_word_stats.csv"%(run_id,model), sep=',', index=False, encoding='utf-8')

In [5]:
# 3
# full run

# instantiating a dataframe for recording run data
df = pd.DataFrame(columns=['run_id', 'random_seed','val_sample_size','training_time_seconds' ,'y_val_syl_prob', 'sp_f1_score', 'sp_precision', 'sp_recall','sp_brier_score','sp_syllable_accuracy','sp_average_lev', 'hyph_f1_score','hyph_precision', 'hyph_recall', 'hyph_brier_score','hyph_syllable_accuracy','hyph_average_lev','pyph_f1_score','pyph_precision', 'pyph_recall', 'pyph_brier_score','pyph_syllable_accuracy','pyph_average_lev'])
sp = None

for i in range(1,11):
    print("run: %i"%i)
    random_seed = int(time.time())
    run_stats = {'run_id':i, 'random_seed':random_seed,'val_sample_size':'null','training_time_seconds':'null', 'y_val_syl_prob':'null', 'sp_f1_score':'null', 'sp_precision':'null', 'sp_recall':'null','sp_brier_score':'null','sp_syllable_accuracy':'null','sp_average_lev':'null', 'hyph_f1_score':'null','hyph_precision':'null', 'hyph_recall':'null', 'hyph_brier_score':'null','hyph_syllable_accuracy':'null','hyph_average_lev':'null', 'pyph_f1_score':'null','pyph_precision':'null', 'pyph_recall':'null', 'pyph_brier_score':'null','pyph_syllable_accuracy':'null','pyph_average_lev':'null'}
    
    x_tr_ortho, y_tr, x_val_ortho, y_val, e2i_ortho = data_prep(random_seed=random_seed)
    
    run_stats['val_sample_size'] = len(x_val_ortho)
    
    x_tr_ortho, x_test_ortho, y_tr, y_test = training_split(x_tr_ortho=x_tr_ortho, y_tr=y_tr)
    
    del sp
    sp = sp_syllabler(e2i_ortho= e2i_ortho, ortho_input_size=20,latent_dim=32,embed_dim=32 ,max_feat=36)
    
    start = time.time()
    print("Begin sp training, run: %i"%i)
    train_model(sp, x_tr_ortho, y_tr, x_test_ortho, y_test, run_id=i)
    print("End sp training, run: %i"%i)
    end = time.time()
    run_stats['training_time_seconds'] = end - start
    
    print("Begin sp attempts, run: %i"%i)
    sp_attempts_array = sp_attempts(sp, x_val_ortho)
    print("Completed sp attempts, run: %i"%i)
    
    converted_back_to_eng = back_to_eng(x_val_ortho, e2i_ortho)
    
    sp_rehot_attempts = insert_and_rehot(sp, sp_attempts_array, converted_back_to_eng)
    
    liang_attempts_hot_encoded = hyphenator_run(converted_back_to_eng)
    
    syl_prob = get_probability(y_val)
    
    run_stats['y_val_syl_prob'] = syl_prob
    
    reals = strip_y_val(y_val)
    
    sp_total_checked, sp_correct_num_char, sp_true_pos, sp_true_neg, sp_false_pos, sp_false_neg, sp_precision, sp_recall, sp_f_one = calc_f1(sp_rehot_attempts, reals)
    sp_brier = calc_brier(sp_rehot_attempts, syl_prob)
    
    run_stats['sp_f1_score'] = sp_f_one
    run_stats['sp_precision'] = sp_precision
    run_stats['sp_recall'] = sp_recall
    run_stats['sp_brier_score'] = sp_brier
    
    hyph_total_checked, hyph_correct_num_char, hyph_true_pos, hyph_true_neg, hyph_false_pos, hyph_false_neg, hyph_precision, hyph_recall, hyph_f_one = calc_f1(liang_attempts_hot_encoded, reals)
    hyph_brier = calc_brier(liang_attempts_hot_encoded, syl_prob)
    
    run_stats['hyph_f1_score'] = hyph_f_one
    run_stats['hyph_precision'] = hyph_precision
    run_stats['hyph_recall'] = hyph_recall
    run_stats['hyph_brier_score'] = hyph_brier
    
    pyph_attempts_hot = pyph_run(converted_back_to_eng)
    pyph_total_checked, pyph_correct_num_char, pyph_true_pos, pyph_true_neg, pyph_false_pos, pyph_false_neg, pyph_precision, pyph_recall, pyph_f_one = calc_f1(pyph_attempts_hot, reals)
    pyph_brier = calc_brier(pyph_attempts_hot, syl_prob)
    
    run_stats['hyph_f1_score'] = hyph_f_one
    run_stats['hyph_precision'] = hyph_precision
    run_stats['hyph_recall'] = hyph_recall
    run_stats['hyph_brier_score'] = hyph_brier
    
    run_stats['pyph_f1_score'] = pyph_f_one
    run_stats['pyph_precision'] = pyph_precision
    run_stats['pyph_recall'] = pyph_recall
    run_stats['pyph_brier_score'] = pyph_brier
    
    sp_syllable_accuracy, sp_av_lev_dist = inconsistency_grab(sp, sp_rehot_attempts, reals, converted_back_to_eng, run_id=i, sp_hyph='sp')
    hyph_syllable_accuracy, hyph_av_lev_dist = inconsistency_grab(sp, liang_attempts_hot_encoded, reals, converted_back_to_eng, run_id=i, sp_hyph='hyph')
    pyph_syllable_accuracy, pyph_av_lev_dist = inconsistency_grab(sp, pyph_attempts_hot, reals, converted_back_to_eng, run_id=i, sp_hyph='pyph')
    
    
    run_stats['sp_syllable_accuracy'] = sp_syllable_accuracy
    run_stats['sp_average_lev'] = sp_av_lev_dist
    
    run_stats['hyph_syllable_accuracy'] = hyph_syllable_accuracy
    run_stats['hyph_average_lev'] = hyph_av_lev_dist
    
    run_stats['pyph_syllable_accuracy'] = pyph_syllable_accuracy
    run_stats['pyph_average_lev'] = pyph_av_lev_dist
    
    store_run_elements(sp,converted_back_to_eng, sp_rehot_attempts, reals, i, 'sp')
    store_run_elements(sp,converted_back_to_eng, liang_attempts_hot_encoded, reals, i, 'hyph')
    store_run_elements(sp,converted_back_to_eng, pyph_attempts_hot, reals, i, 'pyph')
    
    df = df.append(run_stats,ignore_index=True)
    df.to_csv("final_evaluation/run_%i_liang_sp_comparison.csv"%i, sep=',', index=False, encoding='utf-8')

df.to_csv('final_evaluation/total_liang_sp_comparison.csv', sep=',', index=False, encoding='utf-8')
display(df)

run: 1
Begin sp training, run: 1
Epoch 54: early stopping
End sp training, run: 1
Begin sp attempts, run: 1
Completed sp attempts, run: 1
run: 2
Begin sp training, run: 2
Epoch 65: early stopping
End sp training, run: 2
Begin sp attempts, run: 2
Completed sp attempts, run: 2
run: 3
Begin sp training, run: 3
Epoch 66: early stopping
End sp training, run: 3
Begin sp attempts, run: 3
Completed sp attempts, run: 3
run: 4
Begin sp training, run: 4
Epoch 64: early stopping
End sp training, run: 4
Begin sp attempts, run: 4
Completed sp attempts, run: 4
run: 5
Begin sp training, run: 5
Epoch 54: early stopping
End sp training, run: 5
Begin sp attempts, run: 5
Completed sp attempts, run: 5
run: 6
Begin sp training, run: 6
Epoch 63: early stopping
End sp training, run: 6
Begin sp attempts, run: 6
Completed sp attempts, run: 6
run: 7
Begin sp training, run: 7
Epoch 51: early stopping
End sp training, run: 7
Begin sp attempts, run: 7
Completed sp attempts, run: 7
run: 8
Begin sp training, run: 8
E

Unnamed: 0,run_id,random_seed,val_sample_size,training_time_seconds,y_val_syl_prob,sp_f1_score,sp_precision,sp_recall,sp_brier_score,sp_syllable_accuracy,...,hyph_recall,hyph_brier_score,hyph_syllable_accuracy,hyph_average_lev,pyph_f1_score,pyph_precision,pyph_recall,pyph_brier_score,pyph_syllable_accuracy,pyph_average_lev
0,1.0,1682568000.0,6500.0,392.342035,0.208093,0.896362,0.899934,0.892817,0.0,0.898462,...,0.806848,0.0,0.732462,1.206074,0.859414,0.902961,0.819873,0.0,0.688462,1.235824
1,2.0,1682569000.0,6500.0,495.376108,0.208393,0.905397,0.906458,0.904339,0.0,0.899692,...,0.808958,0.0,0.733538,1.187839,0.858457,0.900596,0.820086,0.0,0.682,1.213687
2,3.0,1682570000.0,6500.0,609.537641,0.206333,0.904579,0.903247,0.905914,0.0,0.899385,...,0.803638,0.0,0.729385,1.187332,0.856687,0.899885,0.817446,0.0,0.684,1.217835
3,4.0,1682571000.0,6500.0,672.423171,0.208356,0.9069,0.906095,0.907705,0.0,0.904,...,0.805966,0.0,0.728,1.197108,0.857941,0.901266,0.81859,0.0,0.681846,1.21766
4,5.0,1682573000.0,6500.0,982.439931,0.206777,0.902138,0.897237,0.907093,0.0,0.902308,...,0.804906,0.0,0.729231,1.173936,0.855189,0.896634,0.817407,0.0,0.681385,1.207047
5,6.0,1682574000.0,6500.0,1619.894697,0.208747,0.907235,0.906183,0.908288,0.0,0.903846,...,0.810816,0.0,0.733692,1.191908,0.862678,0.90594,0.82336,0.0,0.692,1.2198
6,7.0,1682577000.0,6500.0,1679.21955,0.208108,0.900715,0.889426,0.912295,0.0,0.894462,...,0.809911,0.0,0.733538,1.187093,0.859489,0.900369,0.82216,0.0,0.685538,1.214222
7,8.0,1682579000.0,6500.0,2949.424448,0.207341,0.908118,0.902534,0.913772,0.0,0.894769,...,0.806947,0.0,0.729692,1.203235,0.855313,0.895189,0.818837,0.0,0.676,1.224729
8,9.0,1682583000.0,6500.0,3196.814972,0.206732,0.901961,0.894302,0.909752,0.0,0.893846,...,0.809686,0.0,0.732308,1.185225,0.85827,0.898503,0.821486,0.0,0.686769,1.215747
9,10.0,1682587000.0,6500.0,3421.688818,0.207336,0.9014,0.903744,0.899069,0.0,0.893692,...,0.812153,0.0,0.736615,1.19098,0.862301,0.903878,0.824382,0.0,0.691385,1.223996


In [None]:
#cross comparing syllable accuracy with syllables

syllables_results = []

for random_seed in df['random_seed']:
    true_num_syl = []
    x_tr_ortho, y_tr, x_val_ortho, y_val, e2i_ortho = data_prep(random_seed=random_seed)
    converted_back_to_eng = back_to_eng(x_val_ortho, e2i_ortho)
    for x in y_val:
        num_syls = 1
        for c in x:
            if c == 2:
                num_syls += 1
        true_num_syl += [num_syls]
    syl_attempt = []
    for word in converted_back_to_eng:
        syl_attempt += [syllables.estimate(word)]
    
    correct_syls = 0
    for i in range(0, len(syl_attempt)):
        if syl_attempt[i] == true_num_syl[i]:
            correct_syls += 1
    syllables_results += [float(correct_syls)/float(len(syl_attempt))]
    
df['syllable_module_syl_count_accuracy'] = syllables_results
display(df)
df.to_csv('final_evaluation/total_liang_syllables_sp_comparison.csv', sep=',', index=False, encoding='utf-8')

In [None]:
# cross comparing with pyphen
Pyphenator = pyphen.Pyphen(lang='en_US', left=1, right=1)

pyph_f1_arr = []
pyph_precision_arr = []
pyph_recall_arr = []
pyph_av_lev_arr = []
pyph_brier_arr = []
pyph_syllable_accuracy_arr = []

run_num = 0

for random_seed in df['random_seed']:
    run_num += 1
    x_tr_ortho, y_tr, x_val_ortho, y_val, e2i_ortho = data_prep(random_seed=random_seed)
    converted_back_to_eng = back_to_eng(x_val_ortho, e2i_ortho)
    
    pyph_attempts = []
    for x in converted_back_to_eng:
        pyph_attempts += [Pyphenator.inserted(x)]
    pyph_attempts_hot = [convert_to_hot(x) for x in pyph_attempts]
    
    syl_prob = get_probability(y_val)
    
    reals = strip_y_val(y_val)
    
    pyph_total_checked, pyph_correct_num_char, pyph_true_pos, pyph_true_neg, pyph_false_pos, pyph_false_neg, pyph_precision, pyph_recall, pyph_f_one = calc_f1(pyph_attempts_hot, reals)
    pyph_brier = calc_brier(pyph_attempts_hot, syl_prob)
    pyph_syllable_accuracy, pyph_av_lev_dist = inconsistency_grab(sp, pyph_attempts_hot, reals, converted_back_to_eng, run_id=run_num, sp_hyph='pyph')
    
    pyph_f1_arr += [pyph_f_one]
    pyph_precision_arr += [pyph_precision]
    pyph_recall_arr += [pyph_recall]
    pyph_av_lev_arr += [pyph_av_lev_dist]
    pyph_brier_arr += [pyph_brier]
    pyph_syllable_accuracy_arr += [pyph_syllable_accuracy]
    
df['pyph_f1_score'] = pyph_f1_arr
df['pyph_precision'] = pyph_precision_arr
df['pyph_recall'] = pyph_recall_arr
df['pyph_brier_score'] = pyph_brier_arr
df['pyph_syllable_accuracy'] = pyph_syllable_accuracy_arr
df['pyph_average_lev'] = pyph_av_lev_arr

display(df)
df.to_csv('final_evaluation/total_liang_syllables_sp_pyph_comparison.csv', sep=',', index=False, encoding='utf-8')

In [None]:
Pyphenator = pyphen.Pyphen(lang='en_US', left=1, right=1)

word = 'iterator'.encode()
word = 'iterator'

print(Pyphenator.inserted(word))

In [None]:
sum_hyph_f1_score = 0
for x in df['hyph_f1_score']:
    sum_hyph_f1_score += x
print('average hyph f1:')
print(sum_hyph_f1_score/10)

sum_hyph_syl_acc = 0
for x in df['hyph_syllable_accuracy']:
    sum_hyph_syl_acc += x
print('average hyph syl acc:')
print(sum_hyph_syl_acc/10)

sum_hyph_lev = 0
for x in df['hyph_average_lev']:
    sum_hyph_lev += x
print('average hyph lev:')
print(sum_hyph_lev/10)

In [None]:
sum_sp_f1_score = 0
for x in df['sp_f1_score']:
    sum_sp_f1_score += x
print('average sp f1:')
print(sum_sp_f1_score/10)

sum_sp_syl_acc = 0
for x in df['sp_syllable_accuracy']:
    sum_sp_syl_acc += x
print('average sp syl acc:')
print(sum_sp_syl_acc/10)

sum_sp_lev = 0
for x in df['sp_average_lev']:
    sum_sp_lev += x
print('average sp lev:')
print(sum_hyph_lev/10)

In [None]:
print('standard deviation sp_f1_score:')
print(np.std(df['sp_f1_score']))
print('standard deviation sp_syllable_accuracy:')
print(np.std(df['sp_syllable_accuracy']))
print('standard deviation sp_lev:')
print(np.std(df['sp_average_lev']))