# Set-up

In [1]:
%cd ..

/app


In [17]:
import librosa
import pandas as pd
import numpy as np
import os
import time
import matplotlib.pyplot as plt

from tonami import pitch_process as pp
from tonami import user
from tonami import Utterance as u
from tonami import Classifier as c

In [96]:
# Consts
# USER_TESTING_FILEPATH = 'data/user_audio/testing/'
USER_TESTING_FILEPATH = 'data/users (balanced)/'
DB_THRESHOLD = 10
user_info = user.User(50, 350)
SAMPLERATE = 22050
clf = c.Classifier(4)
clf.load_clf('tonami/data/pickled_svm_80.pkl')

In [97]:
user_audio_dict = {}
raw_data_dict = {}
times_dict = {}

# Run all tasks (long)

In [98]:
# get all files
filenames = os.listdir(USER_TESTING_FILEPATH)
num_files = len(filenames)

user_audio_dict['Filename'] = filenames

In [99]:
# get target tones
targets = [0] * num_files

for i, filename in enumerate(filenames):
    # if i > 2:
    #     targets[i] = int(filename.split("_")[1][-1])
    
    targets[i] = int(filename.split("_")[0][-1])

user_audio_dict['Target'] = targets

In [100]:
# load files
time_series_arr = [None] * num_files
load_times = [0] * num_files

for i, filename in enumerate(filenames):
    start = time.time()
    time_series_arr[i], _ = librosa.load(USER_TESTING_FILEPATH + filename)  
    load_times[i] = time.time() - start

raw_data_dict['Time Series'] = time_series_arr
times_dict['load'] = load_times

In [101]:
# get durations
durations = [0] * num_files

for i, time_series in enumerate(time_series_arr):
    durations[i] = len(time_series)/SAMPLERATE

user_audio_dict['Durations'] = durations

In [102]:
# trim files
f_time_series_arr = [None] * num_files
trim_times = [0] * num_files

for i, time_series in enumerate(time_series_arr):
    start = time.time()
    f_time_series_arr[i], _ = librosa.effects.trim(y=time_series, top_db=DB_THRESHOLD) 
    trim_times[i] = time.time() - start

raw_data_dict['Filtered Time Series'] = f_time_series_arr
times_dict['trim'] = trim_times

In [103]:
# get filtered durations
f_durations = [0] * num_files

for i, f_time_series in enumerate(f_time_series_arr):
    f_durations[i] = len(f_time_series)/SAMPLERATE

user_audio_dict['Filtered Durations'] = f_durations

In [85]:
# get pitch contours
pitch_contour_arr = [None] * num_files
contour_times = [0] * num_files

for i, f_time_series in enumerate(f_time_series_arr):
    start = time.time()
    pitch_contour_arr[i] , _, _ = librosa.pyin(f_time_series, fmin=50, fmax=400)
    contour_times[i] = time.time() - start

raw_data_dict['Pitch Contours'] = pitch_contour_arr
times_dict['contour'] = contour_times

In [86]:
# Tonami's feature extraction   
normalized_pitch_arr = [None] * num_files
nans_arr = [None] * num_files
features_arr = [None] * num_files
extraction_times = [0] * num_files

for i, pitch_contour in enumerate(pitch_contour_arr):
    start = time.time()
    interp, nans_arr[i] = pp.preprocess(pitch_contour)
    interp_np = np.array([interp], dtype=float)
    profile = test_user.get_pitch_profile()
    avgd = pp.moving_average(interp_np)
    normalized_pitch = pp.normalize_pitch(avgd, profile['max_f0'], profile['min_f0'])
    features_arr[i] = np.array([pp.basic_feat_calc(normalized_pitch[0])])
    normalized_pitch_arr[i] = normalized_pitch
    extraction_times[i] = time.time() - start
    
raw_data_dict['Normalized Pitch'] = normalized_pitch_arr
raw_data_dict['Nans'] = nans_arr
raw_data_dict['Features'] = features_arr
times_dict['extraction'] = extraction_times

In [87]:
# Classifier time (svm_80_lda)
class_times = [0] * num_files
classified_tones = [0] * num_files
classified_probs = [None] * num_files
classification_errors = 0;

for i, features in enumerate(features_arr):
    start = time.time()
    try:
        classified_tones[i], classified_probs[i] = clf.classify_tones(features)
    except:
        classification_errors += 1
    class_times[i] = time.time() - start

user_audio_dict['Classified Tones'] = classified_tones
user_audio_dict['Classified Probs'] = classified_probs
times_dict['class'] = class_times

In [95]:
# Get rating
rating_times = [0] * num_files
ratings_arr = [""] * num_files

for i, clf_probs in enumerate(classified_probs):
    start = time.time()
    try:
        print(clf_probs[0])
        target_prob = clf_probs[0][targets[i]-1]

        # if the probability of any other tone is >= 90%, return try again
        for prob in clf_probs[0][np.arange(len(clf_probs[0]))!=(targets[i]-1)]:
            if prob >= 0.9:
                ratings_arr[i] = "Not quite, try again?"

        # if your target probability is more than 50%, you are good
        if target_prob > 0.5:
            ratings_arr[i] = "Great job!"
        else:
            ratings_arr[i] = "Almost there!"
    except:
        ratings_arr[i] = "Unavailable"
    rating_times[i] = time.time() - start

user_audio_dict['Rating'] = ratings_arr
times_dict['rate'] = rating_times

[8.19681778e-08 3.91351620e-08 9.99770256e-01 2.29623285e-04]
[9.79727212e-01 1.90165924e-04 2.79225700e-03 1.72903647e-02]
[9.78796511e-01 1.26304126e-04 3.24547260e-03 1.78317123e-02]
[0.5586142  0.00183959 0.08440883 0.35513738]
[0.70857033 0.02865544 0.21544539 0.04732884]
[9.27435016e-08 1.02779770e-07 9.99713736e-01 2.86068943e-04]
[0.80652717 0.00732524 0.04691233 0.13923526]
[9.75282453e-01 2.14989851e-04 6.65757661e-03 1.78449801e-02]
[7.37517949e-07 2.95878964e-07 9.99420182e-01 5.78784629e-04]
[3.38072391e-03 2.26095928e-04 9.93442804e-01 2.95037663e-03]
[1.20523416e-03 1.80274373e-05 9.76468925e-01 2.23078131e-02]
[3.40352126e-08 1.84275089e-08 3.67318983e-08 9.99999911e-01]
[1.67863634e-07 5.93385163e-08 7.23856274e-08 9.99999700e-01]
[9.76441261e-01 5.76805622e-04 1.02684992e-02 1.27134341e-02]
[2.25174493e-03 5.64035311e-04 9.94102189e-01 3.08203064e-03]
[0.32284519 0.10349621 0.22643181 0.34722679]
[8.93826255e-01 1.46384574e-04 2.50745333e-03 1.03519907e-01]
[3.1970238

In [None]:
# Processing time (svm_80_lda)    
process_times = [0] * num_files

for i, filename in enumerate(filenames):
    start = time.time()
    user_utterance = u.Utterance(filename=USER_TESTING_FILEPATH + filename)
    _, _, features = user_utterance.pre_process(user_info)
    try:
        _, _ = clf.classify_tones(features)
    except:
        pass
    process_times[i] = time.time() - start

times_dict['total'] = process_times

# All generated data

In [104]:
# consolidate data
user_audio_df = pd.DataFrame(user_audio_dict)
raw_data_df = pd.DataFrame(raw_data_dict)
times_df = pd.DataFrame(times_dict)

## With Outliers

In [105]:
pd.set_option('display.max_rows', None)

In [106]:
display(user_audio_df)

Unnamed: 0,Filename,Target,Durations,Filtered Durations
0,ba1_HV1_ex8_R2_user-testing.mp3,1,3.98,0.278639
1,ba1_LV3_ex8_R4_user-testing.mp3,1,1.34,0.25542
2,ba1_LV6_ex8_R1_user-testing.mp3,1,2.12,0.301859
3,bao3_HV4_ex20_R1_user-testing.mp3,3,2.24,0.696599
4,bao3_LV6_ex20_R2_user-testing.mp3,3,2.54,0.650159
5,bing1_HV1_ex16_R2_user-testing.mp3,1,2.82,0.325079
6,bing1_LV2_ex16_R1_user-testing.mp3,1,4.94,1.160998
7,bing1_LV6_ex16_R3_user-testing.mp3,1,1.84,0.20898
8,fa1_HV1_ex3_R1_user-testing.mp3,1,3.92,0.580499
9,fa1_HV4_ex3_R1_user-testing.mp3,1,2.24,0.348299


In [None]:
n, bins, _ = plt.hist(user_audio_df['Durations'])
plt.show() 
print(n)
print(bins)

In [None]:
plt.scatter(user_audio_df['Durations'], times_df['load'])
plt.show()

In [None]:
n, bins, _ = plt.hist(user_audio_df['Filtered Durations'])
plt.show() 
print(n)
print(bins)

In [None]:
plt.scatter(user_audio_df['Filtered Durations'], times_df['contour'])
plt.show()

### Raw Data

In [91]:
display(raw_data_df)

Unnamed: 0,Time Series,Filtered Time Series,Pitch Contours,Normalized Pitch,Nans,Features
0,"[-2.2323418e-05, -9.556377e-06, -2.1398931e-05...","[0.008356462, 0.0073015993, 0.0058155973, 0.00...","[nan, 284.4812036394563, 273.20805135087915, 2...","[[0.5973124579173754, 0.6031324176103163, 0.63...","[False, False, False, False, False, False, Fal...","[[0.6211809316481113, 0.6401089943865449, 0.59..."
1,"[1.6478629e-07, -1.6929025e-07, 1.6652986e-07,...","[-0.0084268525, -0.006313884, -0.0023718618, -...","[nan, nan, 175.11872228117704, 171.11900513652...","[[1.8264043402083043, 1.849605158062067, 1.902...","[False, False, False, False, False, False, Fal...","[[1.8864387150604478, 2.072237895935814, 2.112..."
2,"[5.593205e-08, -4.320672e-08, 2.5254167e-08, -...","[-0.01579637, -0.0017586777, -0.0072183996, -0...","[nan, 155.11447618337345, 156.0130452026747, 1...","[[2.0820591752473065, 2.0850275605500457, 2.09...","[False, False, False, False, False, False, Fal...","[[2.0944702143850384, 2.142887168565473, 2.179..."
3,"[-4.7085686e-07, 1.6121598e-05, -2.965797e-05,...","[0.16149397, 0.16343096, 0.14990065, 0.1235436...","[nan, nan, nan, 209.4588245641254, 205.8604473...","[[1.3632580340046587, 1.4034857262998215, 1.47...","[False, False, False, False, False, False, Fal...","[[1.6930268250007348, 1.7961841709867103, 1.53..."
4,"[1.4438017e-07, -1.4551632e-07, 1.4012384e-07,...","[-0.02786146, -0.022654438, -0.017134013, -0.0...","[nan, nan, nan, 137.3953647458089, 136.6040256...","[[2.414505507612472, 2.417463601377066, 2.4234...","[False, False, False, False, False, False, Fal...","[[2.43484434936543, 2.7837483289567047, 3.0589..."
5,"[1.7059865e-05, 8.577302e-06, 3.66678e-08, -1....","[0.0173583, 0.03505041, 0.06005359, 0.05949120...","[nan, nan, 301.3973362949152, 294.513411620105...","[[0.4283660086986604, 0.4012486703494046, 0.25...","[False, False, False, False, False, True, True...","[[0.26029893582468155, 0.11943758057184553, 0...."
6,"[9.263582e-06, 8.813827e-06, -3.856561e-06, 3....","[-0.013737056, -0.016202377, -0.0030810894, 0....","[nan, 231.07053937445463, 221.91389441356898, ...","[[1.137409639999217, 1.1660393610793256, 1.224...","[False, False, False, False, False, False, Fal...","[[1.3792184084250534, 1.893715138482539, 2.003..."
7,"[5.3673226e-08, -6.89991e-08, 8.60138e-08, -1....","[2.0850384e-06, -2.4053743e-05, -3.3571647e-05...","[nan, nan, 176.1331747192297, 173.107312201228...","[[1.7969425659891067, 1.8115453095712417, 1.84...","[False, False, False, False, False, False, Fal...","[[1.8174862400751006, 1.9085615762361499, 1.95..."
8,"[-9.860878e-06, 1.4507506e-05, 1.2650449e-05, ...","[0.001696023, -0.0022924992, 0.001871909, -0.0...","[nan, nan, 263.9015821545788, 263.901582154578...","[[0.7225384951092451, 0.7165811056383319, 0.71...","[False, False, False, False, False, False, Fal...","[[0.728784170261815, 0.7685447406661127, 0.753..."
9,"[-3.186856e-05, 1.1959399e-05, -1.5235764e-05,...","[-0.017877454, -0.019538423, -0.037981894, -0....","[nan, 231.07053937445463, 232.40911739156792, ...","[[1.0579635123117417, 1.0549813863807513, 1.04...","[False, False, False, False, False, False, Fal...","[[1.0472663200422034, 1.00568755303741, 0.9724..."


In [94]:
error_rows = raw_data_df[user_audio_df['Classified Tones'] == 0]
display(error_rows)

Unnamed: 0,Time Series,Filtered Time Series,Pitch Contours,Normalized Pitch,Nans,Features
18,"[1.4081498e-08, 4.4713836e-08, -1.1749112e-07,...","[0.039636612, 0.058223367, 0.019592335, 0.0060...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[[nan, nan, nan, nan, nan, nan, nan, nan, nan,...","[True, True, True, True, True, True, True, Tru...","[[nan, nan, nan, nan, nan, nan]]"
19,"[-1.550862e-05, -1.3107829e-05, 1.6517479e-05,...","[-0.030242397, -0.06909699, 0.008794107, 0.014...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[[nan, nan, nan, nan, nan, nan, nan, nan, nan,...","[True, True, True, True, True, True, True, Tru...","[[nan, nan, nan, nan, nan, nan]]"
22,"[7.316672e-08, -7.8282945e-08, 8.0800866e-08, ...","[0.00043815587, 0.001019749, -2.2616014e-05, -...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[[nan, nan, nan, nan, nan, nan, nan, nan, nan,...","[True, True, True, True, True, True, True, Tru...","[[nan, nan, nan, nan, nan, nan]]"
26,"[8.7271324e-08, -1.04370265e-07, 1.2081979e-07...","[-0.0053558517, -0.022193376, -0.0077437153, 0...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[[nan, nan, nan, nan, nan, nan, nan, nan, nan,...","[True, True, True, True, True, True, True, Tru...","[[nan, nan, nan, nan, nan, nan]]"
34,"[4.6437202e-05, 2.529268e-05, 1.35565015e-05, ...","[-0.02716355, -0.015015244, -0.003392055, 0.00...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[[nan, nan, nan, nan, nan, nan, nan, nan, nan,...","[True, True, True, True, True, True, True, Tru...","[[nan, nan, nan, nan, nan, nan]]"
42,"[4.0045527e-09, -2.5451214e-08, 5.199523e-08, ...","[0.00064278016, 0.0008164294, 0.00034824703, 0...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[[nan, nan, nan, nan, nan, nan, nan, nan, nan,...","[True, True, True, True, True, True, True, Tru...","[[nan, nan, nan, nan, nan, nan]]"
48,"[-1.4008072e-07, 1.3419891e-07, -1.196462e-07,...","[0.0035159409, 0.0021071385, 0.004613428, 0.00...","[nan, nan, nan, nan, nan, nan, nan, nan, nan, ...","[[nan, nan, nan, nan, nan, nan, nan, nan, nan,...","[True, True, True, True, True, True, True, Tru...","[[nan, nan, nan, nan, nan, nan]]"


### Times

In [None]:
display(times_df)

## Without Outliers

In [None]:
outliers = user_audio_df[user_audio_df['Filtered Durations'] > 0.8].index
print("Num of outliers: ", len(outliers))
user_audio_df_fix = user_audio_df.drop(outliers)
raw_data_df_fix = raw_data_df.drop(outliers)
times_df_fix = times_df.drop(outliers)

### User Audio

In [None]:
n, bins, _ = plt.hist(user_audio_df_fix['Filtered Durations'])
plt.show() 
print(n)
print(bins)

# Report

In [None]:
# File report
print("Audio File Statistics")
print("Quantity :", user_audio_df.shape[0])
print("Duration :", "%.2f" % round(user_audio_df['Durations'].mean(), 2), u"\u00B1", "%.2f" % round(user_audio_df['Durations'].std(), 2) )
print("Filtered :", "%.2f" % round(user_audio_df['Filtered Durations'].mean(), 2), u"\u00B1", "%.2f" % round(user_audio_df['Filtered Durations'].std(), 2) )
print("\n")

print("Audio File Statistics (w/o outliers)")
print("Quantity :", user_audio_df_fix.shape[0])
print("Duration :", "%.2f" % round(user_audio_df_fix['Durations'].mean(), 2), u"\u00B1", "%.2f" % round(user_audio_df_fix['Durations'].std(), 2) )
print("Filtered :", "%.2f" % round(user_audio_df_fix['Filtered Durations'].mean(), 2), u"\u00B1", "%.2f" % round(user_audio_df_fix['Filtered Durations'].std(), 2) )

In [None]:
# Time breakdown reports
print("Task Time Statistics (microseconds)")
for name, values in times_df.items():
    print("%10s :" % name, 
          "%6i" % (values.mean()*1000000), 
          u"\u00B1", 
          "%6i" % (values.std()*1000000)
         )    
print("\n")

print("Task Time Statistics (w/o outliers) (microseconds)")
for name, values in times_df_fix.items():
    print("%10s :" % name, 
          "%6i" % (values.mean()*1000000),
          u"\u00B1", 
          "%6i" % (values.std()*1000000)
         )

In [38]:
print("Classification errors: ", classification_errors)

Classification errors:  64
