# Load the CSV with DNS-MOS scores and visualize them

In [None]:
#dependencies

import pandas as pd
import numpy as np
import pprint
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns



In [None]:
# Read the DNS-MOS score table, derive a 'label' and an 'env' column from each
# file path, sort the rows by label and write the enriched table back to disk.
path = "/Users/marius/Documents/Uni/TU_Berlin_Master/Masterarbeit/Data/MOS_Scores_Voicefixer.csv"
data = pd.read_csv(path)
# rename the first CSV column so that it can be addressed as 'row_number'
data.rename(columns={data.columns[0]: 'row_number'}, inplace=True)

# add labels: 'noisy', 'voicefixer', 'produced'
# The parent folder of each file decides its label. Folders that are not
# listed here (and paths without a parent folder) leave the label as NaN.
folder_to_label = {
    'producedSpeech': 'produced',
    'noisySpeech': 'noisy',
    'voicefixerOutput': 'voicefixer',
}
path_parts = data['filename'].str.split('/')
data['label'] = path_parts.str[-2].map(folder_to_label)

envs = ['ipad_balcony1',
        'ipad_bedroom1',
        'ipad_confroom1',
        'ipad_confroom2',
        'ipad_livingroom1',
        'ipad_office1',
        'ipad_office2',
        'ipadflat_confroom1',
        'ipadflat_office1',
        'iphone_balcony1',
        'iphone_bedroom1',
        'iphone_livingroom1']

# add environments: 'ipadflat_office1', 'iphone_livingroom1',...
# An environment matches when its name occurs in the file name (last path
# component). The column is created up front so that it exists even when no
# file matches; if several environments match, the one listed last wins.
file_names = path_parts.str[-1]
data['env'] = pd.Series(np.nan, index=data.index, dtype=object)
for env in envs:
    # plain substring test (no regex); rows without a file name never match
    data.loc[file_names.str.contains(env, regex=False, na=False), 'env'] = env

# sort by label
data = data.sort_values(by=['label'])

# save as .csv file
data.to_csv('/Users/marius/Documents/Uni/TU_Berlin_Master/Masterarbeit/Data/data.csv', encoding='utf-8')

# show some values
data.head(300)


In [None]:
# Scatter plots of BAK_raw against row_number, one figure per label.

# split the table by label
produced = data[data['label'] == 'produced']
noisy = data[data['label'] == 'noisy']
voicefixer = data[data['label'] == 'voicefixer']

# figures are drawn in the order produced, voicefixer, noisy
for figure_title, subset in (
    ('produced', produced),
    ('voicefixer', voicefixer),
    ('noisy', noisy),
):
    subset.plot(kind='scatter', x='row_number', y='BAK_raw', figsize=(5, 2))
    # suptitle applies to the current figure
    plt.suptitle(figure_title)





In [None]:
# Bar plot comparing the mean BAK_raw score of the three labels.
names = ['produced', 'voicefixer', 'noisy']

# one sub-table per label, kept in the same order as `names`
subsets = {name: data[data['label'] == name] for name in names}
produced, voicefixer, noisy = (subsets[name] for name in names)

# mean BAK_raw score of each sub-table, again in the order of `names`
values = [subset['BAK_raw'].mean() for subset in subsets.values()]
produced_mean, voicefixer_mean, noisy_mean = values

######

x_pos = np.arange(len(names))

plt.bar(x_pos, values)
plt.xticks(x_pos, names)
plt.title('DNS-MOS Values of different files')
plt.ylabel('DNS-MOS Score')
plt.show()


In [None]:
# Mean BAK_raw score per environment for the noisy and the voicefixer files,
# plus the overall mean of the produced files.
# The results are collected in MOS_values in this order:
#   noisy means (one per entry of envs),
#   voicefixer means (one per entry of envs),
#   produced mean.
produced = data.loc[data['label'] == 'produced']
produced_mean = np.mean(produced['BAK_raw'])

envs = ['ipad_balcony1',
        'ipad_bedroom1',
        'ipad_confroom1',
        'ipad_confroom2',
        'ipad_livingroom1',
        'ipad_office1',
        'ipad_office2',
        'ipadflat_confroom1',
        'ipadflat_office1',
        'iphone_balcony1',
        'iphone_bedroom1',
        'iphone_livingroom1']

# noisy - mean value for each environment
is_noisy = data['label'] == 'noisy'
mos_values_noisy = [
    np.mean(data.loc[is_noisy & (data['env'] == env), 'BAK_raw'])
    for env in envs
]

# The message is printed as plain text and only the list is pretty-printed;
# pprint on the whole f-string would emit a quoted, wrapped string repr.
print('------------------------')
print('Mean values for MOS_bak in the different noisy environments:')
pprint.pprint(mos_values_noisy)


# voicefixer generated speech - mean value for each environment
is_voicefixer = data['label'] == 'voicefixer'
voicefixer_MOS_values = [
    np.mean(data.loc[is_voicefixer & (data['env'] == env), 'BAK_raw'])
    for env in envs
]

print('------------------------')
print('Voicefixer mean values for MOS_bak in the different environments:')
pprint.pprint(voicefixer_MOS_values)


# Produced speech - mean value (same quantity as produced_mean above)
produced_MOS_values = produced_mean

print('------------------------')
print(f'Produced mean values for MOS_bak: {produced_MOS_values}')

# flat array: noisy means, then voicefixer means, then the produced mean
MOS_values = np.concatenate(
    [mos_values_noisy, voicefixer_MOS_values, [produced_MOS_values]]
)
print('------------------------')
print(f'MOS mean values for MOS_bak: {MOS_values}')

# number of collected values (2 * number of environments + 1)
len(MOS_values)


In [None]:
# Bar plot of all mean scores in MOS_values: one bar per environment for the
# noisy files, one bar per environment for the voicefixer files and a final
# bar for the produced files.

# Tick labels and colours are derived from `envs`, the list MOS_values was
# built from, so they cannot get out of step with the number of bars.
x = list(envs) + list(envs) + ['produced_speech']
x_pos = np.arange(len(x))
y = MOS_values
# one colour per bar, grouped like the tick labels
col = ['red'] * len(envs) + ['blue'] * len(envs) + ['green']

# plot
plt.figure(figsize=(10, 4))
plt.bar(x_pos, y, color=col)
plt.xticks(x_pos, x, rotation=90)
# create legend manually
a = mpatches.Patch(color='red', label='Noisy speech files')
b = mpatches.Patch(color='blue', label='Voicefixer speech files')
c = mpatches.Patch(color='green', label='Produced speech files')
plt.legend(handles=[a, b, c])
plt.title('Mean DNS-MOS Scores sorted by environments')
# the original label was the placeholder '..'
plt.xlabel('Recording environment')
plt.ylabel('DNS-MOS Score [BAK_raw]')
plt.show()