### Creating Dataset

In [3]:
import os
import tempfile

import pandas as pd
from mutagen.wave import WAVE
from pydub import AudioSegment, silence

In [54]:
# Conversion to WAVE format
# Decode every .mp3/.ogg file found in the dataset folder with pydub and
# re-export it as "<same name>.wav" inside Dataset/WAVE_Files/.
# The folder is listed through the same relative path the files are read
# from, so listing and reading can never point at different directories.
dataset_dir = "Dataset/"
wave_dir = dataset_dir + "WAVE_Files/"
os.makedirs(wave_dir, exist_ok=True)  # make sure the export target exists

# Extension -> pydub loader. Entries with any other extension (including the
# WAVE_Files sub-folder itself) are skipped.
loaders = {".mp3": AudioSegment.from_mp3, ".ogg": AudioSegment.from_ogg}

for inp_file in os.listdir(dataset_dir):
    stem, extension = os.path.splitext(inp_file)
    load = loaders.get(extension.lower())  # accept ".MP3" / ".Ogg" as well
    if load is None:
        continue
    sound = load(dataset_dir + inp_file)
    out_file = wave_dir + stem + ".wav"
    sound.export(out_file, format="wav")

In [55]:
# Generating CSV file
# For every numbered recording Dataset/WAVE_Files/<i>.wav: read its length,
# detect the silent stretches, and label the speaker from the percentage of
# the recording that is silent. The rows are written to data.csv.
NUM_RECORDINGS = 104          # recordings are named 1.wav ... 104.wav
MIN_SILENCE_MS = 500          # min_silence_len handed to detect_silence
SILENCE_OFFSET_DB = 16        # silence threshold = clip dBFS minus this value
MAX_SILENCE_PERCENT = 20.00   # above this share of silence -> "Not Confident"

info = {'file': [],
        'total_duration': [],
        'num_pauses' : [],
        'total_silence' : [],
        'personality' : []}

for i in range(1, NUM_RECORDINGS + 1):
    wav_path = "Dataset/WAVE_Files/" + str(i) + ".wav"
    total_duration = WAVE(wav_path).info.length

    if total_duration <= 0:
        # The silence percentage divides by the duration; an empty recording
        # has no meaningful ratio, so skip it rather than raise ZeroDivisionError.
        print("Skipping " + wav_path + " : zero-length audio")
        continue

    audio = AudioSegment.from_wav(wav_path)
    silent_sections = silence.detect_silence(
        audio,
        min_silence_len=MIN_SILENCE_MS,
        silence_thresh=audio.dBFS - SILENCE_OFFSET_DB,
    )
    num_pauses = len(silent_sections)
    # Each section is a (start, stop) pair; dividing by 1000 puts the pause
    # total on the same scale as total_duration for the percentage below.
    total_duration_of_pauses = sum(stop/1000 - start/1000 for start, stop in silent_sections)

    silence_percent = round(total_duration_of_pauses*100/total_duration, 2)
    conf_pred = "Not Confident" if silence_percent > MAX_SILENCE_PERCENT else "Confident"

    info['file'].append(str(i)+".wav")
    info['total_duration'].append(round(total_duration,2))
    info['num_pauses'].append(num_pauses)
    info['total_silence'].append(round(total_duration_of_pauses,2))
    info['personality'].append(conf_pred)

df = pd.DataFrame(info)
df.to_csv("data.csv", index=False)
df.head()  # last expression of the cell: preview the first rows

### Building SVM Model

In [1]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib

In [5]:
# Load the generated feature table and separate the inputs from the label.
df = pd.read_csv('data.csv')
feature_columns = ['total_duration', 'num_pauses', 'total_silence']
X, Y = df[feature_columns], df['personality']

# Hold out 20 % of the rows (shuffled with a fixed seed) for evaluation.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.20, shuffle=True, random_state=100
)

# Fit a linear-kernel support vector classifier on the training split.
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, Y_train)
print("SVM Model build successful")

# Score the held-out split and report the accuracy as a percentage.
pred = svm_model.predict(X_test)
accuracy_percent = round(accuracy_score(Y_test, pred)*100, 2)
print("Accuracy Score : " + str(accuracy_percent) + " %")

# Save the fitted classifier to disk; dump() is the cell's displayed result.
joblib.dump(svm_model, 'svm_model.sav')

SVM Model build successful
Accuracy Score : 95.24 %


['svm_model.sav']

### Confidence Prediction from Audio File

In [6]:
# Conversion to WAVE format
# Ask for a recording, obtain a WAV version of it, compute the same three
# features the classifier was trained on, and print the predicted label.
audio_file = input("Enter audio file name with extension : ")
extension = os.path.splitext(audio_file)[1].lower()
loaders = {".mp3": AudioSegment.from_mp3, ".ogg": AudioSegment.from_ogg}

# Converted audio is written into a throw-away folder, so neither the user's
# own file nor an unrelated "<name>.wav" next to it is overwritten or deleted.
with tempfile.TemporaryDirectory() as tmp_dir:
    if extension == ".wav":
        out_file = audio_file  # already WAVE: analyse it where it is
    elif extension in loaders:
        out_file = os.path.join(tmp_dir, "converted.wav")
        sound = loaders[extension](audio_file)
        # export() returns the output file object; close it explicitly so the
        # temporary folder can be removed when the with-block ends.
        sound.export(out_file, format="wav").close()
    else:
        raise ValueError("Unsupported audio format : " + audio_file)

    # Predict using SVM Classifier
    total_duration = WAVE(out_file).info.length

    audio = AudioSegment.from_wav(out_file)
    silent_sections = silence.detect_silence(audio, min_silence_len=500, silence_thresh=audio.dBFS-16)

num_pauses = len(silent_sections)
# Each section is a (start, stop) pair; dividing by 1000 matches the scale
# used when the training CSV was generated.
total_duration_of_pauses = sum(stop/1000 - start/1000 for start, stop in silent_sections)

# Column names and order must match the features the model was fitted on.
feature_set = pd.DataFrame({'total_duration': [round(total_duration,2)],
                            'num_pauses' : [num_pauses],
                            'total_silence' : [round(total_duration_of_pauses,2)]})

# NOTE: joblib.load unpickles the file; only load model files you created yourself.
svm_model = joblib.load('svm_model.sav')
pred = svm_model.predict(feature_set)
print("Confidence Prediction : " + str(pred[0]))

Confidence Prediction : Confident
