In [117]:
import glob
import os
from pydub import AudioSegment

In [118]:
import pandas as pd
import librosa
import librosa.display
from sklearn import preprocessing

In [339]:
import numpy as np

def get_features(file_name):
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
        mfccs = get_mfcc(audio, sample_rate)
        flux = get_spectral_features(audio, sample_rate)[2]
#         zero_crossing_rate = get_zero_crossing_rate(audio)
#         spectral_features = get_spectral_features(audio, sample_rate)
        chroma_features = get_chroma_features(audio, sample_rate)
        return [mfccs, flux] + chroma_features
    except Exception as e:
        print("Error encountered while parsing file: ", file)
        return None

In [562]:
def get_all_features(folder):
    folders = os.listdir(folder)
    folders.sort()
    all_features = []
    for label, sub_dir in enumerate(folders): #label {0; 4}
        for file_name in glob.glob(os.getcwd() + '/' + folder +'/' + sub_dir + '/*.wav'):
#             print("Extracting file ", file_name)
            try:
                features = get_features(file_name)
            except Exception as e:
                print("Extraction error")
                continue
            sub_strs= file_name.split('/')
            all_features.append(features +  [label + 1]+[sub_strs[-1]])
    data = pd.DataFrame(all_features, columns=['mfccs', 'flux', 'chroma_features1', 'chroma_features2', 'chroma_features3', 'class_label','author'])
    return data

In [563]:
def get_mfcc(audio, sr):
    #Mel Frequency Cepstral Coefficient
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=100)
    mfccs_scaled = np.mean(mfccs.T,axis=0)
#     mfccs - (40, სხვადასხვა), mfccsscaled - (40,)
    return mfccs_scaled

In [564]:
def get_zero_crossing_rate(audio):
    #ZERO_CROSSING_RATE
    rate = librosa.feature.zero_crossing_rate(audio)
    rate_scaled = np.mean(rate.T,axis=0)
#     rate - (1, სხვადასხვა)
    return rate_scaled

In [565]:
def get_spectral_features(audio, sr):
    sp = librosa.feature.spectral_centroid(audio)
    S, phase = librosa.magphase(librosa.stft(audio))
    a = librosa.feature.spectral_rolloff(S=S, sr=sr)
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr)
    onset_env.shape = (onset_env.shape[0], 1)
    sp_scaled = np.mean(sp.T,axis=0)
    a_scaled = np.mean(a.T,axis=0)
    onset_env_scaled = np.mean(onset_env,axis=0)
    return [sp_scaled, a_scaled, onset_env_scaled]

In [344]:
def get_chroma_features(audio, sr):    
    # Chroma features (needs scaling)
    chroma_stft = librosa.feature.chroma_stft(y=audio, sr=sr) #N1
    chroma_cens = librosa.feature.chroma_cens(y=audio, sr=sr) #Chroma energy normalized statistics
    chroma_cq = librosa.feature.chroma_cqt(y=audio, sr=sr)
#     samive - (12, სხვადასხვა)
    chroma_stft_scaled = np.mean(chroma_stft.T,axis=0)
    chroma_cens_scaled = np.mean(chroma_cens.T,axis=0)
    chroma_cq_scaled = np.mean(chroma_cq.T,axis=0)
    return [chroma_stft_scaled, chroma_cens_scaled, chroma_cq_scaled]

In [345]:
def get_pitch(audio, sr):
    # Pitch (needs scaling)
    pitches, magnitudes = librosa.piptrack(y=audio, sr=sr)
#     pitches - (1025, სხვადასხვა)
    pitches_scaled = np.mean(pitches.T,axis=0)
    return pitches_scaled

In [567]:
data = get_all_features('data')

In [568]:
# data = data.sample(frac=1).reset_index(drop=True)
data

Unnamed: 0,mfccs,flux,chroma_features1,chroma_features2,chroma_features3,class_label,author
0,"[-519.81506, 103.558624, 5.094279, 12.520718, ...",[1.1040484],"[0.47122666, 0.53405166, 0.4343129, 0.4197182,...","[0.3577661033313455, 0.31667392169305425, 0.27...","[0.7311941, 0.6571596, 0.61311823, 0.57645756,...",1,devi-khos-1c.wav
1,"[-464.22855, 86.402466, 1.4660753, 39.858746, ...",[1.2896011],"[0.21589144, 0.2389997, 0.20254241, 0.19910134...","[0.21907555772651013, 0.2295746090213864, 0.20...","[0.48257867, 0.46096468, 0.47640213, 0.4181773...",1,levan-gela-1i.wav
2,"[-486.81018, 78.95307, 10.765352, 24.155785, 2...",[1.0302314],"[0.24482803, 0.42047572, 0.3438334, 0.21628611...","[0.2386073375435842, 0.2771850018902663, 0.282...","[0.472282, 0.54941887, 0.5558386, 0.4956196, 0...",1,levan-gela-1e.wav
3,"[-371.95035, 88.89893, 18.585104, 37.49893, 22...",[1.2952656],"[0.66026014, 0.65159476, 0.669117, 0.6810091, ...","[0.36983599072945383, 0.3116892941638839, 0.27...","[0.7836753, 0.653338, 0.58456016, 0.64552176, ...",1,demetre-pipi-1j.wav
4,"[-395.92157, 113.11236, -11.457305, 20.73268, ...",[1.3744305],"[0.44697517, 0.4343074, 0.46665907, 0.46354422...","[0.13909989620870808, 0.11157221325200287, 0.0...","[0.26981196, 0.22124545, 0.22039069, 0.2268472...",1,aleksandre-surg-1a.wav
...,...,...,...,...,...,...,...
928,"[-512.19196, 80.67449, 10.107907, 25.327131, -...",[1.6881658],"[0.530046, 0.5536834, 0.45305467, 0.25472108, ...","[0.4000457763267655, 0.4343604425235178, 0.347...","[0.7468369, 0.7724738, 0.63860536, 0.464398, 0...",5,vakho-koto-5.wav
929,"[-483.0102, 54.661938, 7.4288363, 14.600777, -...",[1.1483014],"[0.37088937, 0.3834142, 0.4068672, 0.50962865,...","[0.19182951536924017, 0.2038463863858157, 0.21...","[0.5225111, 0.505493, 0.53714085, 0.6103023, 0...",5,landg16-xuti-6.wav
930,"[-469.48288, 88.22836, -0.18532442, 30.47129, ...",[2.073814],"[0.6775684, 0.66309303, 0.58909744, 0.5385316,...","[0.3661583151703543, 0.41587573709340125, 0.35...","[0.74639964, 0.84113187, 0.7271359, 0.68710893...",5,Mariam-chkh-5c.wav
931,"[-496.92728, 89.38166, 17.64291, 20.645283, -1...",[1.272223],"[0.35152233, 0.3579253, 0.27196565, 0.18533759...","[0.25597659639987624, 0.23097017694564023, 0.2...","[0.51328653, 0.4739466, 0.48965722, 0.43698457...",5,levan-gela-5b.wav


In [566]:
test_data = get_all_features('test')
# test_data = test_data.sample(frac=1).reset_index(drop=True)
test_data

Unnamed: 0,mfccs,flux,chroma_features1,chroma_features2,chroma_features3,class_label,author
0,"[-506.1684, 65.66203, 33.701477, 43.65632, -4....",[1.5657881],"[0.40114653, 0.3731631, 0.48328295, 0.48039213...","[0.2920471097086192, 0.2930405019111327, 0.290...","[0.6231679, 0.6254522, 0.6280429, 0.5692649, 0...",1,1a-mariam.wav
1,"[-478.53287, 65.18405, 17.451687, 21.892368, 7...",[1.5289221],"[0.6736648, 0.6688613, 0.60539407, 0.60239834,...","[0.3421289642681023, 0.33528348089142884, 0.27...","[0.7224362, 0.73755556, 0.62307197, 0.6196807,...",1,2020-02-06-13:36:45.wav
2,"[-491.16028, 80.35386, 30.775454, 23.93185, -4...",[1.2332506],"[0.3120178, 0.39821157, 0.4679588, 0.43941608,...","[0.25435417789748055, 0.27823698888932513, 0.2...","[0.5137105, 0.5593776, 0.5820793, 0.5284205, 0...",2,2a-mariam.wav
3,"[-444.47464, 61.194283, 24.238213, 19.800646, ...",[1.375505],"[0.67180896, 0.67138696, 0.61472505, 0.6156932...","[0.35798398594514086, 0.30514770483534287, 0.2...","[0.76032984, 0.6668473, 0.60944355, 0.582932, ...",2,2020-02-06-13:37:14.wav
4,"[-444.93436, 50.723774, 18.369139, 16.19961, 7...",[1.4494555],"[0.59621143, 0.494852, 0.43561548, 0.47940192,...","[0.34556402520548585, 0.25663220899678424, 0.2...","[0.7199497, 0.60318553, 0.60958517, 0.5880586,...",3,2020-02-06-13:37:34.wav
5,"[-506.0479, 102.67351, 21.879494, 15.528565, -...",[1.7726547],"[0.3206917, 0.349603, 0.3843402, 0.3727167, 0....","[0.2073938701528828, 0.22265289558579782, 0.22...","[0.5220888, 0.53869176, 0.5591394, 0.53072655,...",3,3a-mariam.wav
6,"[-485.59055, 63.222195, 15.867859, 20.553497, ...",[1.5205839],"[0.6946735, 0.6309911, 0.6145359, 0.60764027, ...","[0.3931982878551257, 0.36561492294997605, 0.28...","[0.8065624, 0.7632137, 0.6045693, 0.60074776, ...",4,2020-02-06-13:37:52.wav
7,"[-429.3955, 84.80725, 18.176964, 26.201424, -7...",[1.9984628],"[0.4646005, 0.44519487, 0.4020764, 0.37762818,...","[0.29015640189163744, 0.28485227585615125, 0.2...","[0.5988225, 0.59547395, 0.5745526, 0.5712071, ...",4,4a-mariam.wav
8,"[-405.18478, 97.23668, 26.406164, 29.371513, -...",[1.9520679],"[0.51994026, 0.5636445, 0.56319994, 0.47556448...","[0.31744241277932217, 0.34654344375363483, 0.3...","[0.70441234, 0.69583523, 0.7297711, 0.6737782,...",5,5a-mariam.wav
9,"[-475.99872, 97.237366, 26.406332, 29.371502, ...",[1.9520866],"[0.51993966, 0.5636841, 0.56326824, 0.4756088,...","[0.31744241277932217, 0.34654344375363483, 0.3...","[0.70439345, 0.69583005, 0.72975284, 0.6737864...",5,5-mariam.wav


In [585]:
from sklearn.model_selection import train_test_split 
labels = [[i] for i in data['class_label']]
testLabels = [[i] for i in test_data['class_label']]
testAuthors = [[i] for i in test_data['author']]
y_train = np.array(labels)
y_test = np.array(testLabels)
authors = np.array(testAuthors)
# x_train, x_test, y_train, y_test = train_test_split(data, np.array(labels), test_size=0.1, random_state = 127)
# print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

# One-hot encode

In [570]:
from sklearn.preprocessing import OneHotEncoder
def get_one_hot(y):
    encoder = OneHotEncoder(sparse=False)
    y_onehot = encoder.fit_transform(y)
    return y_onehot

In [571]:
y_onehot_train = get_one_hot(y_train)
y_onehot_test = get_one_hot(y_test)
print(y_onehot_train.shape, y_onehot_test.shape)

(933, 5) (10, 5)


In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


# აქ გვინდა რომ ყველა ფიჩერზე გვქონდეს წვდომა და მაგიტომ ამოღებისას უნდა შევცვალოთ როგორცაა შენახული დანარჩენი არაფრის შეცვლა არ მოგვიწევს

In [572]:
def get_processed_X(x):
    X = x.drop(columns=['class_label','author'])
    X = X.values
    ls = [] #featurebi gvinda da magitom
    for i in range(X.shape[0]):
        features = []
        for j in range(X.shape[1]):
            for k in X[i][j]:
                features.append(k)
        ls.append(features)
    res = np.array(ls)
#     აქ ჩავამატე
#     print(preprocessing.scale(res))
    return preprocessing.scale(res)

In [575]:
ls_train = get_processed_X(data)
ls_test = get_processed_X(test_data)
print(ls_train.shape, ls_test.shape)

(933, 137) (10, 137)


 # ნეირონული ქსელის კოდი

In [541]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [542]:
import math

def softmax(z):
    z_exp = [math.exp(i) for i in z]
    sum_z_exp = sum(z_exp)
    return [i / sum_z_exp for i in z_exp]

In [591]:
def get_accuracy(h, y):
    sm = 0
    for a,i in zip(h, y):
        sm+=a[(i[0]-1)]
    sm/= y.shape[0]
    return sm

In [543]:
def forward_propagate(X, theta1, theta2):
    m = X.shape[0]
    
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    z2 = a1 * theta1.T
    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)
    z3 = a2 * theta2.T
    h = []
    for i in range(z3.shape[0]):
        z = [z3[i, j] for j in range(z3.shape[1])]
        h.append(softmax(z))
    h = np.array(h)
#     h = sigmoid(z3)
    return a1, z2, a2, z3, h

In [544]:
def cost(params, input_size, hidden_size, num_labels, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    # reshape the parameter array into parameter matrices for each layer
    theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
    theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
    
    # run the feed-forward pass
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)

#     J = (-1 / m) * np.sum(y * np.log(h).T) #softmax
    # compute the cost
    J = 0
    for i in range(m):
        first_term = np.multiply(-y[i,:], np.log(h[i,:]))
        second_term = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
        J += np.sum(first_term - second_term)
    
    J = J / m
    
    # add the cost regularization term
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))
    
    return J

In [545]:
def sigmoid_gradient(z):
    return np.multiply(sigmoid(z), (1 - sigmoid(z)))

In [546]:
def backprop(params, input_size, hidden_size, num_labels, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    
    # reshape the parameter array into parameter matrices for each layer
    theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
    theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
    
    # run the feed-forward pass
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    
    # initializations
    J = 0
    delta1 = np.zeros(theta1.shape)  # (25, 401)
    delta2 = np.zeros(theta2.shape)  # (10, 26)
    
    J = cost(params,input_size, hidden_size, num_labels, X, y, learning_rate)

    # perform backpropagation
    for t in range(m):
        a1t = a1[t,:]  # (1, 401)
        z2t = z2[t,:]  # (1, 25)
        a2t = a2[t,:]  # (1, 26)
        ht = h[t,:]  # (1, 10)
        yt = y[t,:]  # (1, 10)
        
        d3t = ht - yt  # (1, 10)
        
        z2t = np.insert(z2t, 0, values=np.ones(1))  # (1, 26)
        d2t = np.multiply((theta2.T * d3t.T).T, sigmoid_gradient(z2t))  # (1, 26)
        
        delta1 = delta1 + (d2t[:,1:]).T * a1t
        delta2 = delta2 + d3t.T * a2t
        
    delta1 = delta1 / m
    delta2 = delta2 / m
    
    # add the gradient regularization term
    delta1[:,1:] = delta1[:,1:] + (theta1[:,1:] * learning_rate) / m
    delta2[:,1:] = delta2[:,1:] + (theta2[:,1:] * learning_rate) / m
    
    # unravel the gradient matrices into a single array
    grad = np.concatenate((np.ravel(delta1), np.ravel(delta2)))
    
    return J, grad

In [547]:
# initial setup
input_size = ls_train.shape[1]
hidden_size = 50
num_labels = y_onehot_train.shape[1]
learning_rate = 0.4

# randomly initialize a parameter array of the size of the full network's parameters
params = (np.random.random(size=hidden_size * (input_size + 1) + num_labels * (hidden_size + 1)) - 0.5) * 0.25

m = ls_train.shape[0]
ls_train = np.matrix(ls_train)

# unravel the parameter array into parameter matrices for each layer
theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))

theta1.shape, theta2.shape 

((50, 138), (5, 51))

In [548]:
a1, z2, a2, z3, h = forward_propagate(ls_train, theta1, theta2)
a1.shape, z2.shape, a2.shape, z3.shape, h.shape
print(h)
print(cost(params, input_size, hidden_size, num_labels, ls_train, y_onehot_train, learning_rate))
get_accuracy(h, y_train)


[[0.15266975 0.18315737 0.11304816 0.25140902 0.29971569]
 [0.13316876 0.18511417 0.11953446 0.23366211 0.3285205 ]
 [0.14420103 0.17648031 0.12747741 0.26561103 0.28623022]
 ...
 [0.1563142  0.18185399 0.11840361 0.26837421 0.27505398]
 [0.15368746 0.17253105 0.12692064 0.23943433 0.30742652]
 [0.15179992 0.18978624 0.11880342 0.24297405 0.29663637]]
2.578164111854486


0.1992052695009525

In [549]:
J, grad = backprop(params, input_size, hidden_size, num_labels, ls_train, y_onehot_train, learning_rate)
J, grad.shape

(2.578164111854486, (7155,))

In [551]:
from scipy.optimize import minimize

# minimize the objective function
fmin = minimize(fun=backprop, x0=params, args=(input_size, hidden_size, num_labels, ls_train, y_onehot_train, learning_rate), 
                method='TNC', jac=True, options={'maxiter': 250})
fmin

     fun: 0.1935884542158433
     jac: array([-1.75912291e-04,  1.80711725e-04,  1.77996890e-04, ...,
        2.56811504e-05,  8.81933335e-05, -2.88141052e-05])
 message: 'Max. number of function evaluations reached'
    nfev: 250
     nit: 19
  status: 3
 success: False
       x: array([-0.06376093, -0.21276501, -0.23442981, ...,  1.84576078,
        1.99751689,  1.07154924])

In [534]:
ls_train = np.matrix(ls_train)
theta1 = np.matrix(np.reshape(fmin.x[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
theta2 = np.matrix(np.reshape(fmin.x[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))

a1, z2, a2, z3, h = forward_propagate(ls_train, theta1, theta2)
# y_pred_train = np.array(np.argmax(h, axis=1) + 1)
accuracy = get_accuracy(h, y_train)
print(accuracy)


0.965101202195832


In [535]:
ls_test = np.matrix(ls_test)

a1, z2, a2, z3, h = forward_propagate(ls_test, theta1, theta2)
# y_pred = np.array(np.argmax(h, axis=1) + 1)

accuracy = get_accuracy(h, y_test)
# print(h[0])
print(accuracy)
if accuracy > 0.64:
    print("hey")
    thetas = []
    thetas.append(theta1)
    thetas.append(theta2)
    np.save('weights', thetas)

0.6156008780946396


In [367]:
# correct = [1 if a == b else 0 for (a, b) in zip(y_pred_train, y_train)]
# accuracy = (sum(map(int, correct)) / float(len(correct)))
# print(accuracy)

In [368]:
# correct = [1 if a == b else 0 for (a, b) in zip(y_pred, y_test)]
# accuracy = (sum(map(int, correct)) / float(len(correct)))
# print(accuracy)

# ჩაწერა-წაკითხვა

In [369]:
# thetas = []
# thetas.append(theta1)
# thetas.append(theta2)
# np.save('weights', thetas)
# theta1, theta2 = np.load('weights.npy', allow_pickle = True)


# გვჭირდება !pip install XlsxWriter

In [590]:
import xlsxwriter
workbook = xlsxwriter.Workbook("result.xlsx")
worksheet = workbook.add_worksheet()
row = 0 
col = 0


for name in authors:
    worksheet.write(row, col, name[0])
    for a, b, c, d, e in h:
        worksheet.write(row, col+1, a)
        worksheet.write(row, col+2, b)
        worksheet.write(row, col+3, c)
        worksheet.write(row, col+4, d)
        worksheet.write(row, col+5, e)
    row+=1
    
workbook.close()