# ყველა ფაილის მისამართის ამოღება

In [1]:
!pip install --upgrade librosa
!pip install --upgrade pydub
!pip install --upgrade pip

Requirement already up-to-date: librosa in /home/tamar/anaconda3/envs/ml/lib/python3.7/site-packages (0.7.2)
Requirement already up-to-date: pydub in /home/tamar/anaconda3/envs/ml/lib/python3.7/site-packages (0.23.1)
Requirement already up-to-date: pip in /home/tamar/anaconda3/envs/ml/lib/python3.7/site-packages (20.0.2)


In [2]:
import glob
import os
from pydub import AudioSegment

# audio ფაილების გადაყვანა wav-ში

In [3]:
import pandas as pd
import librosa
import librosa.display

##  <font color =green>extract_features ვარგა. ქვემოთ რაც წერია ყველაფერს სწორად შვება</font>

In [4]:
import numpy as np

def get_features(file_name):
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
        mfccs = get_mfcc(audio, sample_rate)
        flux = get_spectral_features(audio, sample_rate)[2]
#         zero_crossing_rate = get_zero_crossing_rate(audio)
#         spectral_features = get_spectral_features(audio, sample_rate)
        chroma_features = get_chroma_features(audio, sample_rate)
        return [mfccs, flux] + chroma_features
    except Exception as e:
        print("Error encountered while parsing file: ", file)
        return None 

In [5]:
def get_all_features(folder):
    folders = os.listdir(folder)
    folders.sort()
    all_features = []
    for label, sub_dir in enumerate(folders): #label {0; 4}
        for file_name in glob.glob(os.getcwd() + '/' + folder +'/' + sub_dir + '/*.wav'):
#             print("Extracting file ", file_name)
            try:
                features = get_features(file_name)
            except Exception as e:
                print("Extraction error")
                continue
            all_features.append(features +  [label + 1])
    data = pd.DataFrame(all_features, columns=['mfccs', 'flux', 'chroma_features1', 'chroma_features2', 'chroma_features3', 'class_label'])
    return data

In [6]:
def get_mfcc(audio, sr):
    #Mel Frequency Cepstral Coefficient
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=100)
    mfccs_scaled = np.mean(mfccs.T,axis=0)
#     mfccs - (40, სხვადასხვა), mfccsscaled - (40,)
    return mfccs_scaled

In [7]:
def get_zero_crossing_rate(audio):
    #ZERO_CROSSING_RATE
    rate = librosa.feature.zero_crossing_rate(audio)
    rate_scaled = np.mean(rate.T,axis=0)
#     rate - (1, სხვადასხვა)
    return rate_scaled

In [8]:
def get_spectral_features(audio, sr):
    sp = librosa.feature.spectral_centroid(audio)
    S, phase = librosa.magphase(librosa.stft(audio))
    a = librosa.feature.spectral_rolloff(S=S, sr=sr)
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr)
    onset_env.shape = (onset_env.shape[0], 1)
    sp_scaled = np.mean(sp.T,axis=0)
    a_scaled = np.mean(a.T,axis=0)
    onset_env_scaled = np.mean(onset_env,axis=0)
    return [sp_scaled, a_scaled, onset_env_scaled]

In [9]:
def get_chroma_features(audio, sr):    
    # Chroma features (needs scaling)
    chroma_stft = librosa.feature.chroma_stft(y=audio, sr=sr) #N1
    chroma_cens = librosa.feature.chroma_cens(y=audio, sr=sr) #Chroma energy normalized statistics
    chroma_cq = librosa.feature.chroma_cqt(y=audio, sr=sr)
#     samive - (12, სხვადასხვა)
    chroma_stft_scaled = np.mean(chroma_stft.T,axis=0)
    chroma_cens_scaled = np.mean(chroma_cens.T,axis=0)
    chroma_cq_scaled = np.mean(chroma_cq.T,axis=0)
    return [chroma_stft_scaled, chroma_cens_scaled, chroma_cq_scaled]

In [10]:
def get_pitch(audio, sr):
    # Pitch (needs scaling)
    pitches, magnitudes = librosa.piptrack(y=audio, sr=sr)
#     pitches - (1025, სხვადასხვა)
    pitches_scaled = np.mean(pitches.T,axis=0)
    return pitches_scaled

In [11]:
data = get_all_features('data')

In [12]:
data = data.sample(frac=1).reset_index(drop=True)
data

Unnamed: 0,mfccs,flux,chroma_features1,chroma_features2,chroma_features3,class_label
0,"[-563.3512, 83.42768, 15.9913845, 30.378, 7.49...",[1.2877665],"[0.490593, 0.43263522, 0.41713786, 0.4319621, ...","[0.23855104703452865, 0.19339022133616926, 0.1...","[0.48419195, 0.3967417, 0.36660203, 0.4046905,...",2
1,"[-415.62613, 129.10883, 0.38149676, -2.560156,...",[1.1878353],"[0.4505017, 0.45908153, 0.4559908, 0.46738443,...","[0.3696895725371677, 0.290672106573877, 0.2328...","[0.7234272, 0.64493823, 0.57561886, 0.61767435...",3
2,"[-414.80673, 65.71164, 11.970185, 18.030397, -...",[1.5477084],"[0.42241085, 0.34647024, 0.3099448, 0.28886437...","[0.3662726855089679, 0.30184172312762186, 0.29...","[0.6762588, 0.59152263, 0.60965645, 0.51682824...",3
3,"[-476.1924, 98.35074, 9.049427, 53.071404, 16....",[1.4277123],"[0.5276107, 0.37308845, 0.4098288, 0.495738, 0...","[0.2701955494926417, 0.2612967298830713, 0.282...","[0.51186836, 0.50900024, 0.5152791, 0.55262387...",1
4,"[-625.66, 95.10185, 2.4537098, 18.509094, -0.6...",[1.5897195],"[0.6447596, 0.49822325, 0.38547748, 0.3440539,...","[0.44987373492421207, 0.38843743780735224, 0.2...","[0.84486204, 0.74459755, 0.61213535, 0.571984,...",5
...,...,...,...,...,...,...
933,"[-559.1015, 33.033833, -1.5776844, 13.724551, ...",[0.8792078],"[0.39154282, 0.37308574, 0.35078266, 0.3753931...","[0.3320585565879884, 0.2361567213514251, 0.212...","[0.6149197, 0.5404739, 0.46294823, 0.517493, 0...",2
934,"[-604.3167, 83.39328, 17.018976, 37.68737, 14....",[1.1346732],"[0.49363038, 0.4740044, 0.42214367, 0.42252132...","[0.3074319749999116, 0.28517516190292935, 0.17...","[0.57406163, 0.4965746, 0.36137938, 0.31753466...",1
935,"[-255.2281, 83.4614, 12.161046, 33.219982, 2.5...",[1.25909],"[0.48128998, 0.43248865, 0.42360225, 0.4463603...","[0.2321763687080916, 0.23501861178036354, 0.27...","[0.547936, 0.5541493, 0.6068322, 0.6248768, 0....",2
936,"[-524.3405, 91.013084, -12.496176, 20.460442, ...",[1.8563066],"[0.42336696, 0.41790977, 0.38771373, 0.3419288...","[0.40768162973538374, 0.3649610190863246, 0.33...","[0.7123892, 0.6882575, 0.6068655, 0.57250977, ...",1


In [13]:
from sklearn.model_selection import train_test_split 
labels = [[i] for i in data['class_label']]
# testLabels = [[i] for i in testData['class_label']]
y_train = np.array(labels)
# y_test = np.array(testLabels)

# x_train, x_test, y_train, y_test = train_test_split(data, np.array(labels), test_size=0.1, random_state = 127)
# print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

# One-hot encode

In [14]:
from sklearn.preprocessing import OneHotEncoder
def get_one_hot(y):
    encoder = OneHotEncoder(sparse=False)
    y_onehot = encoder.fit_transform(y)
    return y_onehot

In [15]:
y_onehot_train = get_one_hot(y_train)
# y_onehot_test = get_one_hot(y_test)
# print(y_onehot_train.shape, y_onehot_test.shape)

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


# PCA

In [16]:
def pca(X):
    # normalize the features
    X = (X - X.mean()) / X.std()
    
    # compute the covariance matrix
    X = np.matrix(X)
    cov = (X.T * X) / X.shape[0]
    
    # perform SVD
    U, S, V = np.linalg.svd(cov)
    
    return U, S, V

In [17]:
def project_data(X, U, k):
    U_reduced = U[:,:k]
    return np.dot(X, U_reduced)

In [18]:
def recover_data(Z, U, k):
    U_reduced = U[:,:k]
    return np.dot(Z, U_reduced.T)

# აქ გვინდა რომ ყველა ფიჩერზე გვქონდეს წვდომა და მაგიტომ ამოღებისას უნდა შევცვალოთ როგორცაა შენახული დანარჩენი არაფრის შეცვლა არ მოგვიწევს

In [19]:
def get_processed_X(x):
    X = x.drop(columns=['class_label'])
    X = X.values
    ls = [] #featurebi gvinda da magitom
    for i in range(X.shape[0]):
        features = []
        for j in range(X.shape[1]):
            for k in X[i][j]:
                features.append(k)
        ls.append(features)
    res = np.array(ls)
    return res

In [20]:
ls_train = get_processed_X(data)
# ls_test = get_processed_X(x_test)
# print(ls_train.shape, ls_test.shape)

In [21]:
k = 500
U, S, V = pca(ls_train)
ls_train = project_data(ls_train, U, k)
# U1, S1, V1 = pca(ls_test)
# ls_test = project_data(ls_test, U, k)

 # ნეირონული ქსელის კოდი

In [22]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [23]:
import math

def softmax(z):
    z_exp = [math.exp(i) for i in z]
    sum_z_exp = sum(z_exp)
    return [i / sum_z_exp for i in z_exp]

In [24]:
def forward_propagate(X, theta1, theta2):
    m = X.shape[0]
    
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    z2 = a1 * theta1.T
    a2 = np.insert(sigmoid(z2), 0, values=np.ones(m), axis=1)
    z3 = a2 * theta2.T
    h = []
    for i in range(z3.shape[0]):
        z = [z3[i, j] for j in range(z3.shape[1])]
        h.append(softmax(z))
    h = np.array(h)
#     h = sigmoid(z3)
    return a1, z2, a2, z3, h

In [25]:
def cost(params, input_size, hidden_size, num_labels, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    # reshape the parameter array into parameter matrices for each layer
    theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
    theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
    
    # run the feed-forward pass
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)

#     J = (-1 / m) * np.sum(y * np.log(h).T) #softmax
    # compute the cost
    J = 0
    for i in range(m):
        first_term = np.multiply(-y[i,:], np.log(h[i,:]))
        second_term = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
        J += np.sum(first_term - second_term)
    
    J = J / m
    
    # add the cost regularization term
    J += (float(learning_rate) / (2 * m)) * (np.sum(np.power(theta1[:,1:], 2)) + np.sum(np.power(theta2[:,1:], 2)))
    
    return J

In [26]:
def sigmoid_gradient(z):
    return np.multiply(sigmoid(z), (1 - sigmoid(z)))

In [27]:
def backprop(params, input_size, hidden_size, num_labels, X, y, learning_rate):
    m = X.shape[0]
    X = np.matrix(X)
    y = np.matrix(y)
    
    # reshape the parameter array into parameter matrices for each layer
    theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
    theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))
    
    # run the feed-forward pass
    a1, z2, a2, z3, h = forward_propagate(X, theta1, theta2)
    
    # initializations
    J = 0
    delta1 = np.zeros(theta1.shape)  # (25, 401)
    delta2 = np.zeros(theta2.shape)  # (10, 26)
    
    J = cost(params,input_size, hidden_size, num_labels, X, y, learning_rate)

    # perform backpropagation
    for t in range(m):
        a1t = a1[t,:]  # (1, 401)
        z2t = z2[t,:]  # (1, 25)
        a2t = a2[t,:]  # (1, 26)
        ht = h[t,:]  # (1, 10)
        yt = y[t,:]  # (1, 10)
        
        d3t = ht - yt  # (1, 10)
        
        z2t = np.insert(z2t, 0, values=np.ones(1))  # (1, 26)
        d2t = np.multiply((theta2.T * d3t.T).T, sigmoid_gradient(z2t))  # (1, 26)
        
        delta1 = delta1 + (d2t[:,1:]).T * a1t
        delta2 = delta2 + d3t.T * a2t
        
    delta1 = delta1 / m
    delta2 = delta2 / m
    
    # add the gradient regularization term
    delta1[:,1:] = delta1[:,1:] + (theta1[:,1:] * learning_rate) / m
    delta2[:,1:] = delta2[:,1:] + (theta2[:,1:] * learning_rate) / m
    
    # unravel the gradient matrices into a single array
    grad = np.concatenate((np.ravel(delta1), np.ravel(delta2)))
    
    return J, grad

In [28]:
# initial setup
input_size = ls_train.shape[1]
hidden_size = 50
num_labels = y_onehot_train.shape[1]
learning_rate = 1.5

# randomly initialize a parameter array of the size of the full network's parameters
params = (np.random.random(size=hidden_size * (input_size + 1) + num_labels * (hidden_size + 1)) - 0.5) * 0.25

ls_train = np.matrix(ls_train)

# unravel the parameter array into parameter matrices for each layer
theta1 = np.matrix(np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
theta2 = np.matrix(np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))

theta1.shape, theta2.shape 

((50, 138), (5, 51))

In [29]:
a1, z2, a2, z3, h = forward_propagate(ls_train, theta1, theta2)
a1.shape, z2.shape, a2.shape, z3.shape, h.shape
print(h)
print(cost(params, input_size, hidden_size, num_labels, ls_train, y_onehot_train, learning_rate))

[[0.15250609 0.15289431 0.27439777 0.12768066 0.29252117]
 [0.14599048 0.15563719 0.29197557 0.1225629  0.28383385]
 [0.17978844 0.14240967 0.25264439 0.14636093 0.27879657]
 ...
 [0.16436099 0.14735485 0.28918515 0.13282196 0.26627705]
 [0.18182933 0.14617203 0.23706371 0.15215176 0.28278317]
 [0.16325215 0.14806253 0.25028404 0.13888739 0.29951388]]
2.5973425518830657


In [30]:
J, grad = backprop(params, input_size, hidden_size, num_labels, ls_train, y_onehot_train, learning_rate)
J, grad.shape

(2.5973425518830657, (7155,))

In [31]:
from scipy.optimize import minimize

# minimize the objective function
fmin = minimize(fun=backprop, x0=params, args=(input_size, hidden_size, num_labels, ls_train, y_onehot_train, learning_rate), 
                method='TNC', jac=True, options={'maxiter': 300})
fmin

  


     fun: 1.6572454689019034
     jac: array([ 0.00000000e+00,  5.02506198e-04, -1.43815894e-05, ...,
       -2.03581561e-05, -1.72767354e-04, -8.91064641e-05])
 message: 'Max. number of function evaluations reached'
    nfev: 300
     nit: 36
  status: 3
 success: False
       x: array([-0.06890189,  0.31423388, -0.00899329, ..., -0.01272643,
       -0.10803719, -0.05572124])

In [32]:
ls_train = np.matrix(ls_train)
theta1 = np.matrix(np.reshape(fmin.x[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1))))
theta2 = np.matrix(np.reshape(fmin.x[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1))))

a1, z2, a2, z3, h = forward_propagate(ls_train, theta1, theta2)
y_pred_train = np.array(np.argmax(h, axis=1) + 1)

In [33]:
thetas = []
thetas.append(theta1)
thetas.append(theta2)
np.save('weights', thetas)

In [34]:
# ls_test = np.matrix(ls_test)

# a1, z2, a2, z3, h = forward_propagate(ls_test, theta1, theta2)
# y_pred = np.array(np.argmax(h, axis=1) + 1)

In [35]:
correct = [1 if a == b else 0 for (a, b) in zip(y_pred_train, y_train)]
accuracy = (sum(map(int, correct)) / float(len(correct)))
print(accuracy)

0.6748400852878464


In [36]:
# correct = [1 if a == b else 0 for (a, b) in zip(y_pred, y_test)]
# accuracy = (sum(map(int, correct)) / float(len(correct)))
# print(accuracy)