In [1]:
# Признаки
import csv
import os

import keras
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost
from keras import layers
from keras.models import Sequential
from keras.regularizers import l2

In [2]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, roc_curve
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

In [3]:
from tqdm.notebook import tqdm
from scipy import stats
from keras import backend as K

In [None]:
def inverse_eer(y_true, scores):
    """Score a binary classifier as ``1 - EER`` (higher is better).

    The ROC curve is computed with label ``1`` as the positive class. The
    equal error rate is taken as the false-positive rate at the ROC point
    where ``|FNR - FPR|`` is smallest.

    Args:
        y_true: Ground-truth labels.
        scores: Classifier scores for the positive class.

    Returns:
        ``1 - eer``; ``1.0`` when locating the crossing raises ``ValueError``.
    """
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, scores, pos_label=1)
    false_neg_rate = 1 - true_pos_rate
    try:
        closest = np.nanargmin(np.absolute(false_neg_rate - false_pos_rate))
        eer = false_pos_rate[closest]
    except ValueError:
        # No usable crossing point: treated as an error rate of zero.
        eer = 0.0
    return 1 - eer

In [None]:
%matplotlib inline
!mv '~/Google\ Drive\ File\ Stream/Мой\ диск/training_data.tar.gz' './training_data.tar.gz'
!mv '~/Google\ Drive\ File\ Stream/Мой\ диск/Testing_Data_MLTASK.tar.gz' './testing_data.tar.gz'
!tar xvzf training_data.tar.gz
!tar xvzf testing_data.tar.gz

In [None]:
# Exclusive upper bound of the MFCC column numbering: columns run mfcc1 .. mfcc20.
MFCC_NUM = 21

# One column per summary feature, in the order they appear in the CSV.
_summary_columns = [
    'chroma_stft',
    'chroma_cqt',
    'chroma_cens',
    'melspectrogram',
    'rmse',
    'spectral_centroid',
    'spectral_bandwidth',
    'spectral_contrast',
    'spectral_flatness',
    'rolloff',
    'poly_features',
    'tonnetz',
    'zero_crossing_rate',
]
_mfcc_columns = [f'mfcc{index}' for index in range(1, MFCC_NUM)]

# Full CSV header: file name, summary features, MFCC means, class label.
header = ['filename', *_summary_columns, *_mfcc_columns, 'label']

In [None]:
file = open('training-dataset.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
genres = 'human spoof'.split()
for g in genres:
    for filename in os.listdir(f'./Training_Data/{g}'):
        songname = f'./Training_Data/{g}/{filename}'
        y, sr = librosa.load(soundname, mono=True, duration=30)
        rms = librosa.feature.rms(y=y)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_cqt = librosa.feature.chroma_cqt(y=y, sr=sr)
        chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
        melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        spectral_flatness = librosa.feature.spectral_flatness(y=y)
        poly_features = librosa.feature.poly_features(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y=y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('training-dataset.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())

In [None]:
file = open('testing-dataset.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
for filename in os.listdir(f'./Training_Data/'):
    songname = f'./Training_Data/{filename}'
    y, sr = librosa.load(soundname, mono=True, duration=30)
    rms = librosa.feature.rms(y=y)
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_cqt = librosa.feature.chroma_cqt(y=y, sr=sr)
    chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
    melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
    spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    spectral_flatness = librosa.feature.spectral_flatness(y=y)
    poly_features = librosa.feature.poly_features(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y=y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
    for e in mfcc:
        to_append += f' {np.mean(e)}'
    to_append += f' {g}'
    file = open('testing-dataset.csv', 'a', newline='')
    with file:
        writer = csv.writer(file)
        writer.writerow(to_append.split())

In [4]:
data = pd.read_csv('./training-dataset.csv')
# `filename` and `label` hold strings. Restrict the correlation matrix to the
# numeric feature columns explicitly: pandas >= 2.0 no longer drops non-numeric
# columns silently and raises instead.
data.select_dtypes(include='number').corr()

Unnamed: 0,chroma_stft,chroma_cqt,chroma_cens,melspectrogram,rmse,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_flatness,rolloff,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20
chroma_stft,1.0,0.667065,0.532928,-0.429819,-0.453419,0.137036,0.344097,-0.35353,-0.401151,0.229024,...,0.150081,0.245175,0.327554,-0.039716,0.337868,0.231724,0.167016,0.091908,0.38351,0.056592
chroma_cqt,0.667065,1.0,0.887577,-0.336243,-0.421256,0.183264,0.21626,-0.567021,-0.174296,0.200474,...,0.128481,0.083785,0.167899,0.002587,0.231524,0.13444,0.191303,0.105546,0.253616,0.105786
chroma_cens,0.532928,0.887577,1.0,-0.289164,-0.360749,0.189454,0.146513,-0.487303,-0.135986,0.184225,...,0.072923,0.053871,0.10133,0.021159,0.209898,0.117336,0.170477,0.088147,0.242263,0.096182
melspectrogram,-0.429819,-0.336243,-0.289164,1.0,0.943705,-0.256098,-0.348985,-0.031108,0.476913,-0.307517,...,-0.047769,-0.193641,-0.132347,-0.052694,-0.268659,-0.097943,-0.253782,-0.140142,-0.284091,-0.165933
rmse,-0.453419,-0.421256,-0.360749,0.943705,1.0,-0.215599,-0.359771,0.046549,0.451839,-0.276703,...,-0.054625,-0.188784,-0.133519,-0.083711,-0.26122,-0.114828,-0.26078,-0.12117,-0.276002,-0.175661
spectral_centroid,0.137036,0.183264,0.189454,-0.256098,-0.215599,1.0,0.639542,0.253526,-0.202607,0.931252,...,0.052383,0.002673,0.026391,-0.194551,0.278295,-0.259744,0.278807,0.000205,0.165754,-0.05005
spectral_bandwidth,0.344097,0.21626,0.146513,-0.348985,-0.359771,0.639542,1.0,0.136772,-0.347521,0.836252,...,0.113916,0.055438,0.160005,-0.223381,0.315156,-0.274252,0.287118,-0.140465,0.171892,-0.029536
spectral_contrast,-0.35353,-0.567021,-0.487303,-0.031108,0.046549,0.253526,0.136772,1.0,-0.284507,0.230132,...,-0.135724,0.027788,-0.041367,-0.0802,0.085809,-0.130997,0.067782,-0.016437,0.071102,-0.081724
spectral_flatness,-0.401151,-0.174296,-0.135986,0.476913,0.451839,-0.202607,-0.347521,-0.284507,1.0,-0.258176,...,-0.032859,-0.124136,-0.021235,-0.04587,-0.117974,-0.077946,-0.097121,-0.090456,-0.157056,-0.07038
rolloff,0.229024,0.200474,0.184225,-0.307517,-0.276703,0.931252,0.836252,0.230132,-0.258176,1.0,...,0.090262,0.050533,0.073281,-0.204051,0.314408,-0.261091,0.296709,-0.048404,0.202702,-0.039659


In [None]:
# Здесь поиск дубликатов тестовых аудио, но я его не сохранил :(
# В итоге они в duplicates.txt
# Все дубликаты попарны

In [None]:
# File names listed in duplicates.txt, one entry per non-empty line.
duplicates = []
with open('./duplicates.txt') as f:
    for line in f:
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline) carry no entry.
            continue
        # Keep the first token without its first 6 characters.
        # NOTE(review): presumably a 6-character directory prefix such as
        # 'human/' or 'spoof/'; confirm against duplicates.txt.
        duplicates.append(fields[0][6:])

In [None]:
# Выбросы удалять не стал

In [None]:
# Drop every row whose file name is listed in `duplicates`. The comparison is
# made on the whole name; iterating over the characters of each name never
# matches a multi-character entry and removes nothing.
data = data[~data['filename'].isin(duplicates)]
# Shuffle with a fixed seed (the same one the KFold split uses) so that a
# re-run produces the same row order.
data = data.sample(frac=1, random_state=1234).reset_index(drop=True)
d = {'human': 1, 'spoof': 0}
# Encode only the label column; values that are already encoded pass through
# unchanged, so the cell can be re-run safely.
data = data.assign(label=data['label'].map(lambda value: d.get(value, value)))

In [None]:
# The first column and the last column are excluded from the features;
# the last column is the target.
feature_frame = data.iloc[:, 1:-1]
target_column = data.iloc[:, -1]

X = feature_frame
y = np.array(target_column)

In [None]:
# Create the hold-out split that the cells below rely on (X_train, X_test,
# y_train, y_test).
# NOTE(review): the 80/20 ratio is an assumption; adjust if a different split
# was intended.
X_train, X_test, y_train, y_test = train_test_split(
    np.array(X, dtype=float),
    y,
    test_size=0.2,
    random_state=1234,
    stratify=y,
)

# Fit the scaler once, on the training part only, and reuse that single fitted
# scaler for every other matrix, so that whatever is transformed with `scaler`
# later is on the same scale as the training data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X = scaler.transform(np.array(X, dtype=float))

In [None]:
regularisation_coef = 1e-4


def _regularised_dense(units):
    """Return a ReLU Dense layer with L2 penalties on bias, kernel and activations."""
    return layers.Dense(
        units,
        activation='relu',
        bias_regularizer=l2(regularisation_coef),
        kernel_regularizer=l2(regularisation_coef),
        activity_regularizer=l2(regularisation_coef),
    )


# Three shrinking hidden layers followed by a two-way output.
model = Sequential()
for hidden_units in (128, 64, 32):
    model.add(_regularised_dense(hidden_units))
# softmax rather than sigmoid: the two classes are mutually exclusive and
# sparse_categorical_crossentropy expects one probability distribution per row.
model.add(layers.Dense(2, activation='softmax'))
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['mse', 'accuracy'])
# Early stopping on the training loss; takes effect only when passed to
# model.fit(..., callbacks=[callback]).
callback = keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [None]:
# Train on the training split only: X also contains the hold-out rows, so
# fitting on all of X would make any evaluation on X_test meaningless.
# `classifier` holds the value returned by model.fit.
classifier = model.fit(X_train, y_train, epochs=150, batch_size=128)

In [None]:
# Loss and compiled metrics of the network on the hold-out split.
model.evaluate(x=X_test, y=y_test, batch_size=128)

In [None]:
##########

In [None]:
MAX_TREES = 150

# Forest sizes under study: 1, 2, ..., MAX_TREES.
values = np.arange(MAX_TREES) + 1
kf = KFold(n_splits=5, shuffle=True, random_state=1234)
global_scores = []

for train_indices, val_indices in tqdm(kf.split(X_train), total=kf.get_n_splits()):
    X_train_kf, y_train_kf = X_train[train_indices], y_train[train_indices]
    X_val_kf, y_val_kf = X_train[val_indices], y_train[val_indices]

    # One full-size forest per fold; smaller forests are its leading trees.
    forest = RandomForestClassifier(n_estimators=MAX_TREES, random_state=1234)
    forest.fit(X_train_kf, y_train_kf)

    # Class-1 probability from every single tree: shape (MAX_TREES, n_val).
    tree_probas = np.stack(
        [tree.predict_proba(X_val_kf)[:, 1] for tree in forest.estimators_]
    )
    # A forest predicts the mean of its trees' probabilities, so row k - 1 of
    # the running mean is the prediction of a forest made of the first k trees.
    # Each tree is evaluated once, and no unfitted estimator has to be
    # assembled by hand from fitted attributes.
    prefix_probas = np.cumsum(tree_probas, axis=0) / values[:, np.newaxis]

    scores = np.array([roc_auc_score(y_val_kf, proba) for proba in prefix_probas])
    global_scores.append(scores)

# Shape (n_folds, MAX_TREES): ROC-AUC per fold and per forest size.
global_scores = np.stack(global_scores, axis=0)

In [None]:
# Mean and spread of the cross-validated score for every forest size.
mean_cross_val_score = global_scores.mean(axis=0)
std_cross_val_score = global_scores.std(axis=0)
lower_band = mean_cross_val_score - 2 * std_cross_val_score
upper_band = mean_cross_val_score + 2 * std_cross_val_score

fig, ax = plt.subplots(figsize=(15, 8))
ax.set_title('Quality of random forest')

ax.plot(values, mean_cross_val_score, label='mean values', color='red', lw=3)
# Shaded band: two standard deviations on either side of the mean.
ax.fill_between(
    values,
    lower_band,
    upper_band,
    color='green',
    label='filled area between errors',
    alpha=0.5,
)

# Opaque white legend box with a black border.
legend_box = ax.legend(framealpha=1).get_frame()
legend_box.set_facecolor("white")
legend_box.set_edgecolor("black")
ax.set_xlabel('number of trees')
ax.set_ylabel('roc-auc')

plt.show()

In [None]:
# Fit a 50-tree forest on the training split and score it on the hold-out split.
forest = RandomForestClassifier(n_estimators=50, criterion='gini', n_jobs=-1)
forest.fit(X_train, y_train)

# Predict once and reuse: scores feed the ranking metrics, labels the rest.
forest_test_scores = forest.predict_proba(X_test)[:, 1]
forest_test_labels = forest.predict(X_test)

print(roc_auc_score(y_test, forest_test_scores))
print(f1_score(y_test, forest_test_labels))
print(accuracy_score(y_test, forest_test_labels))
print(inverse_eer(y_test, forest_test_scores))

In [None]:
###################

In [None]:
# Gradient-boosted classifier with 500 boosting rounds, trained on all of X.
boosting_model = xgboost.XGBClassifier(n_estimators=500).fit(X, y)
boosting_model

In [None]:
# boosting_model is fit on all of X. If X_test is carved out of X (as the
# train/test variables suggest), scoring boosting_model on it would measure
# memorisation, so the metrics come from a twin model with the same
# hyper-parameter that only ever sees the training split.
holdout_model = xgboost.XGBClassifier(n_estimators=500)
holdout_model.fit(X_train, y_train)

y_test_predicted = holdout_model.predict_proba(X_test)[:, 1]  # probability of class 1
y_test_labels = holdout_model.predict(X_test)

print(roc_auc_score(y_test, y_test_predicted))
print(inverse_eer(y_test, y_test_predicted))
print(f1_score(y_test, y_test_labels))
print(accuracy_score(y_test, y_test_labels))

In [None]:
#######################

In [None]:
# Test data: build the scaled feature matrix used for the final prediction.
data1 = pd.read_csv('testing-dataset.csv')
# Select the feature columns by name so that the matrix has the width the
# scaler was fitted on whether or not the CSV carries a `label` column.
feature_columns = [
    column for column in data1.columns if column not in ('filename', 'label')
]
XTest = scaler.transform(np.array(data1[feature_columns], dtype=float))

In [None]:
# One "<filename>, <probability>" line per test file, with the probability
# taken from column 1 of predict_proba and printed with 12 decimals.
with open("out.txt", 'w') as f:
    positive_probabilities = boosting_model.predict_proba(XTest)[:, 1]
    f.writelines(
        f'{filename}, {"%.12f" % (probability)}\n'
        for filename, probability in zip(data1['filename'], positive_probabilities)
    )