## 1. Import libraries

In [1]:
import os
import pickle
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
import matplotlib.pyplot as plt
from model import extract_features_from_path

## 2. Load the model and test data, then evaluate

In [2]:
# Test set locations: the directory the test files are read from, and the
# label/metadata text file that lists them.
test_data_path = './2501ml_data/test'
test_metadata_path = './2501ml_data/label/test_label.txt'

In [3]:
# Restore the fitted pipeline that was serialized to disk.
# NOTE: unpickling can execute arbitrary code, so only load model files
# that come from a trusted source.
model_path = './model.pkl'
with open(model_path, mode='rb') as model_file:
    best_pipe = pickle.load(model_file)

# Show the pipeline steps and their hyperparameters.
print(best_pipe)

Pipeline(steps=[('scaler', StandardScaler()),
                ('kpca', KernelPCA(kernel='rbf', n_components=15)),
                ('lda', LinearDiscriminantAnalysis(n_components=1)),
                ('svc',
                 SVC(C=10, class_weight={'Fake': 9, 'Real': 1}, gamma=0.01,
                     probability=True))])


In [4]:
# Load test data
# Every non-blank line of the metadata file must carry exactly five
# whitespace-separated fields; the 2nd is the file name (relative to
# test_data_path) and the 5th is the ground-truth label.
test_x = []
test_y = []
test_file_names = []

with open(test_metadata_path, 'r', encoding='utf-8') as f:
    for line_no, line in enumerate(f, start=1):
        # split() with no separator tolerates tabs and repeated spaces and
        # yields an empty list for a blank line.
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline at end of file) hold no sample.
            continue
        if len(fields) != 5:
            # Fail loudly with the exact location instead of an opaque unpack error.
            raise ValueError(
                f"{test_metadata_path}:{line_no}: expected 5 fields, "
                f"got {len(fields)}: {line!r}"
            )
        # spk (presumably a speaker id) is parsed but not used in this cell.
        spk, file_name, _, _, label = fields
        wav_path = os.path.join(test_data_path, file_name)
        features = extract_features_from_path(wav_path)
        test_x.append(features)
        test_y.append(label)
        test_file_names.append(file_name)

# Convert to arrays for scikit-learn; test_file_names stays a list and is
# kept in the same order as the rows of test_x / test_y.
test_x = np.array(test_x)
test_y = np.array(test_y)

In [5]:
# Run the loaded pipeline on the test features, then score the predicted
# labels against the ground truth.
predictions = best_pipe.predict(test_x)

# 'weighted' averages the per-class F1 scores weighted by class support.
f1 = f1_score(y_true=test_y, y_pred=predictions, average='weighted')
acc = accuracy_score(y_true=test_y, y_pred=predictions)

print(f"Test Accuracy: {acc:.4f}")
print(f"Test Weighted F1 Score: {f1:.4f}")

Test Accuracy: 0.9620
Test Weighted F1 Score: 0.9620


## 3. Create prediction results file

In [6]:
# Write one "<file_name> <predicted_label>" line per test sample.
# The encoding is set explicitly (matching how the label file is read) so the
# output does not depend on the platform's default text encoding.
with open('./etc/team_test_result.txt', 'w', encoding='utf-8') as f:
    # test_file_names and predictions are in the same order, so pair them directly.
    for file_name, prediction in zip(test_file_names, predictions):
        f.write(f"{file_name} {prediction}\n")

In [7]:
# Run the Perl evaluation script on the results file written above, passing
# the reference label file as the second argument (requires `perl` on PATH).
!perl ./etc/eval.pl ./etc/team_test_result.txt ./2501ml_data/label/test_label.txt

Test: ./etc/team_test_result.txt
True: ./2501ml_data/label/test_label.txt
Accuracy: 96.20%
Hit: 1924, Total: 2000
