In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.utils.class_weight import compute_class_weight
from sklearn.compose import ColumnTransformer

import joblib

import matplotlib.pyplot as plt
import seaborn as sns
import os
import librosa

In [2]:
# Working directory of the running kernel. Every path below is derived from it,
# so the notebook presumably has to be launched from its own folder — TODO confirm.
cwd = os.getcwd()

In [3]:
# One level above the working directory; used as the root for the saved model
# and the audio folder referenced later in this notebook.
parent_dir = os.path.dirname(cwd)

In [5]:
# Load the previously saved pipeline from the sibling FeatureSelection folder.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
trained_pipeline = joblib.load(f'{parent_dir}/FeatureSelection/SVM_MFCC_Rec_Only.pkl')

In [8]:
from sklearn.base import BaseEstimator, TransformerMixin

In [21]:
class FeatureExtractor(BaseEstimator, TransformerMixin):
    """Stateless transformer that turns audio file paths into MFCC features.

    Each file is loaded with ``librosa.load`` at the requested sampling rate
    and converted to an MFCC matrix with librosa's default MFCC settings.
    The matrices are returned in a single-column DataFrame (column ``'mfcc'``),
    one row per input file, in input order.

    Parameters
    ----------
    sr : int or None, default=16000
        Sampling rate forwarded to ``librosa.load``. The rate actually
        returned by the loader is the one handed to the MFCC computation.
    """

    def __init__(self, sr=16000):
        # Stored under the same name as the argument so that BaseEstimator's
        # get_params / set_params / clone can discover it.
        self.sr = sr

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the extractor has no learned state."""
        return self

    def transform(self, file_paths):
        """Compute one MFCC matrix per audio file.

        Parameters
        ----------
        file_paths : str, os.PathLike, or iterable of those
            Audio files to process. A single path is treated as a
            one-element list instead of being iterated character by character.

        Returns
        -------
        pandas.DataFrame
            One row per file with a single object column ``'mfcc'`` holding
            the array returned by ``librosa.feature.mfcc`` for that file.
            Empty input yields an empty DataFrame that still has the column.
        """
        if isinstance(file_paths, (str, os.PathLike)):
            file_paths = [file_paths]

        # Collect first and build the frame once; growing a DataFrame with
        # ``.loc[len(df)] = ...`` reallocates on every row.
        mfccs = []
        for path in file_paths:
            signal, sample_rate = librosa.load(path, sr=self.sr)
            mfccs.append(librosa.feature.mfcc(y=signal, sr=sample_rate))

        # Fill an object array element by element so each matrix is stored
        # whole in its cell, regardless of whether the matrices share a shape.
        column = np.empty(len(mfccs), dtype=object)
        for row, mfcc in enumerate(mfccs):
            column[row] = mfcc

        return pd.DataFrame({'mfcc': column})

In [22]:
# Put raw-audio feature extraction in front of the pipeline loaded from disk,
# so predictions can be requested directly from a list of file paths.
new_pipeline = Pipeline(steps=[
    ('features', FeatureExtractor()),   # file paths -> DataFrame with an 'mfcc' column
    ('existing', trained_pipeline),     # previously trained pipeline, reused as-is
])

In [27]:
# Folder holding the sample recordings. The value (including its trailing
# slash) is unchanged; os.path.join below copes with it, so the resulting
# paths no longer contain a doubled separator ("Audio//file.wav").
audio_dir = f'{parent_dir}/Audio/'

# Recordings used to smoke-test the combined pipeline.
test_audio = [
    os.path.join(audio_dir, file_name)
    for file_name in ('Atrophy-0A00e04.wav', 'Laryngeal cancer-0702gr9.wav')
]

In [31]:
# Run the whole chain on the raw file paths: feature extraction first, then the
# loaded pipeline's predict. No fit() is called on new_pipeline in the visible
# cells, so this relies on the loaded pipeline already being fitted.
y_pred = new_pipeline.predict(test_audio)

In [32]:
# Show the predicted label for each test recording, in input order.
y_pred

array(['Benign', 'Malignant'], dtype=object)