In [1]:
import os
import librosa
import numpy as np

def load_audio_files(data_path, sr=22050):
    """Recursively load every ``.wav`` file under ``data_path`` and label it.

    A file is labelled 0 (normal) when its path contains ``normal`` but not
    ``abnormal``; every other file is labelled 1 (abnormal).

    Args:
        data_path: Root directory to walk.
        sr: Sample rate forwarded to ``librosa.load``.

    Returns:
        Tuple ``(audio_files, labels)``: the loaded signals and a parallel
        list of integer labels (0 = normal, 1 = abnormal).
    """
    audio_files = []
    labels = []
    for root, dirs, files in os.walk(data_path):
        # Sorting in place fixes the traversal order, so the returned lists
        # do not depend on the filesystem's directory listing order.
        dirs.sort()
        for file in sorted(files):
            if file.endswith('.wav'):
                file_path = os.path.join(root, file)
                y, _ = librosa.load(file_path, sr=sr)
                audio_files.append(y)
                # 'abnormal' contains the substring 'normal', so a bare
                # `'normal' in file_path` would label abnormal files as 0.
                is_normal = 'normal' in file_path and 'abnormal' not in file_path
                labels.append(0 if is_normal else 1)
    return audio_files, labels

def extract_features(audio_files, sr=22050, n_mfcc=13):
    """Build one mean-MFCC feature vector per audio clip.

    Args:
        audio_files: Iterable of audio signals accepted by
            ``librosa.feature.mfcc`` as ``y``.
        sr: Sample rate forwarded to ``librosa.feature.mfcc``.
        n_mfcc: Number of MFCC coefficients requested per clip.

    Returns:
        ``np.ndarray`` with one row per clip; each row is the MFCC matrix
        averaged over its second axis.
    """
    per_clip_means = [
        # Transpose, then average over axis 0: one mean per coefficient.
        np.mean(librosa.feature.mfcc(y=clip, sr=sr, n_mfcc=n_mfcc).T, axis=0)
        for clip in audio_files
    ]
    return np.array(per_clip_means)

# Example usage: train and pickle one random-forest classifier per machine type.
# Imported once here instead of being re-executed on every loop iteration.
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pickle

machine_types = ['slider', 'fan', 'pump', 'valve']
base_path = 'v'

for machine in machine_types:
    print(f"Loading data for {machine}...")
    machine_path = os.path.join(base_path, machine)
    audio_files, labels = load_audio_files(machine_path)
    
    # Skip machine types whose folder yielded no .wav files.
    if not audio_files:
        print(f"No audio files found for {machine}.")
        continue

    # One feature vector per clip; labels become an array for sklearn.
    features = extract_features(audio_files)
    labels = np.array(labels)

    if len(features) < 2:  # Ensure there are enough samples to split
        print(f"Not enough samples to create train/test split for {machine}.")
        continue

    # Hold out 20% of the samples; the fixed seed keeps the split repeatable
    # for a given sample order.
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
    
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)
    
    # Persist the fitted model under a relative filename, <machine>_model.pkl.
    model_filename = f'{machine}_model.pkl'
    with open(model_filename, 'wb') as file:
        pickle.dump(rf_model, file)
    
    print(f"Model for {machine} saved as {model_filename}.")


Loading data for slider...
Model for slider saved as slider_model.pkl.
Loading data for fan...
Model for fan saved as fan_model.pkl.
Loading data for pump...
Model for pump saved as pump_model.pkl.
Loading data for valve...
Model for valve saved as valve_model.pkl.


In [17]:

import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pickle
def load_audio_files(data_path, sr=22050):
    """Recursively load every ``.wav`` file under ``data_path`` and label it.

    Labels come from the containing directory path: 0 (normal) when the path
    contains ``normal`` but not ``abnormal``, otherwise 1 (abnormal).

    Args:
        data_path: Root directory to walk.
        sr: Sample rate forwarded to ``librosa.load``.

    Returns:
        Tuple ``(audio_files, labels)``: the loaded signals and a parallel
        list of integer labels (0 = normal, 1 = abnormal).
    """
    audio_files = []
    labels = []
    for root, dirs, files in os.walk(data_path):
        # Sorting in place fixes the traversal order, so the returned lists
        # do not depend on the filesystem's directory listing order.
        dirs.sort()
        # Determine label based on folder name. 'abnormal' contains the
        # substring 'normal', so it must be excluded explicitly; a bare
        # `'normal' in root` would label abnormal folders as 0.
        label = 0 if ('normal' in root and 'abnormal' not in root) else 1
        for file in sorted(files):
            if file.endswith('.wav'):
                file_path = os.path.join(root, file)
                y, _ = librosa.load(file_path, sr=sr)
                audio_files.append(y)
                labels.append(label)
    return audio_files, labels

def extract_features(audio_files, sr=22050, n_mfcc=13):
    """Summarise each audio clip as the mean of its MFCC matrix.

    Args:
        audio_files: Iterable of audio signals accepted by
            ``librosa.feature.mfcc`` as ``y``.
        sr: Sample rate forwarded to ``librosa.feature.mfcc``.
        n_mfcc: Number of MFCC coefficients requested per clip.

    Returns:
        ``np.ndarray`` with one row per clip; each row is the MFCC matrix
        averaged over its second axis.
    """
    clip_vectors = []
    for signal in audio_files:
        coeffs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)
        # Transpose, then average over axis 0: one mean per coefficient.
        clip_vectors.append(np.mean(coeffs.T, axis=0))
    return np.array(clip_vectors)

# Directory containing the machine type folders
base_path = 'v'
machine_types = ['slider', 'fan', 'pump', 'valve']

# Train and pickle one RandomForest classifier per machine type.
for machine in machine_types:
    print(f"Loading data for {machine}...")
    machine_path = os.path.join(base_path, machine)
    audio_files, labels = load_audio_files(machine_path)
    
    # Skip machine types whose folder yielded no .wav files.
    if not audio_files:
        print(f"No audio files found for {machine}.")
        continue

    # One feature vector per clip; labels become an array for sklearn.
    features = extract_features(audio_files)
    labels = np.array(labels)

    if len(features) < 2:  # Ensure there are enough samples to split
        print(f"Not enough samples to create train/test split for {machine}.")
        continue

    # Hold out 20% of the samples; the fixed seed keeps the split repeatable
    # for a given sample order.
    # NOTE(review): X_test / y_test are not used below, so the saved model is
    # never evaluated in this cell.
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
    
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_model.fit(X_train, y_train)
    
    # Persist the fitted model under a relative filename, <machine>_model.pkl.
    model_filename = f'{machine}_model.pkl'
    with open(model_filename, 'wb') as file:
        pickle.dump(rf_model, file)
    
    print(f"Model for {machine} saved as {model_filename}.")


Loading data for slider...
Model for slider saved as slider_model.pkl.
Loading data for fan...
Model for fan saved as fan_model.pkl.
Loading data for pump...
Model for pump saved as pump_model.pkl.
Loading data for valve...
Model for valve saved as valve_model.pkl.
