In [1]:
import librosa
import numpy as np
import xgboost as xgb

# Feature extraction functions
def extract_mfcc(y, sr=16000):
    """Return the time-averaged MFCCs of an audio signal.

    Args:
        y: Audio time series, forwarded to ``librosa.feature.mfcc``.
        sr: Sampling rate of ``y`` in Hz. Defaults to 16000, the value that
            used to be hard-coded here, so existing callers are unaffected.

    Returns:
        Array with one entry per MFCC coefficient (13 are requested), each
        being the mean of that coefficient over all frames.
    """
    coefficients = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    # Transpose so that axis 0 runs over frames, then average the frames away.
    return np.mean(coefficients.T, axis=0)

def extract_delta(y, sr=16000):
    """Return the time-averaged delta (first-order difference) of the MFCCs.

    Args:
        y: Audio time series, forwarded to ``librosa.feature.mfcc``.
        sr: Sampling rate of ``y`` in Hz. Defaults to 16000, the value that
            used to be hard-coded here, so existing callers are unaffected.

    Returns:
        Array with one entry per MFCC coefficient (13 are requested), each
        being the mean of that coefficient's delta over all frames.
    """
    coefficients = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    deltas = librosa.feature.delta(coefficients)
    # Transpose so that axis 0 runs over frames, then average the frames away.
    return np.mean(deltas.T, axis=0)

def extract_mel_spectrogram(y, sr=16000):
    """Return the time-averaged mel spectrogram of an audio signal, in dB.

    Args:
        y: Audio time series, forwarded to ``librosa.feature.melspectrogram``.
        sr: Sampling rate of ``y`` in Hz. Defaults to 16000, the value that
            used to be hard-coded here, so existing callers are unaffected.

    Returns:
        Array with one entry per mel band, each being the mean dB value of
        that band over all frames.
    """
    power = librosa.feature.melspectrogram(y=y, sr=sr)
    # ref=np.max expresses every bin relative to the loudest bin of this clip.
    decibels = librosa.power_to_db(power, ref=np.max)
    return np.mean(decibels.T, axis=0)

def extract_zero_crossing_rate(y):
    """Average the frame-wise zero-crossing rate of ``y`` over time.

    Args:
        y: Audio time series, forwarded to
            ``librosa.feature.zero_crossing_rate``.

    Returns:
        The per-frame zero-crossing rates reduced to their mean along the
        frame axis.
    """
    per_frame = librosa.feature.zero_crossing_rate(y)
    frames_first = per_frame.T
    return np.mean(frames_first, axis=0)

def extract_chroma(y, sr=16000):
    """Return the time-averaged STFT chromagram of an audio signal.

    Args:
        y: Audio time series, forwarded to ``librosa.feature.chroma_stft``.
        sr: Sampling rate of ``y`` in Hz. Defaults to 16000, the value that
            used to be hard-coded here, so existing callers are unaffected.

    Returns:
        Array with one entry per chroma bin, each being the mean of that bin
        over all frames.
    """
    chromagram = librosa.feature.chroma_stft(y=y, sr=sr)
    # Transpose so that axis 0 runs over frames, then average the frames away.
    return np.mean(chromagram.T, axis=0)

def extract_cqt(y, sr=16000):
    """Return the time-averaged constant-Q transform magnitude of a signal.

    Args:
        y: Audio time series, forwarded to ``librosa.cqt``.
        sr: Sampling rate of ``y`` in Hz. Defaults to 16000, the value that
            used to be hard-coded here, so existing callers are unaffected.
            NOTE(review): librosa's default CQT frequency range presumably
            needs a sufficiently high Nyquist frequency; confirm before
            passing a much lower rate.

    Returns:
        Array with one entry per CQT bin, each being the mean magnitude of
        that bin over all frames.
    """
    # The transform is complex-valued; only its magnitude is kept.
    magnitude = np.abs(librosa.cqt(y, sr=sr))
    return np.mean(magnitude.T, axis=0)

# Preprocess the audio and extract features
def preprocess_and_extract_features(file):
    """Load an audio file and reduce it to a single flat feature vector.

    The audio is loaded at 16 kHz, trimmed with ``librosa.effects.trim``,
    forced to exactly five seconds (zero-padded at the end, or cut off), and
    then summarised by each ``extract_*`` helper.

    Args:
        file: Audio source handed straight to ``librosa.load``.

    Returns:
        1-D array holding the MFCC, delta-MFCC, mel-spectrogram,
        zero-crossing-rate, chroma and CQT summaries, in that order.
    """
    signal, rate = librosa.load(file, sr=16000)
    signal, _ = librosa.effects.trim(signal)

    # Fix the clip length at five seconds: cut any excess first, then
    # zero-fill whatever is still missing at the end.
    target_len = rate * 5
    signal = signal[:target_len]
    shortfall = target_len - len(signal)
    if shortfall > 0:
        signal = np.pad(signal, (0, shortfall), mode='constant')

    # The order here defines the layout of the final feature vector.
    extractors = (
        extract_mfcc,
        extract_delta,
        extract_mel_spectrogram,
        extract_zero_crossing_rate,
        extract_chroma,
        extract_cqt,
    )
    parts = [extractor(signal).flatten() for extractor in extractors]
    return np.concatenate(parts)

# Load pre-trained XGBoost model and make predictions
def predict_audio_class(file, model_path):
    """Classify one audio file with a saved XGBoost model.

    Args:
        file: Audio file to classify; passed to
            ``preprocess_and_extract_features``.
        model_path: Location of the saved model, passed to
            ``XGBClassifier.load_model``.

    Returns:
        Tuple ``(prediction, probability)`` as returned by the classifier's
        ``predict`` and ``predict_proba`` for the single input sample.
    """
    classifier = xgb.XGBClassifier()
    classifier.load_model(model_path)

    # A lone sample must still be presented as a (1, n_features) matrix.
    sample = preprocess_and_extract_features(file)[np.newaxis, :]

    return classifier.predict(sample), classifier.predict_proba(sample)

# Inputs for this run: the audio clip to classify and the saved XGBoost model.
# Both are absolute, machine-specific Windows paths (raw strings so the
# backslashes are taken literally); adjust them before running elsewhere.
audio_file = r"C:\Users\hiran\Downloads\What_ Short video clip designed with Canva.mp3"
model_path = r"C:\Users\hiran\AI 1 0\Audio\XGBoost\xgb_audio_model.json"

# Run the full pipeline (load model, extract features, predict) on the clip.
predicted_class, predicted_proba = predict_audio_class(audio_file, model_path)

# Show the predicted label and the class probabilities for the clip.
print(f"Predicted class: {predicted_class}")
print(f"Predicted probabilities: {predicted_proba}")

Predicted class: [0]
Predicted probabilities: [[0.9686394  0.03136061]]


  return pitch_tuning(
