In [1]:
import os
import numpy as np
import pandas as pd
import librosa

def calculate_frequency_category(freq):
    """
    Map a frequency in Hz to the label of the band that contains it.

    Bands are half-open intervals [low, high) and the label is the string
    'low-high' (e.g. '160-300'); the top band is labelled '20000-inf'.

    Parameters:
        freq (float): Frequency in Hz.

    Returns:
        str: Label of the matching band. Inputs that match no band
        (negative values, NaN) fall back to '20000-inf'.
    """
    # The (0, 20) band catches sub-audible bins, including the 0 Hz DC bin.
    # Without it those bins fell through the loop and were labelled
    # '20000-inf', contaminating the average of the top band.
    categories = [
        (0, 20), (20, 40), (40, 80), (80, 160), (160, 300), (300, 600),
        (600, 1200), (1200, 2400), (2400, 5000), (5000, 10000),
        (10000, 20000), (20000, np.inf)
    ]

    for low, high in categories:
        if low <= freq < high:
            return f'{low}-{high}'
    return '20000-inf'  # Fallback for inputs outside every band

def extract_frequency_features(audio_file, duration=5, sample_rate=44100):
    """
    Compute per-band average dB levels for consecutive fixed-length segments.

    The file is loaded with librosa, cut into back-to-back segments of
    `duration` seconds, and for each segment the STFT magnitude is averaged
    over time, converted to dB, and averaged again over the FFT bins that
    calculate_frequency_category assigns to each band.

    Parameters:
        audio_file (str): Path of the audio file to analyse.
        duration (float): Segment length in seconds.
        sample_rate (int): Sample rate passed to librosa.load.

    Returns:
        list[dict]: One dict per segment, mapping band label -> mean dB value.
        A trailing remainder shorter than one FFT window is dropped.

    Raises:
        ValueError: If the segment length works out to zero or fewer samples.
    """
    y, sr = librosa.load(audio_file, sr=sample_rate)

    # librosa.stft's default window size, spelled out so that
    # fft_frequencies below is guaranteed to describe the same bins.
    n_fft = 2048

    # Use the rate librosa actually returned rather than the requested one.
    segment_len = int(duration * sr)
    if segment_len <= 0:
        raise ValueError("duration * sample_rate must be a positive number of samples")

    # The bin -> band mapping depends only on sr and n_fft, so build it once
    # instead of once per segment.
    band_bins = {}
    for bin_index, freq in enumerate(librosa.fft_frequencies(sr=sr, n_fft=n_fft)):
        band_bins.setdefault(calculate_frequency_category(freq), []).append(bin_index)

    features = []

    # Slice fixed-length windows. The previous librosa.effects.split call
    # returned non-silent intervals (silence detection), not segments of the
    # requested duration, so segment i of one file did not necessarily cover
    # the same time span as segment i of another.
    for start in range(0, len(y), segment_len):
        sub_y = y[start:start + segment_len]
        if len(sub_y) < n_fft:
            continue  # remainder too short for a single full STFT window

        # Time-averaged magnitude spectrum of the segment, in dB
        avg_mag = np.mean(np.abs(librosa.stft(sub_y, n_fft=n_fft)), axis=1)
        db_values = librosa.amplitude_to_db(avg_mag)

        # Mean dB over the bins belonging to each band
        features.append({
            band: np.mean(db_values[bin_indices])
            for band, bin_indices in band_bins.items()
        })

    return features

def load_audio_files(directory):
    """
    List the WAV files in a directory that librosa can load.

    Each candidate is test-loaded with librosa.load; files that raise are
    reported on stdout and left out of the result.

    Parameters:
        directory (str): Path to the directory containing audio files.

    Returns:
        List of paths (directory joined with file name) of the files that
        loaded successfully, sorted by file name.
    """
    audio_files = []

    # os.listdir returns entries in arbitrary order; sort so callers that pair
    # two directories by position get a deterministic, name-based pairing.
    for file in sorted(os.listdir(directory)):
        # Case-insensitive match so '.WAV' files are not silently missed
        if not file.lower().endswith('.wav'):
            continue
        path = os.path.join(directory, file)
        try:
            librosa.load(path)
        except Exception as e:
            print(f"Skipping file '{file}': {e}")
        else:
            audio_files.append(path)

    return audio_files

def create_dataset(unmastered_dir, mastered_dir):
    """
    Create a dataset containing frequency category dB values for both unmastered and mastered audio files.

    Files are paired by position after sorting each directory's file list,
    and segments are paired by index within each file pair.
    NOTE(review): this assumes corresponding files sort to the same position
    in both directories (e.g. identical file names) - confirm.

    Parameters:
        unmastered_dir (str): Path to the directory containing unmastered audio files.
        mastered_dir (str): Path to the directory containing mastered audio files.

    Returns:
        DataFrame with one row per segment pair: the two file paths plus
        'unmastered_<band>' and 'mastered_<band>' dB columns.
    """
    # Sort explicitly: the file lists come from os.listdir, whose order is
    # arbitrary, so unsorted positional pairing could match unrelated files.
    unmastered_files = sorted(load_audio_files(unmastered_dir))
    mastered_files = sorted(load_audio_files(mastered_dir))

    # zip() silently truncates to the shorter list; make that visible.
    if len(unmastered_files) != len(mastered_files):
        print(
            f"Warning: {len(unmastered_files)} unmastered vs "
            f"{len(mastered_files)} mastered files; unpaired files are ignored"
        )

    data = []

    for unmastered_file, mastered_file in zip(unmastered_files, mastered_files):
        unmastered_features = extract_frequency_features(unmastered_file)
        mastered_features = extract_frequency_features(mastered_file)

        # Pair segments by index; extra segments of the longer file are dropped
        for unmastered_bands, mastered_bands in zip(unmastered_features, mastered_features):
            row = {'unmastered_audio_file': unmastered_file, 'mastered_audio_file': mastered_file}

            for category_range, db_value in sorted(unmastered_bands.items()):
                row[f'unmastered_{category_range}'] = db_value

            for category_range, db_value in sorted(mastered_bands.items()):
                row[f'mastered_{category_range}'] = db_value

            data.append(row)

    return pd.DataFrame(data)

# Directories holding the unmastered and the mastered recordings
unmastered_dir = 'unmastered_guitar'
mastered_dir = 'mastered_guitar'

# Build the feature table for every unmastered/mastered file pair
dataset = create_dataset(unmastered_dir=unmastered_dir, mastered_dir=mastered_dir)

# Write the table to disk without the pandas row index
dataset.to_csv(path_or_buf='audio_features_dataset.csv', index=False)


In [2]:
import os
import numpy as np
import pandas as pd
import librosa

def calculate_frequency_category(freq):
    """
    Map a frequency in Hz to the label of the band that contains it.

    Bands are half-open intervals [low, high) and the label is the string
    'low-high' (e.g. '160-300'); the top band is labelled '20000-inf'.

    Parameters:
        freq (float): Frequency in Hz.

    Returns:
        str: Label of the matching band. Inputs that match no band
        (negative values, NaN) fall back to '20000-inf'.
    """
    # The (0, 20) band catches sub-audible bins, including the 0 Hz DC bin.
    # Without it those bins fell through the loop and were labelled
    # '20000-inf', contaminating the average of the top band.
    categories = [
        (0, 20), (20, 40), (40, 80), (80, 160), (160, 300), (300, 600),
        (600, 1200), (1200, 2400), (2400, 5000), (5000, 10000),
        (10000, 20000), (20000, np.inf)
    ]

    for low, high in categories:
        if low <= freq < high:
            return f'{low}-{high}'
    return '20000-inf'  # Fallback for inputs outside every band

def extract_frequency_features(audio_file, duration=5, sample_rate=44100):
    """
    Compute per-band average dB levels for consecutive fixed-length segments.

    The file is loaded with librosa, cut into back-to-back segments of
    `duration` seconds, and for each segment the STFT magnitude is averaged
    over time, converted to dB, and averaged again over the FFT bins that
    calculate_frequency_category assigns to each band.

    Parameters:
        audio_file (str): Path of the audio file to analyse.
        duration (float): Segment length in seconds.
        sample_rate (int): Sample rate passed to librosa.load.

    Returns:
        list[dict]: One dict per segment, mapping band label -> mean dB value.
        A trailing remainder shorter than one FFT window is dropped.

    Raises:
        ValueError: If the segment length works out to zero or fewer samples.
    """
    y, sr = librosa.load(audio_file, sr=sample_rate)

    # librosa.stft's default window size, spelled out so that
    # fft_frequencies below is guaranteed to describe the same bins.
    n_fft = 2048

    # Use the rate librosa actually returned rather than the requested one.
    segment_len = int(duration * sr)
    if segment_len <= 0:
        raise ValueError("duration * sample_rate must be a positive number of samples")

    # The bin -> band mapping depends only on sr and n_fft, so build it once
    # instead of once per segment.
    band_bins = {}
    for bin_index, freq in enumerate(librosa.fft_frequencies(sr=sr, n_fft=n_fft)):
        band_bins.setdefault(calculate_frequency_category(freq), []).append(bin_index)

    features = []

    # Slice fixed-length windows. The previous librosa.effects.split call
    # returned non-silent intervals (silence detection), not segments of the
    # requested duration, so segment i of one file did not necessarily cover
    # the same time span as segment i of another.
    for start in range(0, len(y), segment_len):
        sub_y = y[start:start + segment_len]
        if len(sub_y) < n_fft:
            continue  # remainder too short for a single full STFT window

        # Time-averaged magnitude spectrum of the segment, in dB
        avg_mag = np.mean(np.abs(librosa.stft(sub_y, n_fft=n_fft)), axis=1)
        db_values = librosa.amplitude_to_db(avg_mag)

        # Mean dB over the bins belonging to each band
        features.append({
            band: np.mean(db_values[bin_indices])
            for band, bin_indices in band_bins.items()
        })

    return features

def load_audio_files(directory):
    """
    List the WAV files found directly inside a directory.

    Parameters:
        directory (str): Path to the directory containing audio files.

    Returns:
        List of audio file paths (directory joined with file name), sorted.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the result
    # deterministic so callers that pair two directories by position get a
    # name-based pairing. The extension test is case-insensitive so '.WAV'
    # files are not silently missed.
    return sorted(
        os.path.join(directory, file)
        for file in os.listdir(directory)
        if file.lower().endswith('.wav')
    )

def create_dataset(unmastered_dir, mastered_dir):
    """
    Create a dataset containing frequency category dB values for both unmastered and mastered audio files.

    Files are paired by position after sorting each directory's file list,
    and segments are paired by index within each file pair.
    NOTE(review): this assumes corresponding files sort to the same position
    in both directories (e.g. identical file names) - confirm.

    Parameters:
        unmastered_dir (str): Path to the directory containing unmastered audio files.
        mastered_dir (str): Path to the directory containing mastered audio files.

    Returns:
        DataFrame with one row per segment pair and the columns
        'unmastered_<band>' followed by 'mastered_<band>', bands in ascending
        frequency order. File-name columns are not part of the result. Bands
        absent from the extracted features are NaN, and an empty DataFrame
        with these columns is returned when no segments were produced.
    """
    # Sort explicitly: the file lists come from os.listdir, whose order is
    # arbitrary, so unsorted positional pairing could match unrelated files.
    unmastered_files = sorted(load_audio_files(unmastered_dir))
    mastered_files = sorted(load_audio_files(mastered_dir))

    # zip() silently truncates to the shorter list; make that visible.
    if len(unmastered_files) != len(mastered_files):
        print(
            f"Warning: {len(unmastered_files)} unmastered vs "
            f"{len(mastered_files)} mastered files; unpaired files are ignored"
        )

    data = []

    for unmastered_file, mastered_file in zip(unmastered_files, mastered_files):
        unmastered_features = extract_frequency_features(unmastered_file)
        mastered_features = extract_frequency_features(mastered_file)

        # Pair segments by index; extra segments of the longer file are dropped
        for unmastered_bands, mastered_bands in zip(unmastered_features, mastered_features):
            row = {'unmastered_audio_file': unmastered_file, 'mastered_audio_file': mastered_file}

            for category_range, db_value in sorted(unmastered_bands.items()):
                row[f'unmastered_{category_range}'] = db_value

            for category_range, db_value in sorted(mastered_bands.items()):
                row[f'mastered_{category_range}'] = db_value

            data.append(row)

    df = pd.DataFrame(data)

    # Output columns: every unmastered band, then every mastered band
    bands = [
        (20, 40), (40, 80), (80, 160), (160, 300), (300, 600),
        (600, 1200), (1200, 2400), (2400, 5000), (5000, 10000),
        (10000, 20000), (20000, np.inf)
    ]
    column_order = [
        f'{prefix}{low}-{high}'
        for prefix in ('unmastered_', 'mastered_')
        for (low, high) in bands
    ]

    # reindex instead of df[column_order]: plain selection raises KeyError
    # when the frame is empty (no files / no segments) or a band column is
    # missing; reindex yields the full column set with NaN for missing data.
    return df.reindex(columns=column_order)

# Directories holding the unmastered and the mastered recordings
unmastered_dir = 'unmastered_guitar'
mastered_dir = 'mastered_guitar'

# Build the feature table for every unmastered/mastered file pair
dataset = create_dataset(unmastered_dir=unmastered_dir, mastered_dir=mastered_dir)

# Write the table to disk without the pandas row index
dataset.to_csv(path_or_buf='audio_features_dataset1.csv', index=False)


In [13]:
import os
import numpy as np
import pandas as pd
import librosa

def calculate_frequency_category(freq):
    """
    Map a frequency in Hz to the label of the band that contains it.

    Bands are half-open intervals [low, high) and the label is the string
    'low-high' (e.g. '160-300'); the top band is labelled '20000-inf'.

    Parameters:
        freq (float): Frequency in Hz.

    Returns:
        str: Label of the matching band. Inputs that match no band
        (negative values, NaN) fall back to '20000-inf'.
    """
    # The (0, 20) band catches sub-audible bins, including the 0 Hz DC bin.
    # Without it those bins fell through the loop and were labelled
    # '20000-inf', contaminating the average of the top band.
    categories = [
        (0, 20), (20, 40), (40, 80), (80, 160), (160, 300), (300, 600),
        (600, 1200), (1200, 2400), (2400, 5000), (5000, 10000),
        (10000, 20000), (20000, np.inf)
    ]

    for low, high in categories:
        if low <= freq < high:
            return f'{low}-{high}'
    return '20000-inf'  # Fallback for inputs outside every band

def extract_frequency_features(audio_file, duration=5, sample_rate=44100):
    """
    Compute per-band average dB levels for consecutive fixed-length segments.

    The file is loaded with librosa, cut into back-to-back segments of
    `duration` seconds, and for each segment the STFT magnitude is averaged
    over time, converted to dB, and averaged again over the FFT bins that
    calculate_frequency_category assigns to each band.

    Parameters:
        audio_file (str): Path of the audio file to analyse.
        duration (float): Segment length in seconds.
        sample_rate (int): Sample rate passed to librosa.load.

    Returns:
        list[dict]: One dict per segment, mapping band label -> mean dB value.
        A trailing remainder shorter than one FFT window is dropped.

    Raises:
        ValueError: If the segment length works out to zero or fewer samples.
    """
    y, sr = librosa.load(audio_file, sr=sample_rate)

    # librosa.stft's default window size, spelled out so that
    # fft_frequencies below is guaranteed to describe the same bins.
    n_fft = 2048

    # Use the rate librosa actually returned rather than the requested one.
    segment_len = int(duration * sr)
    if segment_len <= 0:
        raise ValueError("duration * sample_rate must be a positive number of samples")

    # The bin -> band mapping depends only on sr and n_fft, so build it once
    # instead of once per segment.
    band_bins = {}
    for bin_index, freq in enumerate(librosa.fft_frequencies(sr=sr, n_fft=n_fft)):
        band_bins.setdefault(calculate_frequency_category(freq), []).append(bin_index)

    features = []

    # Slice fixed-length windows. The previous librosa.effects.split call
    # returned non-silent intervals (silence detection), not segments of the
    # requested duration, so segment i of one file did not necessarily cover
    # the same time span as segment i of another.
    for start in range(0, len(y), segment_len):
        sub_y = y[start:start + segment_len]
        if len(sub_y) < n_fft:
            continue  # remainder too short for a single full STFT window

        # Time-averaged magnitude spectrum of the segment, in dB
        avg_mag = np.mean(np.abs(librosa.stft(sub_y, n_fft=n_fft)), axis=1)
        db_values = librosa.amplitude_to_db(avg_mag)

        # Mean dB over the bins belonging to each band
        features.append({
            band: np.mean(db_values[bin_indices])
            for band, bin_indices in band_bins.items()
        })

    return features

def load_audio_files(directory):
    """
    List the WAV files found directly inside a directory.

    Parameters:
        directory (str): Path to the directory containing audio files.

    Returns:
        List of audio file paths (directory joined with file name), sorted.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the result
    # deterministic so callers that pair two directories by position get a
    # name-based pairing. The extension test is case-insensitive so '.WAV'
    # files are not silently missed.
    return sorted(
        os.path.join(directory, file)
        for file in os.listdir(directory)
        if file.lower().endswith('.wav')
    )

def create_dataset(unmastered_dir, mastered_dir):
    """
    Create a dataset containing frequency category dB values for both unmastered and mastered audio files.

    Files are paired by position after sorting each directory's file list,
    and segments are paired by index within each file pair.
    NOTE(review): this assumes corresponding files sort to the same position
    in both directories (e.g. identical file names) - confirm.

    Parameters:
        unmastered_dir (str): Path to the directory containing unmastered audio files.
        mastered_dir (str): Path to the directory containing mastered audio files.

    Returns:
        DataFrame with one row per segment pair: the two file base names
        first, then 'unmastered_<band>' and 'mastered_<band>' columns with
        bands in ascending frequency order. Bands absent from the extracted
        features are NaN, and an empty DataFrame with these columns is
        returned when no segments were produced.
    """
    # Sort explicitly: the file lists come from os.listdir, whose order is
    # arbitrary, so unsorted positional pairing could match unrelated files.
    unmastered_files = sorted(load_audio_files(unmastered_dir))
    mastered_files = sorted(load_audio_files(mastered_dir))

    # zip() silently truncates to the shorter list; make that visible.
    if len(unmastered_files) != len(mastered_files):
        print(
            f"Warning: {len(unmastered_files)} unmastered vs "
            f"{len(mastered_files)} mastered files; unpaired files are ignored"
        )

    data = []

    for unmastered_file, mastered_file in zip(unmastered_files, mastered_files):
        unmastered_features = extract_frequency_features(unmastered_file)
        mastered_features = extract_frequency_features(mastered_file)

        # Pair segments by index; extra segments of the longer file are dropped
        for unmastered_bands, mastered_bands in zip(unmastered_features, mastered_features):
            row = {
                'unmastered_audio_file': os.path.basename(unmastered_file),
                'mastered_audio_file': os.path.basename(mastered_file),
            }

            for category_range, db_value in sorted(unmastered_bands.items()):
                row[f'unmastered_{category_range}'] = db_value

            for category_range, db_value in sorted(mastered_bands.items()):
                row[f'mastered_{category_range}'] = db_value

            data.append(row)

    df = pd.DataFrame(data)

    # Output columns: file names, every unmastered band, then every mastered band
    bands = [
        (20, 40), (40, 80), (80, 160), (160, 300), (300, 600),
        (600, 1200), (1200, 2400), (2400, 5000), (5000, 10000),
        (10000, 20000), (20000, np.inf)
    ]
    column_order = ['unmastered_audio_file', 'mastered_audio_file'] + [
        f'{prefix}{low}-{high}'
        for prefix in ('unmastered_', 'mastered_')
        for (low, high) in bands
    ]

    # reindex instead of df[column_order]: plain selection raises KeyError
    # when the frame is empty (no files / no segments) or a band column is
    # missing; reindex yields the full column set with NaN for missing data.
    return df.reindex(columns=column_order)

# Directories holding the unmastered and the mastered recordings
unmastered_dir = 'unmastered_vocal'
mastered_dir = 'mastered_vocal'

# Build the feature table for every unmastered/mastered file pair
dataset = create_dataset(unmastered_dir=unmastered_dir, mastered_dir=mastered_dir)

# Write the table to disk without the pandas row index
dataset.to_csv(path_or_buf='audio_features_vocal.csv', index=False)


In [14]:
import pandas as pd

# Read the previously exported feature table
input_csv_path = 'audio_features_vocal.csv'
df = pd.read_csv(input_csv_path)

# Band labels whose unmastered_/mastered_ columns should be removed
# (adjust to the category naming convention in use).
# NOTE(review): '<20-40' matches none of the labels produced by
# calculate_frequency_category in this notebook (the lowest one is '20-40'),
# and errors='ignore' below hides the mismatch - confirm the intended label.
categories_to_remove = ['<20-40', '20000-inf']

# For each label, the unmastered column followed by the mastered column
columns_to_drop = [
    f'{prefix}_{category}'
    for category in categories_to_remove
    for prefix in ('unmastered', 'mastered')
]

# Columns that do not exist in the frame are skipped rather than raising
df_cleaned = df.drop(columns=columns_to_drop, errors='ignore')

# Write the reduced table to a new CSV file
output_csv_path = 'output_dataset_vocal.csv'
df_cleaned.to_csv(output_csv_path, index=False)

print(f"Cleaned dataset saved to: {output_csv_path}")


Cleaned dataset saved to: output_dataset_vocal.csv


In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split





In [2]:
# Read the training table from its CSV file
dataset_path = 'final_guitar_dataset.csv'
df = pd.read_csv(dataset_path)

# The ten frequency bands used as both model inputs and targets
band_labels = [
    '20-40', '40-80', '80-160', '160-300', '300-600',
    '600-1200', '1200-2400', '2400-5000', '5000-10000', '10000-20000',
]

# Inputs are the unmastered per-band columns, targets the mastered ones
X = df[[f'unmastered_{band}' for band in band_labels]].values
y = df[[f'mastered_{band}' for band in band_labels]].values

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [3]:
# Seed TensorFlow's global random generator before any layer is created so
# that weight initialisation (and therefore training) is repeatable, matching
# the fixed random_state already used for the train/test split.
tf.random.set_seed(42)

# Define the neural network architecture
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(10,)),  # Input layer with 10 features
    keras.layers.Dense(32, activation='relu'),  # Hidden layer with 32 neurons
    keras.layers.Dense(10)  # Output layer with 10 neurons (one for each frequency range)
])

# Compile the model: MSE loss, with mean absolute error reported as a metric
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Print the model summary
model.summary()




Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                704       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 10)                330       
                                                                 
Total params: 3114 (12.16 KB)
Trainable params: 3114 (12.16 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [6]:
# Stop once val_loss has gone 5 epochs without improving and restore the
# best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# 10% of the training split is held out for validation
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.1,
    callbacks=[early_stopping],
)

Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [4]:
# Train the model, writing a checkpoint only when validation loss improves
from tensorflow.keras.callbacks import ModelCheckpoint

checkpointer = ModelCheckpoint(
    filepath='Predictor_models/audio_predictor.hdf5',
    monitor='val_loss',  # the default, stated explicitly
    verbose=1,
    save_best_only=True,
)

# `callbacks` is documented as a list of Callback instances, so wrap the
# single checkpointer in a list rather than passing it bare.
history = model.fit(
    X_train,
    y_train,
    epochs=500,
    batch_size=32,
    validation_split=0.1,
    callbacks=[checkpointer],
)

Epoch 1/500


Epoch 1: val_loss improved from inf to 805.12805, saving model to Predictor_models\audio_predictor.hdf5
Epoch 2/500
Epoch 2: val_loss improved from 805.12805 to 731.98529, saving model to Predictor_models\audio_predictor.hdf5
Epoch 3/500
Epoch 3: val_loss improved from 731.98529 to 663.46844, saving model to Predictor_models\audio_predictor.hdf5
Epoch 4/500
Epoch 4: val_loss improved from 663.46844 to 599.60645, saving model to Predictor_models\audio_predictor.hdf5


  saving_api.save_model(


Epoch 5/500
Epoch 5: val_loss improved from 599.60645 to 540.86810, saving model to Predictor_models\audio_predictor.hdf5
Epoch 6/500
Epoch 6: val_loss improved from 540.86810 to 487.39417, saving model to Predictor_models\audio_predictor.hdf5
Epoch 7/500
Epoch 7: val_loss improved from 487.39417 to 440.07043, saving model to Predictor_models\audio_predictor.hdf5
Epoch 8/500
Epoch 8: val_loss improved from 440.07043 to 397.59528, saving model to Predictor_models\audio_predictor.hdf5
Epoch 9/500
Epoch 9: val_loss improved from 397.59528 to 359.00079, saving model to Predictor_models\audio_predictor.hdf5
Epoch 10/500
Epoch 10: val_loss improved from 359.00079 to 323.04407, saving model to Predictor_models\audio_predictor.hdf5
Epoch 11/500
Epoch 11: val_loss improved from 323.04407 to 288.99454, saving model to Predictor_models\audio_predictor.hdf5
Epoch 12/500
Epoch 12: val_loss improved from 288.99454 to 257.44064, saving model to Predictor_models\audio_predictor.hdf5
Epoch 13/500
Epoch

In [None]:
# Pasted training-log line (not code): loss: 2.5853 - mae: 1.1865 - val_loss: 10.8167 - val_mae: 2.2340

In [11]:
# Score the trained model on the held-out test split
from sklearn.metrics import r2_score

# evaluate() yields the compiled loss followed by the compiled metric (MAE)
loss, mae = model.evaluate(x=X_test, y=y_test)
y_pred = model.predict(x=X_test)

# Coefficient of determination between the true and predicted targets
r_squared = r2_score(y_true=y_test, y_pred=y_pred)

print(
    f"Test Loss: {loss}",
    f"Test Mean Absolute Error: {mae}",
    f"r2_score:{r_squared}",
    sep="\n",
)

Test Loss: 6.143632888793945
Test Mean Absolute Error: 1.7143665552139282
r2_score:0.8329176513914079
