In [5]:
import os
import shutil

# Root folder whose sub-folders contain the original audio files
source_dir = r'C:\Users\sahil\Downloads\emotion_analyser\files'
# Destination root; it receives one sub-folder per audio file name
new_dir = r'C:\Users\sahil\Downloads\emotion_analyser\files1'

# The destination root must exist before anything is copied into it
os.makedirs(new_dir, exist_ok=True)

# Visit every entry of the source root and ignore anything that is not a folder
for folder in os.listdir(source_dir):
    folder_path = os.path.join(source_dir, folder)
    if not os.path.isdir(folder_path):
        continue

    # Only .wav / .mp3 files are considered (the suffix match is case-sensitive)
    audio_files = [f for f in os.listdir(folder_path) if f.endswith(('.wav', '.mp3'))]

    for file_name in audio_files:
        # The file name without its extension doubles as the target folder name
        base_name, ext = os.path.splitext(file_name)
        folder_name = base_name
        target_folder = os.path.join(new_dir, folder_name)
        os.makedirs(target_folder, exist_ok=True)

        src_file_path = os.path.join(folder_path, file_name)

        # Never overwrite: if the name is taken, try <base>_1, <base>_2, ...
        # until a free one is found. As a consequence, running this cell again
        # adds another numbered copy of every file.
        new_file_name = file_name
        counter = 1
        new_file_path = os.path.join(target_folder, new_file_name)
        while os.path.exists(new_file_path):
            new_file_name = f"{base_name}_{counter}{ext}"
            counter += 1
            new_file_path = os.path.join(target_folder, new_file_name)

        shutil.copy(src_file_path, new_file_path)

print("Files have been organized into new folders named after the audio files, with unique names.")


Files have been organized into new folders named after the audio files, with unique names.


In [6]:
import os
import pandas as pd

# Root of the reorganized data set: one sub-folder per class
new_dir = r'C:\Users\sahil\Downloads\emotion_analyser\files1'

# One record per audio file: its full path plus the name of the folder that
# contains it, which serves as the class label. Non-directory entries of
# new_dir and files without a .wav / .mp3 suffix are skipped.
data = [
    {"location": os.path.join(new_dir, folder, file_name), "class": folder}
    for folder in os.listdir(new_dir)
    if os.path.isdir(os.path.join(new_dir, folder))
    for file_name in os.listdir(os.path.join(new_dir, folder))
    if file_name.endswith(('.wav', '.mp3'))
]

# Tabulate the records and persist them next to the audio folders
df = pd.DataFrame(data)
csv_path = os.path.join(new_dir, 'file_locations.csv')
df.to_csv(csv_path, index=False)

print("The DataFrame has been created and saved to 'file_locations.csv'.")

The DataFrame has been created and saved to 'file_locations.csv'.


In [8]:
# Number of files per class label, to check that the classes are balanced
df.loc[:, 'class'].value_counts()

euphoric     17
joyfully     17
sad          17
surprised    17
Name: class, dtype: int64

In [9]:
import os
import pandas as pd
import librosa
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Root of the reorganized data set; file_locations.csv is read from here
new_dir = r'C:\Users\sahil\Downloads\emotion_analyser\files1'

# Reload the table of file paths and class names written to file_locations.csv
locations_csv = os.path.join(new_dir, 'file_locations.csv')
df = pd.read_csv(locations_csv)

# Fit the encoder on the class names, then store the integer codes in a new
# column; label_encoder is kept so the codes can be mapped back to names
label_encoder = LabelEncoder()
label_encoder.fit(df['class'])
df['class_encoded'] = label_encoder.transform(df['class'])

# Function to calculate MFCCs
def calculate_mfcc(file_path, n_mfcc=13):
    """Return the time-averaged MFCC vector of one audio file.

    Parameters
    ----------
    file_path : str
        Path of the audio file passed to ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. Defaults to 13, the value
        that was previously hard-coded, so existing calls are unaffected.

    Returns
    -------
    numpy.ndarray or None
        The mean of each coefficient across time (one value per
        coefficient), or ``None`` if loading or feature extraction raised
        an exception. The failure is reported on stdout, not re-raised.
    """
    try:
        # sr=None asks librosa to keep the file's own sampling rate
        signal, sr = librosa.load(file_path, sr=None)
        mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)
        # Average over the time axis so every file yields a fixed-length vector
        return np.mean(mfccs, axis=1)
    except Exception as e:
        # Deliberately best-effort: one unreadable file must not abort the
        # whole extraction run; the caller handles the None result
        print(f"Error processing {file_path}: {e}")
        return None

# One MFCC vector per listed file; a file that could not be processed
# contributes a row of 13 None placeholders so row positions stay in step
mfcc_data = []
for audio_path in df['location']:
    features = calculate_mfcc(audio_path)
    mfcc_data.append(features if features is not None else [None] * 13)

# Feature columns are labelled mfcc_1 ... mfcc_13
mfcc_columns = [f'mfcc_{n}' for n in range(1, 14)]
mfcc_df = pd.DataFrame(mfcc_data, columns=mfcc_columns)

# Put the features side by side with the path / label columns
final_df = pd.concat([df, mfcc_df], axis=1)

# Persist the combined table next to the audio folders
final_csv_path = os.path.join(new_dir, 'final_audio_data.csv')
final_df.to_csv(final_csv_path, index=False)

print("Label encoding and MFCC calculation completed. Saved to 'final_audio_data.csv'.")


Label encoding and MFCC calculation completed. Saved to 'final_audio_data.csv'.


In [10]:
# Preview the first five rows of the combined table
final_df.head(n=5)

Unnamed: 0,location,class,class_encoded,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12,mfcc_13
0,C:\Users\sahil\Downloads\emotion_analyser\file...,euphoric,0,-596.27124,105.507523,-16.348253,34.213352,-7.040627,9.673249,15.672894,-18.594084,12.680883,-3.053727,-9.320203,5.608082,-5.410851
1,C:\Users\sahil\Downloads\emotion_analyser\file...,euphoric,0,-359.78244,72.187683,27.768223,38.32843,5.198799,8.380541,10.607283,-13.184477,11.505762,-4.186356,-6.031159,2.438568,-6.354579
2,C:\Users\sahil\Downloads\emotion_analyser\file...,euphoric,0,-424.573425,57.615242,12.12886,49.243443,-2.526363,11.702657,11.698957,-2.770733,-1.388484,-1.123289,-11.638446,14.478795,-10.840614
3,C:\Users\sahil\Downloads\emotion_analyser\file...,euphoric,0,-524.174438,103.392075,-7.327183,56.39101,10.343275,4.731972,23.960911,-0.654973,6.595408,4.164212,-6.080491,7.985336,0.112556
4,C:\Users\sahil\Downloads\emotion_analyser\file...,euphoric,0,-519.289429,101.250862,20.647596,29.956661,8.789474,8.442513,3.100049,5.523725,0.429982,5.303805,-0.014299,4.767339,0.963716


In [12]:
# Remove the file path and the textual label, leaving the encoded label and
# the MFCC features. errors='ignore' keeps the cell re-runnable: once the
# columns are gone, dropping them again is a no-op instead of a KeyError.
final_df = final_df.drop(columns=['location', 'class'], errors='ignore')

In [13]:
# Preview the first five rows after the path and label-name columns were dropped
final_df.head(n=5)

Unnamed: 0,class_encoded,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12,mfcc_13
0,0,-596.27124,105.507523,-16.348253,34.213352,-7.040627,9.673249,15.672894,-18.594084,12.680883,-3.053727,-9.320203,5.608082,-5.410851
1,0,-359.78244,72.187683,27.768223,38.32843,5.198799,8.380541,10.607283,-13.184477,11.505762,-4.186356,-6.031159,2.438568,-6.354579
2,0,-424.573425,57.615242,12.12886,49.243443,-2.526363,11.702657,11.698957,-2.770733,-1.388484,-1.123289,-11.638446,14.478795,-10.840614
3,0,-524.174438,103.392075,-7.327183,56.39101,10.343275,4.731972,23.960911,-0.654973,6.595408,4.164212,-6.080491,7.985336,0.112556
4,0,-519.289429,101.250862,20.647596,29.956661,8.789474,8.442513,3.100049,5.523725,0.429982,5.303805,-0.014299,4.767339,0.963716


In [14]:
# Split the encoded label off as the target y and keep only the MFCC feature
# columns in final_df. The guard makes the cell safe to run twice: on a
# re-run the column is already gone, so y and final_df are left as they are
# instead of raising a KeyError.
if 'class_encoded' in final_df.columns:
    y = final_df['class_encoded']
    final_df = final_df.drop(columns=['class_encoded'])
final_df.head()

Unnamed: 0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12,mfcc_13
0,-596.27124,105.507523,-16.348253,34.213352,-7.040627,9.673249,15.672894,-18.594084,12.680883,-3.053727,-9.320203,5.608082,-5.410851
1,-359.78244,72.187683,27.768223,38.32843,5.198799,8.380541,10.607283,-13.184477,11.505762,-4.186356,-6.031159,2.438568,-6.354579
2,-424.573425,57.615242,12.12886,49.243443,-2.526363,11.702657,11.698957,-2.770733,-1.388484,-1.123289,-11.638446,14.478795,-10.840614
3,-524.174438,103.392075,-7.327183,56.39101,10.343275,4.731972,23.960911,-0.654973,6.595408,4.164212,-6.080491,7.985336,0.112556
4,-519.289429,101.250862,20.647596,29.956661,8.789474,8.442513,3.100049,5.523725,0.429982,5.303805,-0.014299,4.767339,0.963716


In [15]:
# Re-attach the target y to the right of the feature columns
combined_df = pd.concat((final_df, y), axis='columns')

In [16]:
# Write the features and target to final.csv in the current working directory.
# The row index is written too, as a leading column with an empty header.
# NOTE(review): the earlier CSVs in this notebook use index=False; confirm
# whether readers of final.csv expect this extra index column.
combined_df.to_csv('final.csv', index=True)