<a href="https://colab.research.google.com/github/riya-maurya/codsoft/blob/main/Sound_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import soundfile
import numpy as np
import librosa
import glob
import os
import pickle
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Unpack the dataset archive into the current working directory.
# NOTE(review): assumes Google Drive is already mounted at /content/drive;
# no mount cell is visible in this notebook - confirm before Run All.
!unzip  "/content/drive/My Drive/Dataset.zip"

Archive:  /content/drive/My Drive/Dataset.zip
   creating: Actor_01/
 extracting: Actor_01/03-01-03-02-02-01-01.wav  
 extracting: Actor_01/03-01-02-01-01-02-01.wav  
 extracting: Actor_01/03-01-02-01-02-01-01.wav  
 extracting: Actor_01/03-01-03-02-01-02-01.wav  
 extracting: Actor_01/03-01-01-01-01-02-01.wav  
 extracting: Actor_01/03-01-02-01-02-02-01.wav  
 extracting: Actor_01/03-01-02-02-02-02-01.wav  
 extracting: Actor_01/03-01-02-02-01-01-01.wav  
 extracting: Actor_01/03-01-03-01-02-02-01.wav  
 extracting: Actor_01/03-01-02-02-02-01-01.wav  
 extracting: Actor_01/03-01-01-01-01-01-01.wav  
 extracting: Actor_01/03-01-02-01-01-01-01.wav  
 extracting: Actor_01/03-01-01-01-02-01-01.wav  
 extracting: Actor_01/03-01-03-02-02-02-01.wav  
 extracting: Actor_01/03-01-02-02-01-02-01.wav  
 extracting: Actor_01/03-01-01-01-02-02-01.wav  
 extracting: Actor_01/03-01-03-01-02-01-01.wav  
 extracting: Actor_01/03-01-03-01-01-02-01.wav  
 extracting: Actor_01/03-01-03-01-01-01-01.wav  


In [None]:
def extract_feature(file_name, **kwargs):
    """Extract a 1-D feature vector from an audio file.

    Every enabled feature is computed with librosa, averaged over its
    frames and concatenated in a fixed order:
    MFCC -> chroma -> mel -> contrast -> tonnetz.

    Args:
        file_name: Path of an audio file readable by ``soundfile``.
        **kwargs: Boolean switches ``mfcc``, ``chroma``, ``mel``,
            ``contrast`` and ``tonnetz``. A missing or falsy switch leaves
            that feature out.

    Returns:
        A 1-D ``numpy`` array with the concatenated per-feature means
        (empty when no switch is enabled).

    NOTE(review): a later cell defines another ``extract_feature`` with a
    narrower signature; once that cell runs it shadows this definition.
    """
    # Keep the switches under their own names so they are never rebound to
    # the feature arrays computed below.
    use_mfcc = kwargs.get("mfcc")
    use_chroma = kwargs.get("chroma")
    use_mel = kwargs.get("mel")
    use_contrast = kwargs.get("contrast")
    use_tonnetz = kwargs.get("tonnetz")

    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile returns multi-channel audio as (frames, channels) while
    # librosa expects time on the last axis; downmix to mono so both mono
    # and stereo files produce the same feature layout.
    if X.ndim > 1:
        X = X.mean(axis=1)

    # The magnitude spectrogram is shared by chroma and spectral contrast.
    stft = np.abs(librosa.stft(X)) if (use_chroma or use_contrast) else None

    parts = []
    if use_mfcc:
        parts.append(np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0))
    if use_chroma:
        parts.append(np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0))
    if use_mel:
        # The audio signal must be passed by keyword: recent librosa
        # releases reject it as a positional argument.
        parts.append(np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0))
    if use_contrast:
        parts.append(np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0))
    if use_tonnetz:
        parts.append(np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0))

    # Seeding with an empty float64 array keeps the result dtype and the
    # "no feature enabled" return value the same as before.
    return np.hstack((np.array([]), *parts))

In [None]:
# Lookup from the two-digit emotion code (the third dash-separated field of
# a file name, as parsed in load_data) to its human-readable label.
# Codes are generated as "01".."08" in label order.
int2emotion = {
    f"{code:02d}": label
    for code, label in enumerate(
        (
            "neutral",
            "calm",
            "happy",
            "sad",
            "angry",
            "fearful",
            "disgust",
            "surprised",
        ),
        start=1,
    )
}

In [None]:
# Emotion labels kept for training; load_data skips files with any other label.
AVAILABLE_EMOTIONS = set(["neutral", "happy", "sad", "angry"])


In [None]:
def extract_feature(file, mfcc, chroma, mel):
    """Extract a 1-D feature vector (MFCC, chroma, mel) from an audio file.

    Each enabled feature is computed with librosa, averaged over its frames
    and concatenated in the order MFCC -> chroma -> mel.

    Args:
        file: Path of an audio file readable by ``soundfile``.
        mfcc: When truthy, include the mean of 40 MFCC coefficients.
        chroma: When truthy, include the mean chromagram computed from the
            magnitude STFT.
        mel: When truthy, include the mean mel spectrogram.

    Returns:
        A 1-D ``numpy`` array with the concatenated per-feature means
        (empty when every flag is falsy).
    """
    with soundfile.SoundFile(file) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile returns multi-channel audio as (frames, channels) while
    # librosa expects time on the last axis; downmix to mono so both mono
    # and stereo files produce the same feature layout.
    if X.ndim > 1:
        X = X.mean(axis=1)

    # Collect features under their own names instead of rebinding the
    # boolean flag parameters to arrays.
    parts = []
    if mfcc:
        parts.append(np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0))
    if chroma:
        stft = np.abs(librosa.stft(X))
        parts.append(np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0))
    if mel:
        parts.append(np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0))

    # Seeding with an empty float64 array keeps the result dtype and the
    # "no feature enabled" return value the same as before.
    return np.hstack((np.array([]), *parts))



def load_data(test_size=0.2):
    """Build the feature matrix and labels, then split them into train/test.

    Scans ``Actor_*/*.wav`` relative to the current working directory, reads
    the emotion code from the third dash-separated field of each file name,
    keeps only emotions listed in ``AVAILABLE_EMOTIONS`` and extracts
    MFCC + chroma + mel features for every kept file.

    Args:
        test_size: Passed through to ``train_test_split`` as the size of
            the test split.

    Returns:
        ``X_train, X_test, y_train, y_test`` as returned by
        ``train_test_split`` (fixed ``random_state=7``).

    Raises:
        ValueError: If no matching audio file with a selected emotion is found.
        KeyError: If a file name carries an emotion code missing from
            ``int2emotion``.
    """
    features, labels = [], []

    # glob returns paths in arbitrary, filesystem-dependent order; sorting
    # makes the sample order (and therefore the seeded split) reproducible.
    for file in sorted(glob.glob("Actor_*/*.wav")):
        basename = os.path.basename(file)
        emotion = int2emotion[basename.split("-")[2]]
        if emotion not in AVAILABLE_EMOTIONS:
            continue

        features.append(extract_feature(file, mfcc=True, chroma=True, mel=True))
        labels.append(emotion)

    # Fail early with an actionable message instead of the less obvious
    # error train_test_split raises for an empty dataset.
    if not features:
        raise ValueError(
            "No usable samples found for pattern 'Actor_*/*.wav' in "
            + os.getcwd()
            + "; extract the dataset into the working directory first."
        )

    return train_test_split(np.array(features), labels, test_size=test_size, random_state=7)

In [None]:
# Build the train/test split, holding out a quarter of the samples for testing.
X_train, X_test, y_train, y_test = load_data(test_size=0.25)

# Report how many samples ended up in each split.
print("Number of training samples:", X_train.shape[0])
print("Number of test samples:", X_test.shape[0])

Number of training samples: 504
Number of test samples: 168


In [None]:
# Summarise the dimensions of the train/test feature matrices.
print("[+] Shape of X_train:", X_train.shape)
print("[+] Shape of X_test:", X_test.shape)

# The first axis is the sample count of each split.
print("[+] Number of training samples:", X_train.shape[0])
print("[+] Number of testing samples:", X_test.shape[0])

# Without a second axis there is no per-sample feature dimension to report.
if X_train.ndim <= 1:
    print("No features extracted or loaded.")
else:
    print("[+] Number of features:", X_train.shape[1])


[+] Shape of X_train: (504, 180)
[+] Shape of X_test: (168, 180)
[+] Number of training samples: 504
[+] Number of testing samples: 168
[+] Number of features: 180


In [None]:
# Hyper-parameters forwarded to MLPClassifier(**model_params).
model_params = {
    'alpha': 0.01,
    'batch_size': 256,
    'epsilon': 1e-08,
    'hidden_layer_sizes': (300,),
    # NOTE(review): per the scikit-learn docs 'learning_rate' only takes
    # effect with solver='sgd'; no solver is set here - confirm intent.
    'learning_rate': 'adaptive',
    'max_iter': 500,
    # Seed weight initialisation and batch shuffling so the reported accuracy
    # is reproducible across runs (same seed as the split in load_data).
    'random_state': 7,
}

In [None]:
# Instantiate the MLP classifier from the hyper-parameters in model_params.
model = MLPClassifier(**model_params)

In [None]:
# Show the training-matrix dimensions right before fitting.
print("Shape of X_train:", X_train.shape)

# Fit the classifier on the training split.
print("[*] Training the model...")
model.fit(X_train, y_train)

# Predict the held-out split and score the predictions against the labels.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy: {:.2f}%".format(accuracy*100))

Shape of X_train: (483, 180)
[*] Training the model...
Accuracy: 74.53%


In [None]:
# Make sure the output directory exists before writing the model into it.
if not os.path.isdir("result"):
    os.mkdir("result")

# Persist the trained classifier. The context manager guarantees the file is
# flushed and closed even if pickling fails, instead of leaking the handle.
with open("result/mlp_classifier.model", "wb") as model_file:
    pickle.dump(model, model_file)