In [1]:

import os
import random
import sys
import glob
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import warnings
from sklearn.model_selection import StratifiedShuffleSplit
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
from tqdm import tqdm
import plotly.offline as py

warnings.simplefilter("ignore")
pd.set_option('display.max_columns', None)
py.init_notebook_mode(connected=True)

In [9]:
import os
import pandas as pd

def metadata(basepath):
    df = pd.DataFrame(columns=['path', 'source', 'actor', 'gender', 'intensity', 'statement', 'repetition', 'emotion'])
    count = 0

    for sub_dir in os.listdir(basepath):
        sub_dir_path = os.path.join(basepath, sub_dir)

        if os.path.isdir(sub_dir_path):
            for f in os.listdir(sub_dir_path):
                filename = f.split('.')[0].split('-')
                if len(filename) == 7:
                    path = os.path.join(sub_dir_path, f)
                    src = int(filename[1])
                    actor = int(filename[-1].split()[0]) 
                    emotion = int(filename[2])
                    gender = "female" if int(actor) % 2 == 0 else "male"
                    intensity = 0 if filename[3] == '01' else 1
                    statement = 0 if filename[4] == '01' else 1
                    repeat = 0 if filename[5] == '01' else 1

                    df.loc[count] = [path, src, actor, gender, intensity, statement, repeat, emotion]
                    count += 1

    labels = []
    for i in range(len(df)):
        if df.emotion[i] == 1:
            label = "1"
        elif df.emotion[i] == 2:
            label = "2"
        elif df.emotion[i] == 3:
            label = "3"
        elif df.emotion[i] == 4:
            label = "4"
        elif df.emotion[i] == 5:
            label = "5"
        elif df.emotion[i] == 6:
            label = "6"
        elif df.emotion[i] == 7:
            label = "7"
        elif df.emotion[i] == 8:
            label = "8"
        else:
            label = "_none"

        labels.append(label)

    df['label'] = labels
    return df

basepath = r"C:\Users\adwit\Downloads\audio_speech_actors_01-24"
df = metadata(basepath)
print(df.head())


                                                path  source  actor gender  \
0  C:\Users\adwit\Downloads\audio_speech_actors_0...       1      1   male   
1  C:\Users\adwit\Downloads\audio_speech_actors_0...       1      1   male   
2  C:\Users\adwit\Downloads\audio_speech_actors_0...       1      1   male   
3  C:\Users\adwit\Downloads\audio_speech_actors_0...       1      1   male   
4  C:\Users\adwit\Downloads\audio_speech_actors_0...       1      1   male   

   intensity  statement  repetition  emotion label  
0          0          0           0        1     1  
1          0          0           1        1     1  
2          0          1           0        1     1  
3          0          1           1        1     1  
4          0          0           0        2     2  


In [6]:
def spec_augment(spec: np.ndarray, num_mask=1,
                 freq_masking_max_percentage=0.10, time_masking_max_percentage=0.15):

    spec = spec.copy()
    for i in range(num_mask):
        all_frames_num, all_freqs_num = spec.shape
        freq_percentage = random.uniform(0, freq_masking_max_percentage)

        num_freqs_to_mask = int(freq_percentage * all_freqs_num)
        f0 = np.random.uniform(low=0.0, high=all_freqs_num - num_freqs_to_mask)
        f0 = int(f0)
        spec[:, f0:f0 + num_freqs_to_mask] = 0.000

        time_percentage = random.uniform(0.0, time_masking_max_percentage)

        num_frames_to_mask = int(time_percentage * all_frames_num)
        t0 = np.random.uniform(low=0.0, high=all_frames_num - num_frames_to_mask)
        t0 = int(t0)
        spec[t0:t0 + num_frames_to_mask, :] = 0.000

    return spec

In [7]:
class AugmentedSpectrograms():
    def __init__(self, df, outputpath, mel=True, mfcc=False, spectral=False, mfccbanks=20, n_mels=128):
        self.df = df
        self.mel = mel
        self.mfcc = mfcc
        self.spectral = spectral
        self.mfccbanks = mfccbanks
        self.n_mels = n_mels
        self.outputpath = outputpath

    def get_augmented_spectrograms(self):
        for index, row in tqdm(self.df.iterrows(), total=self.df.shape[0]):
            emotion = row['label']
            path = os.path.join(self.outputpath, emotion)

            if not os.path.exists(path):
                os.makedirs(path)

            x, sample_rate = librosa.load(row['path'])
            original_filename = f'{emotion}_{index + 1}'
            self.generate_augmented_spectrogram(x, sample_rate, path, original_filename)
            self.generate_augmented_spectrogram(x, sample_rate, path, original_filename)

    def generate_augmented_spectrogram(self, x, sample_rate, path, original_filename):
        if self.mel:
            mel_features = librosa.feature.melspectrogram(y=x, sr=sample_rate, n_mels=self.n_mels)
            log_mel_features = librosa.power_to_db(mel_features, ref=np.max)

            for i in range(2):  # Generate 2 augmented samples
                augmented_spec = spec_augment(log_mel_features)
                self.save_spectrogram(augmented_spec, sample_rate, path, original_filename)

    def save_spectrogram(self, features, sample_rate, path, original_filename):
        fig = plt.figure(figsize=(12, 4))
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        librosa.display.specshow(features, sr=sample_rate, x_axis='time', y_axis='mel')

        save_path = os.path.join(path, f'{original_filename}.jpg')
        plt.savefig(save_path)
        plt.close()

outputpath = r"C:\Users\adwit\Downloads\specti-newaug"  
augmented_spectrogram_generator = AugmentedSpectrograms(df, outputpath=outputpath, mel=True, mfcc=False, spectral=False)
augmented_spectrogram_generator.get_augmented_spectrograms()


100%|██████████████████████████████████████████████████████████████████████████████| 1440/1440 [38:33<00:00,  1.61s/it]


In [13]:
from PIL import Image

def load_chromagram(chromagram_path):
    chromagram = Image.open(chromagram_path)
    return chromagram

def save_chromagram(chromagram, chromagram_path):
    chromagram.save(chromagram_path)



In [20]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm

class ChromagramAugmentation():
    def __init__(self, chromagram_dir, noise_mean=0, noise_std=0.005):
        self.chromagram_dir = chromagram_dir
        self.noise_mean = noise_mean
        self.noise_std = noise_std

    def augment_with_noise(self):
        target_total_samples = 4320
        original_samples = 1440
        augmented_samples_needed = target_total_samples - original_samples
        current_total_samples = 0

        for emotion_dir in os.listdir(self.chromagram_dir):
            emotion_path = os.path.join(self.chromagram_dir, emotion_dir)
            if os.path.isdir(emotion_path):
                chromagram_files = [f for f in os.listdir(emotion_path) if f.endswith('.jpg')]
                num_original_chromagrams = len(chromagram_files)

                for idx, chromagram_file in tqdm(enumerate(chromagram_files), total=num_original_chromagrams, desc=emotion_dir):
                    chromagram_path = os.path.join(emotion_path, chromagram_file)
                    chromagram = load_chromagram(chromagram_path)

                    num_augmentations = min(2, int(np.ceil(augmented_samples_needed / num_original_chromagrams)))

                    for i in range(num_augmentations):
                        augmented_chromagram = self.add_noise_to_chromagram(chromagram)
                        new_index = original_samples + current_total_samples + i + 1
                        augmented_chromagram_filename = f"{emotion_dir}_{new_index}.jpg"
                        augmented_chromagram_path = os.path.join(emotion_path, augmented_chromagram_filename)
                        save_chromagram(augmented_chromagram, augmented_chromagram_path)

                    current_total_samples += num_augmentations
                    if current_total_samples >= augmented_samples_needed:
                        break

                if current_total_samples >= augmented_samples_needed:
                    break

    def add_noise_to_chromagram(self, chromagram):
        width, height = chromagram.size
        noise = np.random.normal(self.noise_mean, self.noise_std, (height, width, 3))
        noisy_chromagram_array = np.array(chromagram) + noise
        noisy_chromagram_array = np.clip(noisy_chromagram_array, 0, 255).astype(np.uint8)
        return Image.fromarray(noisy_chromagram_array)

# Example usage
chromagram_dir = r"C:\Users\adwit\Downloads\chroma-augm"
augmenter = ChromagramAugmentation(chromagram_dir)
augmenter.augment_with_noise()


1: 100%|███████████████████████████████████████████████████████████████████████████████| 96/96 [00:15<00:00,  6.03it/s]
2: 100%|█████████████████████████████████████████████████████████████████████████████| 192/192 [00:32<00:00,  5.91it/s]
3: 100%|█████████████████████████████████████████████████████████████████████████████| 192/192 [00:32<00:00,  5.90it/s]
4: 100%|█████████████████████████████████████████████████████████████████████████████| 192/192 [00:35<00:00,  5.39it/s]
5: 100%|█████████████████████████████████████████████████████████████████████████████| 192/192 [00:38<00:00,  5.02it/s]
6: 100%|█████████████████████████████████████████████████████████████████████████████| 192/192 [00:34<00:00,  5.55it/s]
7: 100%|█████████████████████████████████████████████████████████████████████████████| 192/192 [00:35<00:00,  5.35it/s]
8: 100%|█████████████████████████████████████████████████████████████████████████████| 192/192 [00:34<00:12,  5.19it/s]


In [4]:
# Train the model
model.fit([X_train_s, X_train_s], y_train_s, batch_size=32, epochs=100, validation_data=([X_test_s, X_test_s], y_test_s))

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 

<keras.src.callbacks.History at 0x1d1a73c3a10>


import os
import random
import sys
import glob
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import warnings
from sklearn.model_selection import StratifiedShuffleSplit
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
from tqdm import tqdm
import plotly.offline as py

warnings.simplefilter("ignore")
pd.set_option('display.max_columns', None)
py.init_notebook_mode(connected=True)

one augmented sample

In [7]:
class AugmentedSpectrograms():
    def __init__(self, df, outputpath, mel=True, mfcc=False, spectral=False, mfccbanks=20, n_mels=128):
        self.df = df
        self.mel = mel
        self.mfcc = mfcc
        self.spectral = spectral
        self.mfccbanks = mfccbanks
        self.n_mels = n_mels
        self.outputpath = outputpath

    def get_augmented_spectrograms(self):
        for index, row in tqdm(self.df.iterrows(), total=self.df.shape[0]):
            emotion = row['label']
            path = os.path.join(self.outputpath, emotion)

            if not os.path.exists(path):
                os.makedirs(path)

            x, sample_rate = librosa.load(row['path'])
            original_filename = f'{emotion}_{index + 1}'
            self.generate_augmented_spectrogram(x, sample_rate, path, original_filename)

    def generate_augmented_spectrogram(self, x, sample_rate, path, original_filename):
        if self.mel:
            mel_features = librosa.feature.melspectrogram(y=x, sr=sample_rate, n_mels=self.n_mels)
            log_mel_features = librosa.power_to_db(mel_features, ref=np.max)

            # Apply SpecAugment to the mel spectrogram
            augmented_spec = spec_augment(log_mel_features)

            # Save the augmented mel spectrogram
            self.save_spectrogram(augmented_spec, sample_rate, path, original_filename)

    def save_spectrogram(self, features, sample_rate, path, original_filename):
        fig = plt.figure(figsize=(12, 4))
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        librosa.display.specshow(features, sr=sample_rate, x_axis='time', y_axis='mel')

        save_path = os.path.join(path, f'{original_filename}.jpg')
        plt.savefig(save_path)
        plt.close()

# Example usage for generating augmented spectrograms
outputpath = r"C:\Users\adwit\Downloads\new_spectograms"  # Change this to the desired output path
augmented_spectrogram_generator = AugmentedSpectrograms(df, outputpath=outputpath, mel=True, mfcc=False, spectral=False)

# Generate augmented spectrograms
augmented_spectrogram_generator.get_augmented_spectrograms()


100%|██████████████████████████████████████████████████████████████████████████████| 1440/1440 [14:00<00:00,  1.71it/s]


In [12]:
class AugmentedSpectrograms():
    def __init__(self, df, outputpath, mel=True, mfcc=False, spectral=False, mfccbanks=20, n_mels=128):
        self.df = df
        self.mel = mel
        self.mfcc = mfcc
        self.spectral = spectral
        self.mfccbanks = mfccbanks
        self.n_mels = n_mels
        self.outputpath = outputpath

    def get_augmented_spectrograms(self):
        classes = self.df['label'].unique()  # Get all unique classes
        for emotion in classes:
            class_df = self.df[self.df['label'] == emotion]
            path = os.path.join(self.outputpath, emotion)

            if not os.path.exists(path):
                os.makedirs(path)

            for index, row in tqdm(class_df.iterrows(), total=class_df.shape[0]):
                x, sample_rate = librosa.load(row['path'])
                original_filename = f'{emotion}_{index + 1}'
                self.generate_augmented_spectrogram(x, sample_rate, path, original_filename)

    def generate_augmented_spectrogram(self, x, sample_rate, path, original_filename):
        if self.mel:
            mel_features = librosa.feature.melspectrogram(y=x, sr=sample_rate, n_mels=self.n_mels)
            log_mel_features = librosa.power_to_db(mel_features, ref=np.max)

            # Apply SpecAugment to the mel spectrogram
            augmented_spec = spec_augment(log_mel_features)

            # Save the augmented mel spectrogram
            self.save_spectrogram(augmented_spec, sample_rate, path, original_filename)

    def save_spectrogram(self, features, sample_rate, path, original_filename):
        fig = plt.figure(figsize=(12, 4))
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        librosa.display.specshow(features, sr=sample_rate, x_axis='time', y_axis='mel')

        save_path = os.path.join(path, f'{original_filename}.jpg')
        plt.savefig(save_path)
        plt.close()

# Example usage for generating augmented spectrograms
outputpath = r"C:\Users\adwit\Downloads\new_spectograms"  # Change this to the desired output path
augmented_spectrogram_generator = AugmentedSpectrograms(df, outputpath=outputpath, mel=True, mfcc=False, spectral=False)

# Generate augmented spectrograms
augmented_spectrogram_generator.get_augmented_spectrograms()


100%|██████████████████████████████████████████████████████████████████████████████████| 96/96 [00:06<00:00, 13.99it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 192/192 [00:20<00:00,  9.43it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 192/192 [00:20<00:00,  9.49it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 192/192 [00:18<00:00, 10.49it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 192/192 [00:16<00:00, 11.66it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 192/192 [00:15<00:00, 12.37it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 192/192 [00:15<00:00, 12.78it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 192/192 [00:14<00:00, 12.82it/s]


In [6]:
class AugmentedSpectrograms():
    def __init__(self, df, outputpath, mel=True, mfcc=False, spectral=False, mfccbanks=20, n_mels=128):
        self.df = df
        self.mel = mel
        self.mfcc = mfcc
        self.spectral = spectral
        self.mfccbanks = mfccbanks
        self.n_mels = n_mels
        self.outputpath = outputpath

    def get_augmented_spectrograms(self):
        for index, row in tqdm(self.df.iterrows(), total=self.df.shape[0]):
            emotion = row['label']
            path = os.path.join(self.outputpath, emotion)

            if not os.path.exists(path):
                os.makedirs(path)

            x, sample_rate = librosa.load(row['path'])
            original_filename = f'{emotion}_{index + 1}'
            self.generate_augmented_spectrogram(x, sample_rate, path, original_filename)
            self.generate_augmented_spectrogram(x, sample_rate, path, original_filename)

    def generate_augmented_spectrogram(self, x, sample_rate, path, original_filename):
        if self.mel:
            mel_features = librosa.feature.melspectrogram(y=x, sr=sample_rate, n_mels=self.n_mels)
            log_mel_features = librosa.power_to_db(mel_features, ref=np.max)

            # Apply SpecAugment to the mel spectrogram
            for _ in range(2):  # Generate 2 augmented samples
                augmented_spec = spec_augment(log_mel_features)

                # Save the augmented mel spectrogram
                self.save_spectrogram(augmented_spec, sample_rate, path, original_filename)

    def save_spectrogram(self, features, sample_rate, path, original_filename):
        fig = plt.figure(figsize=(12, 4))
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        librosa.display.specshow(features, sr=sample_rate, x_axis='time', y_axis='mel')

        save_path = os.path.join(path, f'{original_filename}.jpg')
        plt.savefig(save_path)
        plt.close()

# Example usage for generating augmented spectrograms
outputpath = r"C:\Users\adwit\Downloads\specti-new"  # Change this to the desired output path
augmented_spectrogram_generator = AugmentedSpectrograms(df, outputpath=outputpath, mel=True, mfcc=False, spectral=False)

# Generate augmented spectrograms
augmented_spectrogram_generator.get_augmented_spectrograms()


100%|██████████████████████████████████████████████████████████████████████████████| 1440/1440 [08:51<00:00,  2.71it/s]


In [None]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm

class ChromagramAugmentation():
    def __init__(self, chromagram_dir, noise_mean=0, noise_std=0.005):
        self.chromagram_dir = chromagram_dir
        self.noise_mean = noise_mean
        self.noise_std = noise_std

    def augment_with_noise(self):
        num_original_chromagrams = 1450  # Update this with your actual number of original chromagrams
        desired_total_chromagrams = 4196
        num_augmentations_needed = desired_total_chromagrams - num_original_chromagrams

        for emotion_dir in os.listdir(self.chromagram_dir):
            emotion_path = os.path.join(self.chromagram_dir, emotion_dir)
            if os.path.isdir(emotion_path):
                chromagram_files = [f for f in os.listdir(emotion_path) if f.endswith('.jpg')]

                for idx, chromagram_file in tqdm(enumerate(chromagram_files), total=num_original_chromagrams, desc=emotion_dir):
                    chromagram_path = os.path.join(emotion_path, chromagram_file)
                    chromagram = load_chromagram(chromagram_path)

                    # Calculate how many times to augment this chromagram
                    num_augmentations = int(np.ceil(num_augmentations_needed / num_original_chromagrams))

                    # Augment the chromagram
                    for i in range(num_augmentations):
                        augmented_chromagram = self.add_noise_to_chromagram(chromagram)

                        # Determine the new index for augmented chromagrams
                        new_index = num_original_chromagrams + idx * num_augmentations + i + 1

                        # Save the augmented chromagram
                        augmented_chromagram_filename = f"{emotion_dir}_{new_index}.jpg"
                        augmented_chromagram_path = os.path.join(emotion_path, augmented_chromagram_filename)
                        save_chromagram(augmented_chromagram, augmented_chromagram_path)

                        num_augmentations_needed -= 1
                        if num_augmentations_needed == 0:
                            break

                    if num_augmentations_needed == 0:
                        break

    def add_noise_to_chromagram(self, chromagram):
        width, height = chromagram.size
        noise = np.random.normal(self.noise_mean, self.noise_std, (height, width, 3))
        noisy_chromagram_array = np.array(chromagram) + noise
        noisy_chromagram_array = np.clip(noisy_chromagram_array, 0, 255).astype(np.uint8)
        return Image.fromarray(noisy_chromagram_array)

chromagram_dir = r"C:\Users\adwit\Downloads\chromagrams"
augmenter = ChromagramAugmentation(chromagram_dir)
augmenter.augment_with_noise()


: 

In [None]:
def spec_augment(spec: np.ndarray, num_mask=1,
                 freq_masking_max_percentage=0.10, time_masking_max_percentage=0.15):

    spec = spec.copy()
    for i in range(num_mask):
        all_frames_num, all_freqs_num = spec.shape
        freq_percentage = random.uniform(0, freq_masking_max_percentage)

        num_freqs_to_mask = int(freq_percentage * all_freqs_num)
        f0 = np.random.uniform(low=0.0, high=all_freqs_num - num_freqs_to_mask)
        f0 = int(f0)
        spec[:, f0:f0 + num_freqs_to_mask] = 0.000

        time_percentage = random.uniform(0.0, time_masking_max_percentage)

        num_frames_to_mask = int(time_percentage * all_frames_num)
        t0 = np.random.uniform(low=0.0, high=all_frames_num - num_frames_to_mask)
        t0 = int(t0)
        spec[t0:t0 + num_frames_to_mask, :] = 0.000

    return spec

: 

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer
dataset_dir = r"C:\Users\adwit\Downloads\specti-newaug"

spectrograms = []
labels = []
for emotion_category in os.listdir(dataset_dir):
    category_dir = os.path.join(dataset_dir, emotion_category)
    if os.path.isdir(category_dir):
        for filename in os.listdir(category_dir):
            img = load_img(os.path.join(category_dir, filename), target_size=(256,256,3))
            img_array = img_to_array(img)
            spectrograms.append(img_array)
            labels.append(emotion_category)
spectrograms = np.array(spectrograms)
labels = np.array(labels)

X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(spectrograms, labels, test_size=0.1, random_state=42)
label_binarizer = LabelBinarizer()
y_train_s = label_binarizer.fit_transform(y_train_s)
y_test_s = label_binarizer.transform(y_test_s)



: 

In [None]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Input, BatchNormalization, Dropout
input_shape = (256,256, 3)
#sequential model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())

model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

input_layer = Input(shape=input_shape)

x = model(input_layer)

concatenated_input = Concatenate()([x, x])
num_classes = 8
output_layer = Dense(num_classes, activation='softmax')(concatenated_input)
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


: 

In [None]:

model_top.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_top.fit(X_train, y_train, batch_size=32, epochs=20, validation_data=(X_test, y_test))

: 

In [None]:
# Train the model
history = model.fit(X_train_s, y_train_s, epochs=100, batch_size=32, validation_data=(X_test_s, y_test_s))
from tensorflow.keras.models import save_model

# Assuming you have trained your CNN model and it's stored in a variable called 'model'

# Extract loss values from history
import matplotlib.pyplot as plt
train_loss = history.history['loss']
val_loss = history.history['val_loss']

# Plot loss vs. epochs
plt.plot(range(1, len(train_loss) + 1), train_loss, label='Training Loss')
plt.plot(range(1, len(val_loss) + 1), val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss vs. Epochs')
plt.legend()
plt.show()

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Assuming you have trained your CNN model and obtained predictions
# Replace these lines with your actual prediction code
# model = ... (your CNN model)
y_pred = model.predict(X_test_s)  # Example of getting predictions, adjust according to your model

# Calculate accuracy
accuracy = accuracy_score(np.argmax(y_test_s, axis=1), np.argmax(y_pred_s, axis=1))

# Calculate precision, recall, and F1-score
precision = precision_score(np.argmax(y_test_s, axis=1), np.argmax(y_pred, axis=1), average='weighted')
recall = recall_score(np.argmax(y_test_s, axis=1), np.argmax(y_pred, axis=1), average='weighted')
f1 = f1_score(np.argmax(y_test_s, axis=1), np.argmax(y_pred, axis=1), average='weighted')

print("Accuracy:", accuracy)
print("Average Precision:", precision)
print("Average Recall:", recall)
print("Average F1-score:", f1)
import matplotlib.pyplot as plt

# Extract accuracy values from history
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Plot accuracy vs. epochs
plt.plot(range(1, len(train_accuracy) + 1), train_accuracy, label='Train')
plt.plot(range(1, len(val_accuracy) + 1), val_accuracy, label='Test')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy vs. Epochs')
plt.legend()
plt.show()



: 

In [None]:
# Evaluate the model
loss, accuracy = model_top.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

: 

Chromagrams

In [None]:
import os
import pandas as pd

def metadata(basepath):
    df = pd.DataFrame(columns=['path', 'source', 'actor', 'gender', 'intensity', 'statement', 'repetition', 'emotion'])
    count = 0

    for sub_dir in os.listdir(basepath):
        sub_dir_path = os.path.join(basepath, sub_dir)

        if os.path.isdir(sub_dir_path):
            for f in os.listdir(sub_dir_path):
                filename = f.split('.')[0].split('-')
                if len(filename) == 7:
                    path = os.path.join(sub_dir_path, f)
                    src = int(filename[1])
                    actor = int(filename[-1].split()[0])  # Extract actor removing extra characters
                    emotion = int(filename[2])
                    gender = "female" if int(actor) % 2 == 0 else "male"
                    intensity = 0 if filename[3] == '01' else 1
                    statement = 0 if filename[4] == '01' else 1
                    repeat = 0 if filename[5] == '01' else 1

                    df.loc[count] = [path, src, actor, gender, intensity, statement, repeat, emotion]
                    count += 1

    labels = []
    for i in range(len(df)):
        if df.emotion[i] == 1:
            label = "1"
        elif df.emotion[i] == 2:
            label = "2"
        elif df.emotion[i] == 3:
            label = "3"
        elif df.emotion[i] == 4:
            label = "4"
        elif df.emotion[i] == 5:
            label = "5"
        elif df.emotion[i] == 6:
            label = "6"
        elif df.emotion[i] == 7:
            label = "7"
        elif df.emotion[i] == 8:
            label = "8"
        else:
            label = "_none"

        labels.append(label)

    df['label'] = labels
    return df

# Example usage
basepath = r"C:\Users\adwit\Downloads\audio_speech_actors_01-24"
df = metadata(basepath)
print(df.head())


: 

In [None]:
class Spectrograms():
    def __init__(self, df, outputpath, sample=False, augmentation=False, mel=False, mfcc=False, spectral=False, chroma=False, mfccbanks=20, n_mels=128):
        self.df = df
        self.augmentation = augmentation
        self.mel = mel
        self.mfcc = mfcc
        self.spectral = spectral
        self.chroma = chroma
        self.mfccbanks = mfccbanks
        self.n_mels = n_mels
        self.outputpath = outputpath
        self.sample = sample

    def get_spectrograms(self):
        if self.sample:
            x, sample_rate = librosa.load(self.df['path'].iloc[0])
            self.generate(x, sample_rate, '', 0, self.df['label'].iloc[0])

        else:
            for index, row in tqdm(self.df.iterrows(), total=self.df.shape[0]):
                emotion = row['label']
                path = os.path.join(self.outputpath, emotion)

                if not os.path.exists(path):
                    os.makedirs(path)

                x, sample_rate = librosa.load(row['path'])
                self.generate(x, sample_rate, path, index, emotion)

    def generate(self, x, sample_rate, path, count, emotion):
        if self.mel:
            mel_features = librosa.feature.melspectrogram(y=x, sr=sample_rate, n_mels=self.n_mels)
            log_mel_features = librosa.power_to_db(mel_features, ref=np.max)
            fig = plt.figure(figsize=(12, 4))
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            librosa.display.specshow(log_mel_features, sr=sample_rate, x_axis='time', y_axis='mel')
            if self.sample:
                plt.show()
            else:
                plt.savefig(os.path.join(path, f'{emotion}_{count}.jpg'))
                plt.close()

        if self.mfcc:
            mfcc_features = librosa.feature.mfcc(x, sr=sample_rate, n_mfcc=self.mfccbanks)
            fig = plt.figure(figsize=(12, 4))
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            librosa.display.specshow(mfcc_features, sr=sample_rate, x_axis='time', y_axis='mel')
            if self.sample:
                plt.show()
            else:
                plt.savefig(os.path.join(path, f'{emotion}_{count}_mfcc.jpg'))
                plt.close()

        if self.spectral:
            spectral_features = librosa.feature.spectral_contrast(x, sr=sample_rate)
            fig = plt.figure(figsize=(12, 4))
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            librosa.display.specshow(spectral_features, sr=sample_rate, x_axis='time', y_axis='mel')
            if self.sample:
                plt.show()
            else:
                plt.savefig(os.path.join(path, f'{emotion}_{count}_spectral.jpg'))
                plt.close()

        if self.chroma:
            chroma_features = librosa.feature.chroma_stft(y=x, sr=sample_rate)
            fig = plt.figure(figsize=(12, 4))
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            librosa.display.specshow(chroma_features, sr=sample_rate, x_axis='time', y_axis='chroma')
            if self.sample:
                plt.show()
            else:
                plt.savefig(os.path.join(path, f'{emotion}_{count}.jpg'))
                plt.close()


: 

In [None]:
spectrogram_generator = Spectrograms(df, outputpath=r"C:\Users\adwit\Downloads\spectogramsnewest", sample=False,mel=True)
spectrogram_generator.get_spectrograms()

: 

In [None]:
import os

def count_images(directory):
    total_images = 0
    subdirectories = []

    # Iterate over all items in the directory
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)

        # If it's a file and has an image extension, count it
        if os.path.isfile(item_path) and item.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            total_images += 1
        # If it's a directory, add it to the subdirectories list
        elif os.path.isdir(item_path):
            subdirectories.append(item)

    # Recursively count images in subdirectories
    for subdir in subdirectories:
        subdir_path = os.path.join(directory, subdir)
        total_images += count_images(subdir_path)

    return total_images

def count_shape_of_directory(directory):
    subdirectories = []
    total_images = 0

    # Iterate over all items in the directory
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)

        # If it's a directory, add it to the subdirectories list
        if os.path.isdir(item_path):
            subdirectories.append(item)
        elif os.path.isfile(item_path) and item.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            total_images += 1
    for subdir in subdirectories:
        subdir_path = os.path.join(directory, subdir)
        total_images += count_images(subdir_path)

    return len(subdirectories), total_images
directory_path = r"C:\Users\adwit\Downloads\spectogramsnewest"
num_subdirectories, total_images = count_shape_of_directory(directory_path)
print(f"Number of subdirectories: {num_subdirectories}")
print(f"Total number of images: {total_images}")


: 

In [None]:
def spec_augment(spec: np.ndarray, num_mask=1,
                 freq_masking_max_percentage=0.10, time_masking_max_percentage=0.15):

    spec = spec.copy()
    for i in range(num_mask):
        all_frames_num, all_freqs_num = spec.shape
        freq_percentage = random.uniform(0, freq_masking_max_percentage)

        num_freqs_to_mask = int(freq_percentage * all_freqs_num)
        f0 = np.random.uniform(low=0.0, high=all_freqs_num - num_freqs_to_mask)
        f0 = int(f0)
        spec[:, f0:f0 + num_freqs_to_mask] = 0.000

        time_percentage = random.uniform(0.0, time_masking_max_percentage)

        num_frames_to_mask = int(time_percentage * all_frames_num)
        t0 = np.random.uniform(low=0.0, high=all_frames_num - num_frames_to_mask)
        t0 = int(t0)
        spec[t0:t0 + num_frames_to_mask, :] = 0.000

    return spec

: 

: 

In [None]:
import os

def count_images(directory):
    total_images = 0
    subdirectories = []
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)
        if os.path.isfile(item_path) and item.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            total_images += 1
        elif os.path.isdir(item_path):
            subdirectories.append(item)
    for subdir in subdirectories:
        subdir_path = os.path.join(directory, subdir)
        total_images += count_images(subdir_path)

    return total_images

def count_shape_of_directory(directory):
    subdirectories = []
    total_images = 0
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)
        if os.path.isdir(item_path):
            subdirectories.append(item)
        elif os.path.isfile(item_path) and item.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            total_images += 1
    for subdir in subdirectories:
        subdir_path = os.path.join(directory, subdir)
        total_images += count_images(subdir_path)

    return len(subdirectories), total_images
directory_path = r"C:\Users\adwit\Downloads\chroma-augm"
num_subdirectories, total_images = count_shape_of_directory(directory_path)
print(f"Number of subdirectories: {num_subdirectories}")
print(f"Total number of images: {total_images}")


: 

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer

dataset_dir = r"C:\Users\adwit\Downloads\chroma-augm"

chromagrams = []
labels = []

for emotion_category in os.listdir(dataset_dir):
    category_dir = os.path.join(dataset_dir, emotion_category)
    if os.path.isdir(category_dir):
        for filename in os.listdir(category_dir):
            img = load_img(os.path.join(category_dir, filename), target_size=(256,256,3))
            img_array = img_to_array(img)
            chromagrams.append(img_array)
            labels.append(emotion_category)

chromagrams = np.array(chromagrams)
labels = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(chromagrams, labels, test_size=0.1, random_state=42)

label_binarizer = LabelBinarizer()
y_train = label_binarizer.fit_transform(y_train)
y_test = label_binarizer.transform(y_test)



: 

In [None]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Input, BatchNormalization, Dropout
input_shape = (256,256, 3)
#sequential model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())

model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

input_layer = Input(shape=input_shape)

x = model(input_layer)

concatenated_input = Concatenate()([x, x])
num_classes = 8
output_layer = Dense(num_classes, activation='softmax')(concatenated_input)
model_top = Model(inputs=input_layer, outputs=output_layer)
model_top.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_top.summary()


: 

In [None]:

model_top.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_top.fit(X_train, y_train, batch_size=32, epochs=20, validation_data=(X_test, y_test))

: 

In [None]:

model_top.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_top.fit(X_train, y_train, batch_size=32, epochs=30, validation_data=(X_test, y_test))

: 

Spectograms-100 epochs

In [None]:

model_top.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_top.fit(X_train, y_train, batch_size=32, epochs=100, validation_data=(X_test, y_test))

: 

In [None]:
# Evaluate the model
loss, accuracy = model_top.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

: 

Concatenating outputs from two different layers

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer
dataset_dir = r"C:\Users\adwit\Downloads\specti-newaug"

spectrograms = []
labels = []
for emotion_category in os.listdir(dataset_dir):
    category_dir = os.path.join(dataset_dir, emotion_category)
    if os.path.isdir(category_dir):
        for filename in os.listdir(category_dir):
            img = load_img(os.path.join(category_dir, filename), target_size=(150,150,3))
            img_array = img_to_array(img)
            spectrograms.append(img_array)
            labels.append(emotion_category)
spectrograms = np.array(spectrograms)
labels = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(spectrograms, labels, test_size=0.1, random_state=42)
label_binarizer = LabelBinarizer()
y_train = label_binarizer.fit_transform(y_train)
y_test = label_binarizer.transform(y_test)



: 

In [None]:


from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Input, BatchNormalization, Dropout
input_shape = (150, 150, 3)  

input_layer1 = Input(shape=input_shape)
x1 = Conv2D(32, (3, 3), activation='relu')(input_layer1)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D(pool_size=(2, 2))(x1)

input_layer2 = Input(shape=input_shape)
x2 = Conv2D(64, (3, 3), activation='relu')(input_layer2)
x2 = BatchNormalization()(x2)
x2 = MaxPooling2D(pool_size=(2, 2))(x2)

concatenated_input = Concatenate()([x1, x2])

flatten_layer = Flatten()(concatenated_input)

x = Dense(128, activation='relu')(flatten_layer)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

x = Dense(128, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

# Output layer
num_classes = 8
output_layer = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=[input_layer1, input_layer2], outputs=output_layer)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


: 

In [None]:
# Train the model
model.fit([X_train, X_train], y_train, batch_size=32, epochs=50, validation_data=([X_test, X_test], y_test))

: 

Chromagrams

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer
dataset_dir = r"C:\Users\adwit\Downloads\chroma-augm"

spectrograms = []
labels = []
for emotion_category in os.listdir(dataset_dir):
    category_dir = os.path.join(dataset_dir, emotion_category)
    if os.path.isdir(category_dir):
        for filename in os.listdir(category_dir):
            img = load_img(os.path.join(category_dir, filename), target_size=(150,150,3))
            img_array = img_to_array(img)
            spectrograms.append(img_array)
            labels.append(emotion_category)
spectrograms = np.array(spectrograms)
labels = np.array(labels)

X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(spectrograms, labels, test_size=0.1, random_state=42)
label_binarizer = LabelBinarizer()
y_train_c = label_binarizer.fit_transform(y_train_c)
y_test_c = label_binarizer.transform(y_test_c)



: 

In [None]:


from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Input, BatchNormalization, Dropout
input_shape = (150, 150, 3)  

input_layer1 = Input(shape=input_shape)
x1 = Conv2D(32, (3, 3), activation='relu')(input_layer1)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D(pool_size=(2, 2))(x1)

input_layer2 = Input(shape=input_shape)
x2 = Conv2D(64, (3, 3), activation='relu')(input_layer2)
x2 = BatchNormalization()(x2)
x2 = MaxPooling2D(pool_size=(2, 2))(x2)

concatenated_input = Concatenate()([x1, x2])

flatten_layer = Flatten()(concatenated_input)

x = Dense(128, activation='relu')(flatten_layer)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

x = Dense(128, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

# Output layer
num_classes = 8
output_layer = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=[input_layer1, input_layer2], outputs=output_layer)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


: 

In [None]:
# Train the model
model.fit([X_train_c, X_train_c], y_train_c, batch_size=32, epochs=100, validation_data=([X_test_c, X_test_c], y_test_c))

: 

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Make predictions on the test set
y_pred = model.predict([X_test_c,X_test_c])

# Convert the one-hot encoded labels back to original labels
y_pred_labels = np.argmax(y_pred, axis=1)
y_true_labels = np.argmax(y_test_c, axis=1)
from sklearn.metrics import accuracy_score

# Compute accuracy
accuracy = accuracy_score(y_true_labels, y_pred_labels)
print("Accuracy:", accuracy)

# Print classification report
print("Classification Report:")
print(classification_report(y_true_labels, y_pred_labels))

# Print confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_true_labels, y_pred_labels))


: 

In [None]:


from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Input, BatchNormalization, Dropout
input_shape = (256,256, 3)  

input_layer1 = Input(shape=input_shape)
x1 = Conv2D(32, (3, 3), activation='relu')(input_layer1)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D(pool_size=(2, 2))(x1)

input_layer2 = Input(shape=input_shape)
x2 = Conv2D(64, (3, 3), activation='relu')(input_layer2)
x2 = BatchNormalization()(x2)
x2 = MaxPooling2D(pool_size=(2, 2))(x2)

concatenated_input = Concatenate()([x1, x2])

flatten_layer = Flatten()(concatenated_input)

x = Dense(128, activation='relu')(flatten_layer)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

x = Dense(128, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

# Output layer
num_classes = 8
output_layer = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=[input_layer1, input_layer2], outputs=output_layer)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


: 

strides(spectograms)

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer
dataset_dir = r"C:\Users\adwit\Downloads\specti-newaug"

spectrograms = []
labels = []
for emotion_category in os.listdir(dataset_dir):
    category_dir = os.path.join(dataset_dir, emotion_category)
    if os.path.isdir(category_dir):
        for filename in os.listdir(category_dir):
            img = load_img(os.path.join(category_dir, filename), target_size=(150,150,3))
            img_array = img_to_array(img)
            spectrograms.append(img_array)
            labels.append(emotion_category)
spectrograms = np.array(spectrograms)
labels = np.array(labels)

X_train_s1, X_test_s1, y_train_s1, y_test_s1 = train_test_split(spectrograms, labels, test_size=0.1, random_state=42)
label_binarizer = LabelBinarizer()
y_train_s1 = label_binarizer.fit_transform(y_train_s1)
y_test_s1 = label_binarizer.transform(y_test_s1)



: 

In [None]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Input, BatchNormalization, Dropout

input_shape = (150,150, 3)  

input_layer1 = Input(shape=input_shape)
x1 = Conv2D(32, (3, 3), activation='relu', strides=(2, 2), padding='valid')(input_layer1)  # Stride added here
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D(pool_size=(2, 2), strides=(3, 3))(x1)  # Stride added here

input_layer2 = Input(shape=input_shape)
x2 = Conv2D(64, (3, 3), activation='relu', strides=(2, 2), padding='valid')(input_layer2)  # Stride added here
x2 = BatchNormalization()(x2)
x2 = MaxPooling2D(pool_size=(2, 2), strides=(3, 3))(x2)  # Stride added here

concatenated_input = Concatenate()([x1, x2])

flatten_layer = Flatten()(concatenated_input)

x = Dense(128, activation='relu')(flatten_layer)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

x = Dense(128, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

# Output layer
num_classes = 8
output_layer = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=[input_layer1, input_layer2], outputs=output_layer)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


: 

In [None]:
# Train the model
model.fit([X_train_s1, X_train_s1], y_train_s1, batch_size=32, epochs=50, validation_data=([X_test_s1, X_test_s1], y_test_s1))

: 

In [None]:
# Train the model
model.fit([X_train_s1, X_train_s1], y_train_s1, batch_size=32, epochs=100, validation_data=([X_test_s1, X_test_s1], y_test_s1))

: 

COMPLEX

SPECTOGRAMS

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer
dataset_dir = r"C:\Users\adwit\Downloads\specti-newaug"

spectrograms = []
labels = []
for emotion_category in os.listdir(dataset_dir):
    category_dir = os.path.join(dataset_dir, emotion_category)
    if os.path.isdir(category_dir):
        for filename in os.listdir(category_dir):
            img = load_img(os.path.join(category_dir, filename), target_size=(224,224,3))
            img_array = img_to_array(img)
            spectrograms.append(img_array)
            labels.append(emotion_category)
spectrograms = np.array(spectrograms)
labels = np.array(labels)

X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(spectrograms, labels, test_size=0.1, random_state=42)
label_binarizer = LabelBinarizer()
y_train_s = label_binarizer.fit_transform(y_train_s)
y_test_s = label_binarizer.transform(y_test_s)



: 

In [None]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Input, BatchNormalization, Dropout

input_shape = (224,224, 3)

# First Input and Convolutional Block
input_layer1 = Input(shape=input_shape)
x1 = Conv2D(32, (3, 3), activation='relu', padding='same', strides=(2, 2))(input_layer1)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x1)

# Second Input and Convolutional Block
input_layer2 = Input(shape=input_shape)
x2 = Conv2D(128, (3, 3), activation='relu', padding='same', strides=(2, 2))(input_layer2)
x2 = BatchNormalization()(x2)
x2 = MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x2)

# Concatenation
concatenated_input = Concatenate()([x1, x2])

# Third Convolutional Block
x3 = Conv2D(256, (3, 3), activation='relu', padding='same')(concatenated_input)
x3 = BatchNormalization()(x3)
x3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x3)

# Flatten and Dense Layers
flatten_layer = Flatten()(x3)
x = Dense(128, activation='relu')(flatten_layer)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

# Output layer
num_classes = 8
output_layer = Dense(num_classes, activation='softmax')(x)

# Model definition and compilation
model = Model(inputs=[input_layer1, input_layer2], outputs=output_layer)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


: 

COMBINED-NEWWW

In [None]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Input, BatchNormalization, Dropout
import numpy as np

input_shape = (224, 224, 3)

# Define the model architecture
input_layer1 = Input(shape=input_shape)
x1 = Conv2D(32, (3, 3), activation='relu', padding='same', strides=(2, 2))(input_layer1)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x1)

input_layer2 = Input(shape=input_shape)
x2 = Conv2D(128, (3, 3), activation='relu', padding='same', strides=(2, 2))(input_layer2)
x2 = BatchNormalization()(x2)
x2 = MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x2)

concatenated_input = Concatenate()([x1, x2])

x3 = Conv2D(256, (3, 3), activation='relu', padding='same')(concatenated_input)
x3 = BatchNormalization()(x3)
x3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x3)

flatten_layer = Flatten()(x3)
x = Dense(128, activation='relu')(flatten_layer)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

num_classes = 8
output_layer = Dense(num_classes, activation='softmax')(x)

# Original model
model = Model(inputs=[input_layer1, input_layer2], outputs=output_layer)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# New model to extract features
feature_extraction_model = Model(inputs=model.input, outputs=model.layers[-5].output)

# Extract features
features = feature_extraction_model.predict([spectrograms, spectrograms])

# Save features as .npy file
np.save(r"C:\Users\adwit\Downloads\combined_features\spectograms_feasnewestdd.npy", features)


: 

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer

dataset_dir = r"C:\Users\adwit\Downloads\chroma-augm"

chromagrams = []
labels = []

for emotion_category in os.listdir(dataset_dir):
    category_dir = os.path.join(dataset_dir, emotion_category)
    if os.path.isdir(category_dir):
        for filename in os.listdir(category_dir):
            img = load_img(os.path.join(category_dir, filename), target_size=(224,224,3))
            img_array = img_to_array(img)
            chromagrams.append(img_array)
            labels.append(emotion_category)

chromagrams = np.array(chromagrams)
labels = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(chromagrams, labels, test_size=0.1, random_state=42)

label_binarizer = LabelBinarizer()
y_train = label_binarizer.fit_transform(y_train)
y_test = label_binarizer.transform(y_test)



: 

little new

In [None]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Input, BatchNormalization, Dropout
import numpy as np

input_shape = (224, 224, 3)

# Define the model architecture
input_layer1 = Input(shape=input_shape)
x1 = Conv2D(32, (3, 3), activation='relu', padding='same', strides=(2, 2))(input_layer1)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x1)

input_layer2 = Input(shape=input_shape)
x2 = Conv2D(128, (3, 3), activation='relu', padding='same', strides=(2, 2))(input_layer2)
x2 = BatchNormalization()(x2)
x2 = MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x2)

concatenated_input = Concatenate()([x1, x2])

x3 = Conv2D(256, (3, 3), activation='relu', padding='same')(concatenated_input)
x3 = BatchNormalization()(x3)
x3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x3)

# New Convolutional Layer
x4 = Conv2D(512, (3, 3), activation='relu', padding='same')(x3)
x4 = BatchNormalization()(x4)
x4 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x4)

flatten_layer = Flatten()(x4)
x = Dense(128, activation='relu')(flatten_layer)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

num_classes = 8
output_layer = Dense(num_classes, activation='softmax')(x)

# Original model
model = Model(inputs=[input_layer1, input_layer2], outputs=output_layer)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# New model to extract features
feature_extraction_model = Model(inputs=model.input, outputs=x4)

# Extract features
features = feature_extraction_model.predict([chromagrams, chromagrams])

# Save features as .npy file
np.save(r"C:\Users\adwit\Downloads\combined_features\chromagrams_feasnewestee.npy", features)
np.save(r"C:\Users\adwit\Downloads\combined_features\chromagrams_labelsnewestee.npy", labels)


: 

In [None]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Input, BatchNormalization, Dropout
import numpy as np

input_shape = (224, 224, 3)

# Define the model architecture
input_layer1 = Input(shape=input_shape)
x1 = Conv2D(32, (3, 3), activation='relu', padding='same', strides=(2, 2))(input_layer1)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x1)

input_layer2 = Input(shape=input_shape)
x2 = Conv2D(128, (3, 3), activation='relu', padding='same', strides=(2, 2))(input_layer2)
x2 = BatchNormalization()(x2)
x2 = MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x2)

concatenated_input = Concatenate()([x1, x2])

x3 = Conv2D(256, (3, 3), activation='relu', padding='same')(concatenated_input)
x3 = BatchNormalization()(x3)
x3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x3)

flatten_layer = Flatten()(x3)
x = Dense(128, activation='relu')(flatten_layer)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

num_classes = 8
output_layer = Dense(num_classes, activation='softmax')(x)

# Original model
model = Model(inputs=[input_layer1, input_layer2], outputs=output_layer)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# New model to extract features
feature_extraction_model = Model(inputs=model.input, outputs=model.layers[-5].output)

# Extract features
features = feature_extraction_model.predict([chromagrams, chromagrams])

# Save features as .npy file
np.save(r"C:\Users\adwit\Downloads\combined_features\chromagrams_feasnewestdd.npy", features)
np.save(r"C:\Users\adwit\Downloads\combined_features\chromagrams_labelsnewestdd.npy",labels)

: 

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split


import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer
# Load extracted features from spectrograms and chromagrams
spectrogram_features = np.load(r"C:\Users\adwit\Downloads\combined_features\spectograms_feasnewestdd.npy")
chromagram_features = np.load(r"C:\Users\adwit\Downloads\combined_features\chromagrams_feasnewestdd.npy")

# Concatenate the features
concatenated_features = np.concatenate((spectrogram_features, chromagram_features), axis=1)

# Load the labels
labels = np.load(r"C:\Users\adwit\Downloads\combined_features\chromagrams_labelsnewestdd.npy")

label_binarizer = LabelBinarizer()
labels_encoded = label_binarizer.fit_transform(labels)
# Split the data into training and testing sets
X_train_co, X_test_co, y_train_co, y_test_co = train_test_split(concatenated_features, labels_encoded, test_size=0.1, random_state=42)



: 

In [None]:
import numpy as np

# Assuming x_train is your training data
print("Shape of x_train:", X_train_co.shape)


: 

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization

# Define and compile the model
model = Sequential([
    Dense(128, activation='relu', input_shape=(256,)),
    BatchNormalization(),  
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dense(8, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


: 

In [None]:
# Train the model
model.fit(X_train_co, y_train_co, epochs=100, batch_size=32, validation_data=(X_test_co, y_test_co))

: 

In [None]:
# Train the model
model.fit(X_train_co, y_train_co, epochs=200, batch_size=32, validation_data=(X_test_co, y_test_co))

: 

In [None]:
# Train the model
model.fit(X_train_co, y_train_co, epochs=300, batch_size=32, validation_data=(X_test_co, y_test_co))

: 

SPECTOGRAMS-ONE AUGMENTED SAMPLE

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer
dataset_dir = r"C:\Users\adwit\Downloads\new_spectograms"

spectrograms = []
labels = []
for emotion_category in os.listdir(dataset_dir):
    category_dir = os.path.join(dataset_dir, emotion_category)
    if os.path.isdir(category_dir):
        for filename in os.listdir(category_dir):
            img = load_img(os.path.join(category_dir, filename), target_size=(256,256,3))
            img_array = img_to_array(img)
            spectrograms.append(img_array)
            labels.append(emotion_category)
spectrograms = np.array(spectrograms)
labels = np.array(labels)

X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(spectrograms, labels, test_size=0.1, random_state=42)
label_binarizer = LabelBinarizer()
y_train_s = label_binarizer.fit_transform(y_train_s)
y_test_s = label_binarizer.transform(y_test_s)



: 

In [None]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Input, BatchNormalization, Dropout

input_shape = (256,256, 3)

# First Input and Convolutional Block
input_layer1 = Input(shape=input_shape)
x1 = Conv2D(32, (3, 3), activation='relu', padding='same', strides=(2, 2))(input_layer1)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x1)

# Second Input and Convolutional Block
input_layer2 = Input(shape=input_shape)
x2 = Conv2D(128, (3, 3), activation='relu', padding='same', strides=(2, 2))(input_layer2)
x2 = BatchNormalization()(x2)
x2 = MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x2)

# Concatenation
concatenated_input = Concatenate()([x1, x2])

# Third Convolutional Block
x3 = Conv2D(256, (3, 3), activation='relu', padding='same')(concatenated_input)
x3 = BatchNormalization()(x3)
x3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x3)

# Flatten and Dense Layers
flatten_layer = Flatten()(x3)
x = Dense(128, activation='relu')(flatten_layer)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

# Output layer
num_classes = 8
output_layer = Dense(num_classes, activation='softmax')(x)

# Model definition and compilation
model = Model(inputs=[input_layer1, input_layer2], outputs=output_layer)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


: 

In [None]:
# Train the model
model.fit(X_train_s, y_train_s, epochs=50, batch_size=32, validation_data=(X_test_s, y_test_s))

: 

In [None]:
# Train the model
model.fit([X_train_s, X_train_s], y_train_s, batch_size=32, epochs=50, validation_data=([X_test_s, X_test_s], y_test_s))

: 

one augmented sample

In [None]:

import os
import random
import sys
import glob
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import warnings
from sklearn.model_selection import StratifiedShuffleSplit
from scipy.fftpack import fft
from scipy import signal
from scipy.io import wavfile
from tqdm import tqdm
import plotly.offline as py

warnings.simplefilter("ignore")
pd.set_option('display.max_columns', None)
py.init_notebook_mode(connected=True)

: 

In [None]:
import os
import pandas as pd

def metadata(basepath):
    df = pd.DataFrame(columns=['path', 'source', 'actor', 'gender', 'intensity', 'statement', 'repetition', 'emotion'])
    count = 0

    for sub_dir in os.listdir(basepath):
        sub_dir_path = os.path.join(basepath, sub_dir)

        if os.path.isdir(sub_dir_path):
            for f in os.listdir(sub_dir_path):
                filename = f.split('.')[0].split('-')
                if len(filename) == 7:
                    path = os.path.join(sub_dir_path, f)
                    src = int(filename[1])
                    actor = int(filename[-1].split()[0])  # Extract actor removing extra characters
                    emotion = int(filename[2])
                    gender = "female" if int(actor) % 2 == 0 else "male"
                    intensity = 0 if filename[3] == '01' else 1
                    statement = 0 if filename[4] == '01' else 1
                    repeat = 0 if filename[5] == '01' else 1

                    df.loc[count] = [path, src, actor, gender, intensity, statement, repeat, emotion]
                    count += 1

    labels = []
    for i in range(len(df)):
        if df.emotion[i] == 1:
            label = "1"
        elif df.emotion[i] == 2:
            label = "2"
        elif df.emotion[i] == 3:
            label = "3"
        elif df.emotion[i] == 4:
            label = "4"
        elif df.emotion[i] == 5:
            label = "5"
        elif df.emotion[i] == 6:
            label = "6"
        elif df.emotion[i] == 7:
            label = "7"
        elif df.emotion[i] == 8:
            label = "8"
        else:
            label = "_none"

        labels.append(label)

    df['label'] = labels
    return df

# Example usage
basepath = r"C:\Users\adwit\Downloads\audio_speech_actors_01-24"
df = metadata(basepath)
print(df.head())


: 

In [None]:
class Spectrograms():
    def __init__(self, df, outputpath, sample=False, augmentation=False, mel=False, mfcc=False, spectral=False, chroma=False, mfccbanks=20, n_mels=128):
        self.df = df
        self.augmentation = augmentation
        self.mel = mel
        self.mfcc = mfcc
        self.spectral = spectral
        self.chroma = chroma
        self.mfccbanks = mfccbanks
        self.n_mels = n_mels
        self.outputpath = outputpath
        self.sample = sample

    def get_spectrograms(self):
        if self.sample:
            x, sample_rate = librosa.load(self.df['path'].iloc[0])
            self.generate(x, sample_rate, '', 0, self.df['label'].iloc[0])

        else:
            for index, row in tqdm(self.df.iterrows(), total=self.df.shape[0]):
                emotion = row['label']
                path = os.path.join(self.outputpath, emotion)

                if not os.path.exists(path):
                    os.makedirs(path)

                x, sample_rate = librosa.load(row['path'])
                self.generate(x, sample_rate, path, index, emotion)

    def generate(self, x, sample_rate, path, count, emotion):
        if self.mel:
            mel_features = librosa.feature.melspectrogram(y=x, sr=sample_rate, n_mels=self.n_mels)
            log_mel_features = librosa.power_to_db(mel_features, ref=np.max)
            fig = plt.figure(figsize=(12, 4))
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            librosa.display.specshow(log_mel_features, sr=sample_rate, x_axis='time', y_axis='mel')
            if self.sample:
                plt.show()
            else:
                plt.savefig(os.path.join(path, f'{emotion}_{count}.jpg'))
                plt.close()

        if self.mfcc:
            mfcc_features = librosa.feature.mfcc(x, sr=sample_rate, n_mfcc=self.mfccbanks)
            fig = plt.figure(figsize=(12, 4))
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            librosa.display.specshow(mfcc_features, sr=sample_rate, x_axis='time', y_axis='mel')
            if self.sample:
                plt.show()
            else:
                plt.savefig(os.path.join(path, f'{emotion}_{count}_mfcc.jpg'))
                plt.close()

        if self.spectral:
            spectral_features = librosa.feature.spectral_contrast(x, sr=sample_rate)
            fig = plt.figure(figsize=(12, 4))
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            librosa.display.specshow(spectral_features, sr=sample_rate, x_axis='time', y_axis='mel')
            if self.sample:
                plt.show()
            else:
                plt.savefig(os.path.join(path, f'{emotion}_{count}_spectral.jpg'))
                plt.close()

        if self.chroma:
            chroma_features = librosa.feature.chroma_stft(y=x, sr=sample_rate)
            fig = plt.figure(figsize=(12, 4))
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            librosa.display.specshow(chroma_features, sr=sample_rate, x_axis='time', y_axis='chroma')
            if self.sample:
                plt.show()
            else:
                plt.savefig(os.path.join(path, f'{emotion}_{count}.jpg'))
                plt.close()


: 

In [None]:
import os

def count_images(directory):
    total_images = 0
    subdirectories = []

    # Iterate over all items in the directory
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)

        # If it's a file and has an image extension, count it
        if os.path.isfile(item_path) and item.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            total_images += 1
        # If it's a directory, add it to the subdirectories list
        elif os.path.isdir(item_path):
            subdirectories.append(item)

    # Recursively count images in subdirectories
    for subdir in subdirectories:
        subdir_path = os.path.join(directory, subdir)
        total_images += count_images(subdir_path)

    return total_images

def count_shape_of_directory(directory):
    subdirectories = []
    total_images = 0

    # Iterate over all items in the directory
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)

        # If it's a directory, add it to the subdirectories list
        if os.path.isdir(item_path):
            subdirectories.append(item)
        elif os.path.isfile(item_path) and item.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            total_images += 1
    for subdir in subdirectories:
        subdir_path = os.path.join(directory, subdir)
        total_images += count_images(subdir_path)

    return len(subdirectories), total_images
directory_path = r"C:\Users\adwit\Downloads\new_spectograms"
num_subdirectories, total_images = count_shape_of_directory(directory_path)
print(f"Number of subdirectories: {num_subdirectories}")
print(f"Total number of images: {total_images}")


: 

In [None]:
import os

def count_images(directory):
    total_images = 0
    subdirectories = []

    # Iterate over all items in the directory
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)

        # If it's a file and has an image extension, count it
        if os.path.isfile(item_path) and item.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            total_images += 1
        # If it's a directory, add it to the subdirectories list
        elif os.path.isdir(item_path):
            subdirectories.append(item)

    # Recursively count images in subdirectories
    for subdir in subdirectories:
        subdir_path = os.path.join(directory, subdir)
        total_images += count_images(subdir_path)

    return total_images

def count_shape_of_directory(directory):
    subdirectories = []
    total_images = 0

    # Iterate over all items in the directory
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)

        # If it's a directory, add it to the subdirectories list
        if os.path.isdir(item_path):
            subdirectories.append(item)
        elif os.path.isfile(item_path) and item.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            total_images += 1
    for subdir in subdirectories:
        subdir_path = os.path.join(directory, subdir)
        total_images += count_images(subdir_path)

    return len(subdirectories), total_images
directory_path = r"C:\Users\adwit\Downloads\new_chromagrams"
num_subdirectories, total_images = count_shape_of_directory(directory_path)
print(f"Number of subdirectories: {num_subdirectories}")
print(f"Total number of images: {total_images}")


: 

In [None]:
spectrogram_generator = Spectrograms(df, outputpath=r"C:\Users\adwit\Downloads\new_spectograms", sample=False,mel=True)
spectrogram_generator.get_spectrograms()

: 

In [None]:
class AugmentedSpectrograms():
    def __init__(self, df, outputpath, mel=True, mfcc=False, spectral=False, mfccbanks=20, n_mels=128):
        self.df = df
        self.mel = mel
        self.mfcc = mfcc
        self.spectral = spectral
        self.mfccbanks = mfccbanks
        self.n_mels = n_mels
        self.outputpath = outputpath

    def get_augmented_spectrograms(self):
        for emotion in self.df['label'].unique():
            class_df = self.df[self.df['label'] == emotion]
            path = os.path.join(self.outputpath, emotion)

            if not os.path.exists(path):
                os.makedirs(path)

            print(f"Processing class {emotion}...")
            count = 0  # Initialize count for each emotion class
            for index, row in tqdm(class_df.iterrows(), total=class_df.shape[0]):
                x, sample_rate = librosa.load(row['path'])
                original_filename = f'{emotion}_{count + 1}'  # Start counting from 1
                print(f"Augmenting {original_filename}...")
                self.generate_augmented_spectrogram(x, sample_rate, path, original_filename)
                count += 1  # Increment count after processing each sample

    def generate_augmented_spectrogram(self, x, sample_rate, path, original_filename):
        if self.mel:
            mel_features = librosa.feature.melspectrogram(y=x, sr=sample_rate, n_mels=self.n_mels)
            log_mel_features = librosa.power_to_db(mel_features, ref=np.max)

            # Apply SpecAugment to the mel spectrogram
            augmented_spec = spec_augment(log_mel_features)

            # Save the augmented mel spectrogram
            self.save_spectrogram(augmented_spec, sample_rate, path, original_filename)

    def save_spectrogram(self, features, sample_rate, path, original_filename):
        fig = plt.figure(figsize=(12, 4))
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        librosa.display.specshow(features, sr=sample_rate, x_axis='time', y_axis='mel')

        save_path = os.path.join(path, f'{original_filename}.jpg')
        plt.savefig(save_path)
        plt.close()

# Example usage for generating augmented spectrograms
outputpath =r"C:\Users\adwit\Downloads\new_spectograms"  # Change this to the desired output path
augmented_spectrogram_generator = AugmentedSpectrograms(df, outputpath=outputpath, mel=True, mfcc=False, spectral=False)

# Generate augmented spectrograms
augmented_spectrogram_generator.get_augmented_spectrograms()


: 

In [None]:
def spec_augment(spec: np.ndarray, num_mask=1,
                 freq_masking_max_percentage=0.10, time_masking_max_percentage=0.15):

    spec = spec.copy()
    for i in range(num_mask):
        all_frames_num, all_freqs_num = spec.shape
        freq_percentage = random.uniform(0, freq_masking_max_percentage)

        num_freqs_to_mask = int(freq_percentage * all_freqs_num)
        f0 = np.random.uniform(low=0.0, high=all_freqs_num - num_freqs_to_mask)
        f0 = int(f0)
        spec[:, f0:f0 + num_freqs_to_mask] = 0.000

        time_percentage = random.uniform(0.0, time_masking_max_percentage)

        num_frames_to_mask = int(time_percentage * all_frames_num)
        t0 = np.random.uniform(low=0.0, high=all_frames_num - num_frames_to_mask)
        t0 = int(t0)
        spec[t0:t0 + num_frames_to_mask, :] = 0.000

    return spec

: 

In [None]:
import os

# Main directories containing spectrograms and chromagrams
spectrograms_dir = r"C:\Users\adwit\Downloads\new_spectograms"
chromagrams_dir =r"C:\Users\adwit\Downloads\new_chromagrams"

# Function to count files in subdirectories
def count_files_in_subdirectories(directory):
    count = 0
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".jpg"):  # Adjust the file extension as per your file type
                count += 1
    return count

# Count files in subdirectories
num_spectrograms = count_files_in_subdirectories(spectrograms_dir)
num_chromagrams = count_files_in_subdirectories(chromagrams_dir)

# If there are more spectrogram images than chromagram images
if num_spectrograms > num_chromagrams:
    # Determine how many spectrogram images need to be deleted
    num_to_delete = num_spectrograms - num_chromagrams

    # Walk through spectrograms directory and delete excess files
    for root, dirs, files in os.walk(spectrograms_dir):
        for file in files:
            if file.endswith(".jpg"):  # Adjust the file extension as per your file type
                os.remove(os.path.join(root, file))
                num_to_delete -= 1
                if num_to_delete == 0:
                    break
        if num_to_delete == 0:
            break

    print(f"{num_to_delete} spectrogram images deleted.")
else:
    print("No deletion needed. Number of spectrograms matches number of chromagrams.")


: 

In [None]:
import os

# Main directories containing spectrograms and chromagrams
spectrograms_dir = r"C:\Users\adwit\Downloads\new_spectograms"
chromagrams_dir = r"C:\Users\adwit\Downloads\new_chromagrams"

# Function to count files in subdirectories
def count_files_in_subdirectories(directory):
    count = 0
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".jpg"):  # Adjust the file extension as per your file type
                count += 1
    return count

# Count files in subdirectories for spectrograms and chromagrams
num_spectrograms = count_files_in_subdirectories(spectrograms_dir)
num_chromagrams = count_files_in_subdirectories(chromagrams_dir)

# If there are more spectrogram images than chromagram images
if num_spectrograms > num_chromagrams:
    # Determine how many spectrogram images need to be deleted
    num_to_delete = num_spectrograms - num_chromagrams

    # Walk through spectrograms directory and delete excess files
    for root, dirs, files in os.walk(spectrograms_dir):
        for file in files:
            if file.endswith(".jpg"):  # Adjust the file extension as per your file type
                os.remove(os.path.join(root, file))
                num_to_delete -= 1
                if num_to_delete == 0:
                    break
        if num_to_delete == 0:
            break

    print(f"{num_to_delete} spectrogram images deleted.")
else:
    print("No deletion needed. Number of spectrograms matches number of chromagrams.")


: 

In [None]:
 import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer
dataset_dir = r"C:\Users\adwit\Downloads\specti-newaug"

spectrograms = []
labels = []
for emotion_category in os.listdir(dataset_dir):
    category_dir = os.path.join(dataset_dir, emotion_category)
    if os.path.isdir(category_dir):
        for filename in os.listdir(category_dir):
            img = load_img(os.path.join(category_dir, filename), target_size=(256,256,3))
            img_array = img_to_array(img)
            spectrograms.append(img_array)
            labels.append(emotion_category)
spectrograms = np.array(spectrograms)
labels = np.array(labels)

X_train, X_test, y_train, y_test = train_test_split(spectrograms, labels, test_size=0.2, random_state=42)
label_binarizer = LabelBinarizer()
y_train = label_binarizer.fit_transform(y_train)
y_test = label_binarizer.transform(y_test)



: 

In [None]:
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Concatenate, Input, BatchNormalization, Dropout
import numpy as np

input_shape = (256, 256, 3)

# Define the model architecture
input_layer1 = Input(shape=input_shape)
x1 = Conv2D(32, (3, 3), activation='relu', padding='same', strides=(2, 2))(input_layer1)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x1)

input_layer2 = Input(shape=input_shape)
x2 = Conv2D(64, (3, 3), activation='relu', padding='same', strides=(2, 2))(input_layer2)
x2 = BatchNormalization()(x2)
x2 = MaxPooling2D(pool_size=(3, 3), strides=(3, 3))(x2)

concatenated_input = Concatenate()([x1, x2])

x3 = Conv2D(128, (3, 3), activation='relu', padding='same')(concatenated_input)
x3 = BatchNormalization()(x3)
x3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x3)
# New Convolutional Layer
x4 = Conv2D(256, (3,3), activation='relu', padding='same')(x3)
x4 = BatchNormalization()(x4)
x4 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x4)

x5 = Conv2D(512, (3,3), activation='relu', padding='same')(x4)
x5 = BatchNormalization()(x5)
x5 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x5)

flatten_layer = Flatten()(x5)
x = Dense(128, activation='elu')(flatten_layer)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

x = Dense(256, activation='elu')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

x = Dense(256, activation='elu')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

x = Dense(128, activation='elu')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

num_classes = 8
output_layer = Dense(num_classes, activation='softmax')(x)

# Original model
model = Model(inputs=[input_layer1, input_layer2], outputs=output_layer)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()



: 

In [None]:
# Train the model
history = model.fit([X_train, X_train], y_train, batch_size=32, epochs=30, validation_data=([X_test, X_test], y_test))
from tensorflow.keras.models import save_model


from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Assuming you have trained your CNN model and obtained predictions
# Replace these lines with your actual prediction code
# model = ... (your CNN model)
y_pred = model.predict([X_test, X_test])  # Example of getting predictions, adjust according to your model

# Calculate accuracy
accuracy = accuracy_score(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1))

# Calculate precision, recall, and F1-score
precision = precision_score(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1), average='weighted')
recall = recall_score(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1), average='weighted')
f1 = f1_score(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1), average='weighted')

print("Accuracy:", accuracy)
print("Average Precision:", precision)
print("Average Recall:", recall)
print("Average F1-score:", f1)
# Extract accuracy values from history
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

import matplotlib.pyplot as plt

# Extract accuracy values from history
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Plot accuracy vs. epochs
plt.plot(range(1, len(train_accuracy) + 1), train_accuracy, label='Train')
plt.plot(range(1, len(val_accuracy) + 1), val_accuracy, label='Test')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy vs. Epochs')              
plt.legend()
plt.show()


: 

: 