In [1]:
from  am_analysis import am_analysis as ama
import skimage.metrics as metrics
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
import glob
from tqdm import tqdm
from IPython.display import Audio
import sys
import pandas as pd

In [2]:
# !pip install torchvision


In [3]:
# FUNCTIONS FOR MODULATION SPECTROGRAM
def modSpec(x, fs):
    """Compute the STFT-based modulation spectrogram of a signal.

    Args:
        x: Signal samples, forwarded unchanged to
            ``ama.strfft_modulation_spectrogram``.
        fs: Sampling rate; also used to turn the window settings below from
            seconds into a sample count.

    Returns:
        The value returned by ``ama.strfft_modulation_spectrogram`` for a
        0.04 s analysis window shifted by 0.01 s between frames.
    """
    window_seconds = 0.04  # window length for the STFT (seconds)
    hop_seconds = 0.01  # shift between consecutive windows (seconds)

    # Window size and shift are handed over as rounded sample counts.
    window_samples = round(window_seconds * fs)
    hop_samples = round(hop_seconds * fs)

    return ama.strfft_modulation_spectrogram(
        x, fs, win_size=window_samples, win_shift=hop_samples)

def specImage(filename):
    """Render a WAV file's modulation spectrogram as an RGB image array.

    The audio is read with ``scipy.io.wavfile``, peak-normalised, turned into
    a modulation spectrogram via ``modSpec`` and plotted with
    ``ama.plot_modulation_spectrogram_data``. The rendered figure is then
    captured from the canvas and closed.

    Args:
        filename: Path to the WAV file to analyse.

    Returns:
        A 3-D array (rows, columns, 3) holding the rendered figure with the
        alpha channel removed.

    Raises:
        ValueError: If the file contains no samples.
    """
    fs, x = wavfile.read(filename)
    if x.size == 0:
        raise ValueError(f"{filename} contains no audio samples")

    # Work in floating point before taking abs(): integer PCM cannot
    # represent the magnitude of its most negative value.
    x = x.astype(np.float64)

    # Normalise on the absolute peak so the signal ends up in [-1, 1] even
    # when its largest excursion is negative. A silent file (peak 0) is left
    # untouched rather than divided by zero.
    peak = np.max(np.abs(x))
    if peak > 0:
        x = x / peak
    # 1s segment to analyze
    # x = x[int(fs*1.6) : int(fs*3.6)]

    X_data = modSpec(x, fs)

    ama.plot_modulation_spectrogram_data(X_data,
                                         0,
                                         modf_range=np.array([0, 20]),
                                         c_range=np.array([-90, -50]))

    # Grab the figure that was just drawn and copy its pixels out.
    fig = plt.gcf()
    try:
        fig.canvas.draw()
        plot_data_rgba = np.array(fig.canvas.renderer.buffer_rgba())
    finally:
        # Close this specific figure even if rendering fails, so figures do
        # not accumulate when the function is called in a long loop.
        plt.close(fig)

    # Remove the alpha channel to get a 3D RGB array
    plot_data_rgb = plot_data_rgba[:, :, :3]

    return plot_data_rgb



In [4]:
# Get the spectrogram image
# Each call renders one recording's modulation spectrogram to an RGB array.
filepath1 = "../ResNet/EATD_Corpus_Complete/Test/Test_D/negative_out_84.wav"
img1 = specImage(filepath1)

# NOTE(review): filepath2 is the same path as filepath1, so img2 is rendered
# from the same recording as img1 — confirm whether a different file was meant.
filepath2 = "../ResNet/EATD_Corpus_Complete/Test/Test_D/negative_out_84.wav"
img2 = specImage(filepath2)

In [5]:
def ssimFromAudio(filepath1, filepath2, win_size=11):
    """Return the structural similarity of two audio files' spectrogram images.

    Args:
        filepath1: Path of the first audio file.
        filepath2: Path of the second audio file.
        win_size: Window size forwarded to
            ``metrics.structural_similarity``.

    Returns:
        The value returned by ``metrics.structural_similarity`` when the two
        rendered images are compared with axis 2 as the channel axis.
    """
    # Render both files, first path first, before comparing them.
    first_image, second_image = (
        specImage(path) for path in (filepath1, filepath2))
    return metrics.structural_similarity(
        first_image, second_image, win_size=win_size, channel_axis=2)
def playAudio(path):
    """Build an IPython ``Audio`` object for ``path`` and return it."""
    player = Audio(path)
    return player

In [6]:
# EATD
# CODE : 1 if Depressed else 0
# Each entry describes one split of the corpus:
#   "src"     - directory that is searched for the split's files
#   "Storage" - list of matching paths, filled in by the loop below
#   "Code"    - class label attached to every file of the split
EATD = {
    "TRAIN_D": {"src": "../ResNet/EATD_Corpus_Complete/Training/Utterances_D/", "Storage" : [], "Code" : 1},
    "TRAIN_ND": {"src": "../ResNet/EATD_Corpus_Complete/Training/Utterances_ND/", "Storage" :[] ,"Code" : 0},
    "TEST_D": {"src": "../ResNet/EATD_Corpus_Complete/Test/Test_D/", "Storage": [],"Code" : 1},
    "TEST_ND": {"src": "../ResNet/EATD_Corpus_Complete/Test/Test_ND/", "Storage": [],"Code" : 0}
}
for key, split in EATD.items():
    # glob returns paths in arbitrary filesystem order; sorting makes the
    # sample order identical from run to run and machine to machine.
    split["Storage"] = sorted(glob.glob(split["src"] + "*"))


In [7]:
# Training records: one {"Image": <spectrogram array>, "Code": <label>} dict
# per file that could be rendered.
EATD_SPEC_TRAIN = []

# Both training splits go through the same procedure, so loop over them
# instead of repeating the block; the depressed split is processed first.
for column in ('TRAIN_D', 'TRAIN_ND'):
    code_value = EATD[column]['Code']  # Label is constant within a split

    for i in tqdm(EATD[column]['Storage'], desc=column):
        try:
            EATD_SPEC_TRAIN.append({"Image": specImage(i), "Code": code_value})
        except Exception as exc:
            # Best effort: skip files that cannot be rendered, but say why.
            # Catching Exception (not a bare except) keeps Ctrl-C / kernel
            # interrupts working during this multi-minute loop.
            print(f"Error in {i}: {exc!r}")

100%|██████████████████████████████████████████████████████████████████████████████████| 57/57 [00:44<00:00,  1.29it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 192/192 [02:55<00:00,  1.10it/s]


In [8]:
# Tabulate the collected training records: each dict in EATD_SPEC_TRAIN
# becomes one row with an "Image" column and a "Code" column.
df = pd.DataFrame(EATD_SPEC_TRAIN)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader

# Define your custom dataset class
class CustomDataset(Dataset):
    """Dataset backed by a DataFrame with "Image" and "Code" columns.

    Indexing returns an ``(image, label)`` pair read from the row at the
    given position; ``transform``, when truthy, is applied to the image only.
    """

    def __init__(self, data_df, transform=None):
        """Store the source frame and the optional image transform."""
        self.transform = transform
        self.data_df = data_df

    def __len__(self):
        """Number of rows in the underlying frame."""
        return len(self.data_df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        record = self.data_df.iloc[idx]
        image, label = record['Image'], record['Code']

        # Without a transform the stored image is handed back as-is.
        if not self.transform:
            return image, label
        return self.transform(image), label

# Assuming you have a DataFrame named df with "Image" and "Code" columns
# Modify the preprocessing as needed
# ToTensor is the only preprocessing step here: the training images are not
# resized or normalised with dataset statistics before reaching the network.
transform = transforms.Compose([transforms.ToTensor()])  # You can add more transformations here
# Wrap the DataFrame so the loader can draw (image, label) pairs from it.
train_dataset = CustomDataset(data_df=df, transform=transform)
# Batches of up to 64 samples, drawn in shuffled order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Define the ResNet model
class ResNetModel(nn.Module):
    """ResNet-18 whose final fully connected layer outputs ``num_classes`` values.

    The backbone is kept under the ``resnet`` attribute, so state-dict keys
    are prefixed with ``resnet.``.
    """

    def __init__(self, num_classes):
        """Build an untrained ResNet-18 and replace its classification head.

        Args:
            num_classes: Number of outputs of the replacement ``fc`` layer.
        """
        super().__init__()
        # Untrained backbone. Relying on the constructor default (no
        # pretrained weights) instead of passing ``pretrained=False`` avoids
        # the deprecation warning newer torchvision releases emit for that
        # argument, and still works on older releases.
        self.resnet = models.resnet18()
        # Swap the stock head for one sized to this task, keeping its input
        # width.
        num_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(num_features, num_classes)

    def forward(self, x):
        """Run ``x`` through the backbone and return its output."""
        return self.resnet(x)

# Define the model and optimizer
num_classes = 2  # Binary classification
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNetModel(num_classes).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training loop
num_epochs = 2
for epoch in range(num_epochs):
    # Put the model in training mode at the start of every epoch.
    model.train()
    for batch_inputs, batch_labels in train_loader:
        # Move the batch to the same device as the model.
        batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.to(device)

        # Clear gradients left over from the previous step, then run the
        # forward pass, backpropagate the loss and update the weights.
        optimizer.zero_grad()
        batch_outputs = model(batch_inputs)
        loss = criterion(batch_outputs, batch_labels)
        loss.backward()
        optimizer.step()

        # Printed once per batch, so each epoch produces several lines that
        # carry the same "Epoch [i/n]" prefix.
        print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {loss.item():.4f}")

print("Training complete!")

# Save the trained model
# Only the parameters (state dict) are written; the architecture has to be
# rebuilt with ResNetModel before the file can be loaded again.
torch.save(model.state_dict(), "resnet_model.pth")




Epoch [1/2] Loss: 0.5171
Epoch [1/2] Loss: 0.8449


In [None]:
from torchvision import transforms

# Load the trained model
model = ResNetModel(num_classes)  # Instantiate your model
# map_location lets the checkpoint load onto this CPU-resident model even if
# the weights were saved from a GPU run and no GPU is available now.
model.load_state_dict(torch.load("resnet_model.pth", map_location="cpu"))  # Load the saved model weights
model.eval()  # Set the model to evaluation mode

# Preprocessing must mirror what the network saw during training. The
# training cell feeds images through ToTensor only, so resizing to 224x224
# or applying ImageNet mean/std normalisation here would present the model
# with inputs from a different distribution than it was fitted on.
# Built once, outside the loop, because it does not depend on the row.
transform = transforms.Compose([transforms.ToTensor()])

# Create a list to store predicted classes
predicted_classes = []

# Gradients are not needed for inference.
with torch.no_grad():
    # Iterate through the stored 3D image arrays in row order
    for input_image_array in df["Image"]:
        input_tensor = transform(input_image_array).unsqueeze(0)  # Add batch dimension

        # Perform prediction: index of the largest of the class outputs
        output = model(input_tensor)
        predicted_class = torch.argmax(output, dim=1).item()

        predicted_classes.append(predicted_class)

# Add the predicted classes to the DataFrame
df["Predicted_Class"] = predicted_classes



In [None]:

# Print the DataFrame with predicted classes, leaving out the "Image" column
print(df.drop(columns='Image'))