In [None]:
import pandas as pd
import librosa
import numpy as np
import matplotlib.pyplot as plt
import librosa.display
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision import datasets, transforms
from tqdm import tqdm_notebook as tqdm
import os


from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import preprocessing

from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier


#from keras.datasets import mnist

In [None]:
# Work from the dataset root so the relative audio path used by the loading cell resolves.
# NOTE(review): absolute Kaggle input path — adjust when running outside Kaggle.
os.chdir("/kaggle/input/ravdess/Ravdess")

In [None]:
def load_and_get_audio_data(path_to_data_for_audio):
    """
    Walk the per-actor folders of the audio dataset and build one table that
    holds the labels parsed from each file name plus a log-mel spectrogram.

    path_to_data_for_audio: Path to the Audio_Speech_Actors_01 folder
        (a trailing separator is optional).
    output: Pandas Dataframe with the columns 'gender', 'emotion', 'actor',
        'path' and a final column labelled 0 that stores, per file, the
        2-D numpy array returned by librosa.power_to_db for that clip.

    File names must be dash-separated with at least seven fields: the third
    field is the emotion code, the seventh the actor id. Even actor ids are
    labelled "female", odd ids "male".

    source: https://github.com/mkosaka1/Speech_Emotion_Recognition
    """
    emotion_names = {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad',
                     5: 'angry', 6: 'fear', 7: 'disgust', 8: 'surprise'}

    records = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore every seeded split made later) reproducible.
    for actor_dir in sorted(os.listdir(path_to_data_for_audio)):
        actor_path = os.path.join(path_to_data_for_audio, actor_dir)
        if not os.path.isdir(actor_path):
            continue  # ignore stray files next to the actor folders
        for file_name in sorted(os.listdir(actor_path)):
            part = file_name.split('.')[0].split('-')
            emotion_code = int(part[2])
            actor_id = int(part[6])
            records.append({
                'gender': "female" if actor_id % 2 == 0 else "male",
                # unknown codes are kept as the raw integer
                'emotion': emotion_names.get(emotion_code, emotion_code),
                'actor': actor_id,
                'path': os.path.join(actor_path, file_name),
            })

    # PUT EXTRACTED LABELS WITH FILEPATH INTO DATAFRAME
    audioDeep_df = pd.DataFrame(records, columns=['gender', 'emotion', 'actor', 'path'])

    # ITERATE OVER ALL AUDIO FILES AND EXTRACT LOG MEL SPECTROGRAMS.
    # The spectrograms go into a 1-D object array so that each cell holds one
    # 2-D array regardless of whether all clips have the same number of frames.
    spectrograms = np.empty(len(audioDeep_df), dtype=object)
    for row, path in enumerate(audioDeep_df['path']):
        # 3 s window starting 0.5 s into the clip, resampled to 44.1 kHz
        X, sample_rate = librosa.load(path, res_type='kaiser_fast', duration=3, sr=44100, offset=0.5)
        spectrogram = librosa.feature.melspectrogram(y=X, sr=sample_rate, n_mels=128, fmax=sample_rate/2)
        spectrograms[row] = librosa.power_to_db(spectrogram)

    # Column label 0 is kept for compatibility with callers that rename the
    # columns positionally.
    audioDeep_df[0] = spectrograms

    return audioDeep_df

In [None]:
# Alternative dataset location, kept for reference:
#path_to_data_for_audio = "Data/AudioEmotion/AudioEmotion/Audio_Speech_Actors_01-24/"
# Folder with one sub-folder per actor, relative to the current working directory.
path_to_data_for_audio = './Audio Emotion/'
audioDeep = load_and_get_audio_data(path_to_data_for_audio)
# The loader returns five columns; give all of them explicit names (the last one holds the spectrograms).
audioDeep.columns= ['gender', 'emotion', 'actor','path',"mel_spectrogram"]
audioDeep.head()

In [None]:
def spec_to_image(spec, eps=1e-6):
  """Standardise a spectrogram and rescale it to an 8-bit image.

  spec: numpy array of spectrogram values.
  eps: small constant added to the standard deviation to avoid dividing by zero.
  returns: uint8 array with the shape of ``spec``; the smallest input value maps
      to 0 and values are scaled linearly towards 255 (fractions are truncated).
      A constant input (no dynamic range) yields an all-zero image.
  """
  mean = spec.mean()
  std = spec.std()
  spec_norm = (spec - mean) / (std + eps)
  spec_min, spec_max = spec_norm.min(), spec_norm.max()
  value_range = spec_max - spec_min
  if value_range == 0:
    # Every value is identical: the min-max scaling below would compute 0/0
    # and cast NaN to uint8, so return a blank image instead.
    return np.zeros(spec_norm.shape, dtype=np.uint8)
  spec_scaled = 255 * (spec_norm - spec_min) / value_range
  spec_scaled = spec_scaled.astype(np.uint8)
  return spec_scaled

In [None]:
def get_melspectrogram_db(file_path, sr=44100, n_fft=2048, hop_length=512, n_mels=128, fmin=20, fmax=8300, top_db=80, duration=5):
  """Load an audio file, force it to a fixed length and return its dB-scaled mel spectrogram.

  file_path: path of the audio file to read.
  sr: sampling rate requested from librosa.load.
  n_fft, hop_length, n_mels, fmin, fmax: forwarded to librosa.feature.melspectrogram.
  top_db: forwarded to librosa.power_to_db.
  duration: clip length in seconds; shorter signals are reflect-padded on both
      sides, longer ones are truncated (default 5, the previously fixed value).
  returns: 2-D numpy array produced by librosa.power_to_db.
  """
  wav,sr = librosa.load(file_path,sr=sr,duration=duration)
  target_len = int(duration*sr)
  missing = target_len - wav.shape[0]
  if missing > 0:
    # Split the padding so both sides add up to exactly `missing` samples;
    # padding ceil(missing/2) on each side would overshoot by one sample
    # whenever `missing` is odd.
    left = missing // 2
    wav=np.pad(wav,(left, missing - left),mode='reflect')
  else:
    wav=wav[:target_len]
  # The signal is passed by keyword: newer librosa releases no longer accept
  # it as a positional argument.
  spec=librosa.feature.melspectrogram(y=wav, sr=sr, n_fft=n_fft,
              hop_length=hop_length,n_mels=n_mels,fmin=fmin,fmax=fmax)
  spec_db=librosa.power_to_db(spec,top_db=top_db)
  return spec_db

In [None]:
# Peek at the spectrogram column (one array per audio file).
audioDeep["mel_spectrogram"]

In [None]:
# NORMALIZE DATA
#mean = np.mean(audioDeep["mel_spectrogram"], axis=0)
#mean = np.mean(audioDeep["mel_spectrogram"])
#std = np.std(audioDeep["mel_spectrogram"])
#audioDeep["mel_spectrogram"] = (audioDeep["mel_spectrogram"] - mean)/std
#X_test = (X_test - mean)/std
#audioDeep["mel_spectrogram"]

In [None]:
# Split the data 60/20/20 into train, validation and test.
# Step 1: hold out 20% for testing, stratified jointly on emotion, gender and actor.
train_dataz, test_dataz = train_test_split(audioDeep, test_size=0.2, random_state=0,
                               stratify=audioDeep[['emotion','gender','actor']])

# Step 2: 25% of the remaining 80% becomes the validation set. Stratify on the
# target label as well so train and validation keep the same emotion balance.
train_dataz, valid_dataz = train_test_split(train_dataz, test_size=0.25, random_state=0,
                               stratify=train_dataz['emotion'])

# Short aliases used by the following cells.
train = train_dataz
valid = valid_dataz
test = test_dataz


print(f'Number of training examples: {len(train_dataz)}')
print(f'Number of validation examples: {len(valid_dataz)}')
print(f'Number of testing examples: {len(test_dataz)}')

In [None]:
# Separate the model input (spectrogram column) from the label columns for each split.
# Double brackets keep x_* as one-column DataFrames, which later cells index by column name.

#x_train = train[["mel_spectrogram"]]**2 # Attempts to normalize, multiplied by power. Abs() is another attempt.
x_train = train[["mel_spectrogram"]]
y_train = train[["gender", "emotion","actor"]]

#x_val = valid[["mel_spectrogram"]]**2
x_val = valid[["mel_spectrogram"]]
y_val = valid[["gender", "emotion","actor"]]

#x_test = test[["mel_spectrogram"]]**2
x_test = test[["mel_spectrogram"]]
y_test = test[["gender", "emotion","actor"]]


# Sanity check: rescale the first training spectrogram to 8 bits and render it.
# Earlier variant that recomputed the spectrogram from the file path:
#imgTest = spec_to_image(get_melspectrogram_db(str(x_train.iloc[0]['path']), 44100))
imgTest = spec_to_image((x_train.iloc[0]['mel_spectrogram']))


librosa.display.specshow(imgTest)

#librosa.display.specshow(spec_to_image(get_melspectrogram_db(x_train[0], 44100)), cmap='viridis')

In [None]:
# Display the training inputs (one-column DataFrame holding the spectrogram arrays).
x_train

In [None]:
# Convert every spectrogram to an 8-bit image with a leading channel axis and
# zero-pad it to a fixed square so that all samples can be stacked into one tensor.

def _to_padded_images(frame, side=259):
    """Return a list of uint8 arrays of shape (1, side, side), one per row of ``frame``.

    frame: DataFrame with a 'mel_spectrogram' column of 2-D arrays.
    side: target height and width; each image is zero-padded at the bottom and
        on the right until it reaches this size.
    raises: ValueError if a spectrogram is larger than ``side`` in either axis.
    """
    images = []
    for spec in frame['mel_spectrogram']:
        img = spec_to_image(spec)[np.newaxis, ...]  # (1, height, width)
        _, height, width = img.shape
        if height > side or width > side:
            raise ValueError(
                f"spectrogram of shape {(height, width)} does not fit into {side}x{side}")
        images.append(np.pad(img, ((0, 0), (0, side - height), (0, side - width)),
                             'constant', constant_values=0))
    return images


x_traindata = _to_padded_images(x_train)
x_valdata = _to_padded_images(x_val)
x_testdata = _to_padded_images(x_test)

# One summary line per split instead of two prints per sample.
for split_name, split_images in (("train", x_traindata), ("valid", x_valdata), ("test", x_testdata)):
    print(split_name, len(split_images), split_images[0].shape if split_images else None)



In [None]:
# Collect the emotion label of every row, per split, as plain Python lists
# (row order matches the corresponding x_* frames).
y_traindata = y_train['emotion'].tolist()
y_valdata = y_val['emotion'].tolist()
y_testdata = y_test['emotion'].tolist()


In [None]:
# Learn the label -> integer mapping from the training labels only and reuse it
# for validation and test. Re-fitting per split would build a separate mapping
# for each split, so the same integer could stand for different emotions
# whenever a split does not contain every class.
le = preprocessing.LabelEncoder()
y_traindatatargets = le.fit_transform(y_traindata)
y_valdatatargets = le.transform(y_valdata)
y_testdatatargets = le.transform(y_testdata)


In [None]:
### LOAD THE DATA INTO TENSORS ###
# The images are 8-bit (0..255). They are scaled to [0, 1] so that the
# reconstruction target matches the range of the Sigmoid at the end of the
# decoder; with unscaled inputs the MSE loss cannot approach zero.
# The list of arrays is stacked with numpy first: building a tensor directly
# from a Python list of arrays is very slow.
x_trainz = torch.tensor(np.array(x_traindata),dtype=torch.float32)/255.
y_trainz = torch.tensor(y_traindatatargets,dtype=torch.long)

x_valz = torch.tensor(np.array(x_valdata),dtype=torch.float32)/255.
y_valz = torch.tensor(y_valdatatargets,dtype=torch.long)

x_testz = torch.tensor(np.array(x_testdata),dtype=torch.float32)/255.
y_testz = torch.tensor(y_testdatatargets,dtype=torch.long)

## AUTOENCODER

In [None]:
### IT'S AUTOENCODER TIME!!!

In [None]:
# Commented out for CNN
#x_trainz = x_trainz.reshape([-1,128*259])
#x_valz = x_trainz.reshape([-1,128*259])
#x_testz = x_testz.reshape([-1,128*259])

#x_trainz = x_trainz.reshape([-1,128,259])
#x_valz = x_trainz.reshape([-1,128,259])
#x_testz = x_testz.reshape([-1,128,259])


#x_trainz = x_trainz.reshape([128,128])
#x_valz = x_trainz.reshape([128,128])
#x_testz = x_testz.reshape([128,128])

#x_trainz = x_trainz.resize([864,1,128,128])
#x_valz = x_trainz.resize([288,1,128,128])
#x_testz = x_testz.resize([288,1,128,128])

In [None]:
# Shape of the test input tensor.
x_testz.shape

In [None]:
# Shape of the validation input tensor (the test tensor is shown in the cell above).
x_valz.shape

In [None]:
# Shape of the training input tensor.
x_trainz.shape

In [None]:
# y_trainz holds one encoded label per training row, so its length is the size of the
# training split (observed: 864), not a mini-batch size such as the 512 that the older
# sampling code in the training loop drew.
y_trainz.shape

In [None]:
# First convolutional autoencoder.
# Input layout is (batch, channels, height, width), e.g. [864, 1, 259, 259].
# Each unpadded 3x3 convolution trims 2 pixels per spatial axis (259 -> 257 -> 255),
# so the latent code has shape (4, 255, 255); the transposed convolutions grow it
# back (255 -> 257 -> 259).
# NOTE(review): the latent code (4*255*255 values) is larger than the input (259*259),
# so this network does not compress — confirm that this is intended.

Encoder = nn.Sequential(nn.Conv2d(1, 8, 3, stride=1, padding=0),
                        nn.ReLU(),
                        nn.Conv2d(8, 4, 3, stride=1, padding=0),
                        #nn.Linear()
                       )


# NOTE(review): the final Sigmoid bounds reconstructions to (0, 1); the MSE target
# should be in the same range — verify the input scaling.
Decoder = nn.Sequential(nn.ConvTranspose2d(4, 8, 3, stride=1, padding=0),
                        nn.ReLU(),
                        nn.ConvTranspose2d(8, 1, 3, stride=1, padding=0),
                        nn.Sigmoid()
                       )


# Use the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Encoder = Encoder.to(device)
Decoder = Decoder.to(device)


# A single Adam optimizer (default hyper-parameters) updates both networks;
# the objective is the mean squared reconstruction error.
optimizer = optim.Adam(list(Encoder.parameters()) + list(Decoder.parameters()))
loss_function = nn.MSELoss()

In [None]:
# RUN THE FIRST AUTO MODEL
# Trains Encoder/Decoder to reconstruct randomly drawn mini-batches of training images.

# The loop is bounded so that "Restart & Run All" can get past this cell;
# raise num_iterations for a longer run.
num_iterations = 10000
report_every = 1000

# Fraction of the training set drawn (with replacement) per step.
# Values tried: 1%, 25%, 50%, 75%, 100%.
percentage_of_label = .01
number_of_samples = max(1, int(len(y_trainz) * percentage_of_label))  # never an empty batch

Encoder.train()
Decoder.train()

running_loss = []
for iteration in range(num_iterations):

    random_indexes = np.random.choice(range(0,len(y_trainz)), number_of_samples)
    # Index the tensor directly instead of round-tripping through numpy.
    x_traina = x_trainz[torch.as_tensor(random_indexes, dtype=torch.long)].to(device)

    optimizer.zero_grad()

    latent_variable = Encoder(x_traina)
    recon_input = Decoder(latent_variable)

    # The reconstruction has the same shape as the input batch, so the loss
    # can compare them element-wise.
    loss = loss_function(recon_input, x_traina)

    loss.backward()

    optimizer.step()

    running_loss.append(loss.item())

    # Report the mean loss since the previous report, then start a new window.
    if iteration%report_every == 0:
        r_loss = sum(running_loss)/len(running_loss)
        print (r_loss)
        running_loss = []
        


In [None]:
# Reconstruct a random batch of test images with the first autoencoder.
# The original spectrograms were 128x259 and were padded to 259x259.

Encoder.eval()
Decoder.eval()

random_indexes = np.random.choice(range(0,len(y_testz)),128)

x_testa = x_testz[torch.as_tensor(random_indexes, dtype=torch.long)].to(device)

# Inference only: no autograd graph is needed, which saves memory.
with torch.no_grad():
    latent_variable = Encoder(x_testa)
    recon_input = Decoder(latent_variable)

# Keep the first reconstruction for the next cell and show the matching input here.
test_pic = recon_input[0].reshape([-1,259,259])
plt.imshow(x_testa[0].reshape(259,259).detach().cpu())

In [None]:
# Show the reconstruction kept in test_pic, for comparison with the input image shown by the previous cell.
#plt.imshow(test_pic.detach().cpu())
plt.imshow(test_pic.reshape(259,259).detach().cpu())

In [None]:
from copy import deepcopy

def chunks(lst, n):
    """Yield consecutive slices of ``lst`` that hold at most ``n`` items each."""
    starts = range(0, len(lst), n)
    yield from (lst[begin:begin + n] for begin in starts)

In [None]:
# Put both networks of the first autoencoder into evaluation mode before extracting features.
Encoder.eval()
Decoder.eval()

In [None]:
# Encode the training and test images with the first encoder, 512 samples at a time.
# no_grad() avoids building an autograd graph, and each batch is moved to the CPU
# right away so that the latent codes do not pile up on the device.
with torch.no_grad():
    X_train_features = torch.cat(
        [Encoder(ch.to(device)).cpu() for ch in chunks(x_trainz,512)]).numpy()
    X_test_features = torch.cat(
        [Encoder(ch.to(device)).cpu() for ch in chunks(x_testz,512)]).numpy()

X_train_features.shape, X_test_features.shape

In [None]:
# Number of encoded training samples.
len(X_train_features)

In [None]:
# Number of encoded test samples.
len(X_test_features)

In [None]:
# RUN THE SECOND AUTO MODEL
# Second autoencoder: same layer stack as the first one, freshly initialised.

Encoder2 = nn.Sequential(nn.Conv2d(1, 8, 3, stride=1, padding=0),
                        nn.ReLU(),
                        nn.Conv2d(8, 4, 3, stride=1, padding=0),
                       )


Decoder2 = nn.Sequential(nn.ConvTranspose2d(4, 8, 3, stride=1, padding=0),
                        nn.ReLU(),
                        nn.ConvTranspose2d(8, 1, 3, stride=1, padding=0),
                        nn.Sigmoid()
                       )


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Encoder2 = Encoder2.to(device)
Decoder2 = Decoder2.to(device)


# The names `optimizer` and `loss_function` are rebound here: from this cell on
# they refer to the second model's parameters.
optimizer = optim.Adam(list(Encoder2.parameters()) + list(Decoder2.parameters()))
loss_function = nn.MSELoss()

In [None]:

# Train the second autoencoder (Encoder2/Decoder2) on random mini-batches of training images.

# The loop is bounded so that "Restart & Run All" can get past this cell;
# raise num_iterations for a longer run.
num_iterations = 10000
report_every = 1000

# Fraction of the training set drawn (with replacement) per step.
# Values tried: 1%, 25%, 50%, 75%, 100%.
percentage_of_label = .01
number_of_samples = max(1, int(len(y_trainz) * percentage_of_label))  # never an empty batch

# Training mode must be set on the networks that are optimised in this cell.
Encoder2.train()
Decoder2.train()

running_loss = []
for iteration in range(num_iterations):

  random_indexes = np.random.choice(range(0,len(y_trainz)), number_of_samples)
  # Index the tensor directly instead of round-tripping through numpy.
  x_traina = x_trainz[torch.as_tensor(random_indexes, dtype=torch.long)].to(device)

  optimizer.zero_grad()

  latent_variable = Encoder2(x_traina)
  recon_input = Decoder2(latent_variable)

  loss = loss_function(recon_input, x_traina)

  loss.backward()

  optimizer.step()

  running_loss.append(loss.item())

  # Report the mean loss since the previous report, then start a new window.
  if iteration%report_every == 0:
    r_loss = sum(running_loss)/len(running_loss)
    print (r_loss)
    running_loss = []


In [None]:
# Put both networks of the second autoencoder into evaluation mode before extracting features.
Encoder2.eval()
Decoder2.eval()

In [None]:
# Encode the training and test images with the second encoder, 512 samples at a time.
# no_grad() avoids building an autograd graph, and each batch is moved to the CPU
# right away so that the latent codes do not pile up on the device.
with torch.no_grad():
    X_train_features2 = torch.cat(
        [Encoder2(ch.to(device)).cpu() for ch in chunks(x_trainz,512)]).numpy()
    X_test_features2 = torch.cat(
        [Encoder2(ch.to(device)).cpu() for ch in chunks(x_testz,512)]).numpy()

X_train_features2.shape, X_test_features2.shape

In [None]:
# Flatten every latent code to one row per sample for clustering and classification.
# The row count is taken from the data instead of being hard-coded, so the cell
# keeps working when the split sizes change.

X_train_features = X_train_features.reshape(len(X_train_features), -1)
X_test_features = X_test_features.reshape(len(X_test_features), -1)

X_train_features2 = X_train_features2.reshape(len(X_train_features2), -1)
X_test_features2 = X_test_features2.reshape(len(X_test_features2), -1)

In [None]:
# Training set: scatter of the first two flattened latent values of the second encoder,
# coloured by emotion class.
plt.figure(figsize=(20,15))
plt.scatter(X_train_features2[:, 0], X_train_features2[:, 1], s= 5, c=y_trainz, cmap='Spectral')
plt.gca().set_aspect('equal', 'datalim')
# 8 emotion classes (0-7) -> 9 bin edges at -0.5 ... 7.5, one colour band per class.
plt.colorbar(boundaries=np.arange(9)-0.5).set_ticks(np.arange(8))
plt.title('Training Data: Visualizing RAVDESS directly from autoencoder', fontsize=24);
plt.xlabel('feature 1')
plt.ylabel('feature 2')

In [None]:
# Test set: scatter of the first two flattened latent values of the second encoder,
# coloured by emotion class.
plt.figure(figsize=(20,15))
plt.scatter(X_test_features2[:, 0], X_test_features2[:, 1], s= 5, c=y_testz, cmap='Spectral')
plt.gca().set_aspect('equal', 'datalim')
# 8 emotion classes (0-7) -> 9 bin edges at -0.5 ... 7.5, one colour band per class.
plt.colorbar(boundaries=np.arange(9)-0.5).set_ticks(np.arange(8))
plt.title('Testing Data: Visualizing RAVDESS directly from autoencoder', fontsize=24);
plt.xlabel('feature 1')
plt.ylabel('feature 2')

In [None]:
# PCA vs Autoencoder

In [None]:
# Flatten every image to one row per sample for PCA and clustering.
# The row count is taken from the data instead of being hard-coded.
# Note: this rebinds x_trainz/x_testz to 2-D tensors; the autoencoder cells above
# expect the 4-D layout, so re-run the tensor cell before re-running them.

x_trainz = x_trainz.reshape(len(x_trainz), -1)
x_testz = x_testz.reshape(len(x_testz), -1)

In [None]:
#x_testz.shape

In [None]:
# Baseline: project the flattened raw images onto their first two principal components.
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
# The projection is fitted on the training images only.
pca_fit = pca.fit(x_trainz)
principalComponents = pca_fit.transform(x_trainz) 

In [None]:
# Training images projected onto the two principal components, coloured by emotion class.
plt.figure(figsize=(20,15))
plt.scatter(principalComponents[:, 0], principalComponents[:, 1], s= 5, c=y_trainz, cmap='Spectral')
plt.gca().set_aspect('equal', 'datalim')
# 8 emotion classes (0-7) -> 9 bin edges at -0.5 ... 7.5, one colour band per class.
plt.colorbar(boundaries=np.arange(9)-0.5).set_ticks(np.arange(8))
plt.title('Visualizing RAVDESS through PCA', fontsize=24);
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')

In [None]:
# Test images projected with the PCA fitted on the training images, coloured by emotion class.
plt.figure(figsize=(20,15))
principalComponents = pca_fit.transform(x_testz)
plt.scatter(principalComponents[:, 0], principalComponents[:, 1], s= 5, c=y_testz, cmap='Spectral')
plt.gca().set_aspect('equal', 'datalim')
# 8 emotion classes (0-7) -> 9 bin edges at -0.5 ... 7.5, one colour band per class.
plt.colorbar(boundaries=np.arange(9)-0.5).set_ticks(np.arange(8))
plt.title('Visualizing RAVDESS through PCA', fontsize=24);
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')

In [None]:
# Same two-component PCA, now fitted on the flattened latent codes of the first encoder.
pca = PCA(n_components=2)
pca_fit = pca.fit(X_train_features)
principalComponents = pca_fit.transform(X_train_features) 

In [None]:
# Training latent codes (first encoder) projected onto two principal components,
# coloured by emotion class.
plt.figure(figsize=(20,15))
plt.scatter(principalComponents[:, 0], principalComponents[:, 1], s= 5, c=y_trainz, cmap='Spectral')
plt.gca().set_aspect('equal', 'datalim')
# 8 emotion classes (0-7) -> 9 bin edges at -0.5 ... 7.5, one colour band per class.
plt.colorbar(boundaries=np.arange(9)-0.5).set_ticks(np.arange(8))
plt.title('Visualizing RAVDESS through PCA', fontsize=24);
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')

In [None]:
# Project the test latent codes with the PCA fitted on the training latent codes.
principalComponents = pca_fit.transform(X_test_features)

In [None]:
# Test latent codes (first encoder) in the PCA space, coloured by emotion class.
plt.figure(figsize=(20,15))
plt.scatter(principalComponents[:, 0], principalComponents[:, 1], s= 5, c=y_testz, cmap='Spectral')
plt.gca().set_aspect('equal', 'datalim')
# 8 emotion classes (0-7) -> 9 bin edges at -0.5 ... 7.5, one colour band per class.
plt.colorbar(boundaries=np.arange(9)-0.5).set_ticks(np.arange(8))
plt.title('Visualizing RAVDESS through PCA', fontsize=24);
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')

In [None]:
# Compare K means

In [None]:
from sklearn.cluster import KMeans

In [None]:
# K-means (8 clusters, one per emotion class) on the 2-D PCA projection of the raw images.
pca = PCA(n_components=2)
PCA_fit = pca.fit(x_trainz)
PCA_train = pca.transform(x_trainz)
PCA_test = pca.transform(x_testz)
# Seeded so that the centroids are the same on every run.
kmeans = KMeans(init="k-means++", n_clusters=8, n_init=4, random_state=0)
kmeans.fit(PCA_train)
plt.figure(figsize=(20,15))
plt.scatter(PCA_train[:, 0], PCA_train[:, 1], s= 5, c=y_trainz, cmap='Spectral')
# Overlay the cluster centres on the training points.
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], marker="o", s=200, linewidths=5,
            color="Black", zorder=10)


plt.gca().set_aspect('equal', 'datalim')
# 8 emotion classes (0-7) -> 9 bin edges at -0.5 ... 7.5, one colour band per class.
plt.colorbar(boundaries=np.arange(9)-0.5).set_ticks(np.arange(8))
plt.title('Visualizing RAVDESS through PCA', fontsize=24);
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')

In [None]:
# Test points in the same PCA space, with the centroids fitted on the training projection.
plt.figure(figsize=(20,15))
plt.scatter(PCA_test[:, 0], PCA_test[:, 1], s= 5, c=y_testz, cmap='Spectral')
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], marker="o", s=200, linewidths=5,
            color="Black", zorder=10)


plt.gca().set_aspect('equal', 'datalim')
# 8 emotion classes (0-7) -> 9 bin edges at -0.5 ... 7.5, one colour band per class.
plt.colorbar(boundaries=np.arange(9)-0.5).set_ticks(np.arange(8))
plt.title('Visualizing RAVDESS through PCA', fontsize=24);
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')

In [None]:
# K-means (8 clusters, one per emotion class) on the 2-D PCA projection of the
# first encoder's latent codes.
pca = PCA(n_components=2)
PCA_fit = pca.fit(X_train_features)
PCA_train = pca.transform(X_train_features)
PCA_test = pca.transform(X_test_features)
# Seeded so that the centroids are the same on every run.
kmeans = KMeans(init="k-means++", n_clusters=8, n_init=4, random_state=0)
kmeans.fit(PCA_train)
plt.figure(figsize=(20,15))
plt.scatter(PCA_train[:, 0], PCA_train[:, 1], s= 5, c=y_trainz, cmap='Spectral')
# Overlay the cluster centres on the training points.
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], marker="o", s=200, linewidths=5,
            color="Black", zorder=10)


plt.gca().set_aspect('equal', 'datalim')
# 8 emotion classes (0-7) -> 9 bin edges at -0.5 ... 7.5, one colour band per class.
plt.colorbar(boundaries=np.arange(9)-0.5).set_ticks(np.arange(8))
plt.title('Visualizing RAVDESS through PCA', fontsize=24);
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')

In [None]:
# Test latent codes in the same PCA space, with the centroids fitted on the training projection.
plt.figure(figsize=(20,15))
plt.scatter(PCA_test[:, 0], PCA_test[:, 1], s= 5, c=y_testz, cmap='Spectral')
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], marker="o", s=200, linewidths=5,
            color="Black", zorder=10)


plt.gca().set_aspect('equal', 'datalim')
# 8 emotion classes (0-7) -> 9 bin edges at -0.5 ... 7.5, one colour band per class.
plt.colorbar(boundaries=np.arange(9)-0.5).set_ticks(np.arange(8))
plt.title('Visualizing RAVDESS through PCA', fontsize=24);
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')

In [None]:
# Fit a Classifier on the Representation with fewer label data

In [None]:
# Draw the small labelled subset that the classifiers below are trained on.
#percentage_of_label = .10
percentage_of_label = .01
number_of_samples = max(1, int(len(y_trainz) * percentage_of_label))  # at least one labelled row

# replace=False: every labelled example is a distinct training row. Sampling with
# replacement could pick the same row several times and shrink the subset further.
random_indexes = np.random.choice(range(0,len(y_trainz)), number_of_samples, replace=False)
small_x_train = np.take(x_trainz,random_indexes,0)
small_y_train = np.take(y_trainz,random_indexes,0)

In [None]:
# PCA version!!!

In [None]:
# PCA cannot return more components than there are samples or features, so the
# requested 8 components are capped by the shape of the labelled subset.
n_components = min(8, *small_x_train.shape)
pca = PCA(n_components=n_components)
PCA_fit = pca.fit(small_x_train)
PCA_train = pca.transform(small_x_train)
PCA_test = pca.transform(x_testz)

In [None]:
# Random forest on the PCA projection of the labelled subset, scored on the full test set.
rf_reg = RandomForestClassifier(random_state=42)
rf_reg.fit(PCA_train, small_y_train)
print("Train Accuracy: ", rf_reg.score(PCA_train, small_y_train))
print("Test Accuracy: ", rf_reg.score(PCA_test,y_testz))

# Macro-averaged metrics computed from a single set of test predictions.
y_pred = rf_reg.predict(PCA_test)
for metric_label, metric_fn in (("F1 Score: ", f1_score),
                                ("Recall: ", recall_score),
                                ("Precision: ", precision_score)):
    print(metric_label, metric_fn(y_testz, y_pred, average='macro'))

In [None]:
# Encoder 1 version!!!

In [None]:
# Random forest on the first encoder's latent codes. The labelled subset is the one
# selected by random_indexes, so the rows match those used for the PCA classifier.
small_x_train = X_train_features.take(random_indexes, axis=0)
small_y_train = np.take(y_trainz,random_indexes,0)
rf_reg = RandomForestClassifier(random_state=42)
rf_reg.fit(small_x_train, small_y_train)
print("Train Accuracy: ", rf_reg.score(small_x_train, small_y_train))
print("Test Accuracy: ", rf_reg.score(X_test_features,y_testz))

# Macro-averaged metrics computed from a single set of test predictions.
y_pred = rf_reg.predict(X_test_features)
for metric_label, metric_fn in (("F1 Score: ", f1_score),
                                ("Recall: ", recall_score),
                                ("Precision: ", precision_score)):
    print(metric_label, metric_fn(y_testz, y_pred, average='macro'))

In [None]:
# Encoder 2 version!!!

In [None]:
# Random forest on the second encoder's latent codes, using the same labelled
# subset (random_indexes) as the other classifiers.
# Label fractions to try: 1%, 25%, 50%, 75%, 100%.
small_x_train = X_train_features2.take(random_indexes, axis=0)
small_y_train = np.take(y_trainz,random_indexes,0)

rf_reg = RandomForestClassifier(random_state=42)
rf_reg.fit(small_x_train, small_y_train)

print("Train Accuracy: ", rf_reg.score(small_x_train, small_y_train))
print("Test Accuracy: ", rf_reg.score(X_test_features2,y_testz))

# Macro-averaged metrics computed from a single set of test predictions.
y_pred = rf_reg.predict(X_test_features2)
for metric_label, metric_fn in (("F1 Score: ", f1_score),
                                ("Recall: ", recall_score),
                                ("Precision: ", precision_score)):
    print(metric_label, metric_fn(y_testz, y_pred, average='macro'))

In [None]:
# Do 100% now!!!!!!!!!!!!!!!!!!!!

In [None]:
# 1%, 25%, 50%, 75%, 100%