with GPU acceleration

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import OneHotEncoder
import os
import glob
import torch
import torch.nn as nn
import torch.optim as optim

# Function to load hidden states and labels from npy files
def load_test_train(root_dir, speakers, layer, max_files_per_speaker=50):
    hidden_states_list = []
    labels_list = []
    speaker_to_label = {speaker: idx for idx, speaker in enumerate(speakers)}
    
    for speaker in tqdm(speakers):
        layer_path = os.path.join(root_dir, speaker, f'layer_{layer}')
        file_paths = glob.glob(os.path.join(layer_path, '*.npy'))[:max_files_per_speaker]
        for file_path in file_paths:
            hidden_states = np.load(file_path)
            if hidden_states.ndim == 3 and hidden_states.shape[0] == 1 and hidden_states.shape[2] == 1024:
                hidden_states = hidden_states.squeeze(0)  # Remove the unnecessary dimension
                labels = speaker_to_label[speaker]  # Generate labels from speaker ID
                hidden_states_list.append(hidden_states)
                labels_list.append(labels)
            else:
                print(f"Unexpected shape {hidden_states.shape} for file {file_path}, skipping.")
    
    # Pad hidden states to the maximum length
    max_length = max(hidden_states.shape[0] for hidden_states in hidden_states_list)
    padded_hidden_states_list = [np.pad(hidden_states, ((0, max_length - hidden_states.shape[0]), (0, 0)), mode='constant') for hidden_states in hidden_states_list]
    
    return np.array(padded_hidden_states_list), np.array(labels_list)

# Logistic Regression model with PyTorch
class LogisticRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)
    
    def forward(self, x):
        outputs = self.linear(x)
        return outputs

# Example speakers and number of layers
root_dir = '/home/rag/experimental_trial/data/all_speakers_xlrs_new_28_05' 
speakers = ['speaker_' + str(i) for i in range(1, 51)]
num_layers = 25  # Example number of layers

results = []

encoder = OneHotEncoder(sparse_output=False)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

for i in range(num_layers):
    print(f"Processing layer {i}")
    hidden_states_layer, labels_layer = load_test_train(root_dir, speakers, i)
    
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(hidden_states_layer, labels_layer, test_size=0.2, random_state=42)

    # Verify labels
    num_classes = len(np.unique(y_train))

    # Convert data to PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.long)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.long)

    # Move data to GPU if available
    X_train = X_train.to(device)
    y_train = y_train.to(device)
    X_test = X_test.to(device)
    y_test = y_test.to(device)

    # Initialize model
    input_dim = X_train.shape[1] * X_train.shape[2]  # Number of features after flattening
    output_dim = num_classes  # Number of classes
    model = LogisticRegressionModel(input_dim, output_dim).to(device)
    
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.001)
    
    # Early stopping parameters
    best_loss = float('inf')
    patience = 10
    patience_counter = 0

    # Training
    num_epochs = 1000
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train.view(X_train.size(0), -1))  # Flatten the inputs
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()
        
        # Early stopping check
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_test.view(X_test.size(0), -1))
            val_loss = criterion(val_outputs, y_test).item()

        if val_loss < best_loss:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
        
        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch}")
            break
    
    # Evaluation
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test.view(X_test.size(0), -1)).argmax(dim=1)
    
    y_test_cpu = y_test.cpu().numpy()
    y_pred_cpu = y_pred.cpu().numpy()

    acc = accuracy_score(y_test_cpu, y_pred_cpu)
    prec = precision_score(y_test_cpu, y_pred_cpu, average='weighted', zero_division=0)
    rec = recall_score(y_test_cpu, y_pred_cpu, average='weighted', zero_division=0)
    f1 = f1_score(y_test_cpu, y_pred_cpu, average='weighted', zero_division=0)
   
    results.append({'layer': i, 'accuracy': acc, 'precision': prec, 'recall': rec, 'f1': f1})
    print(f"Layer {i} - Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1: {f1:.4f}")

# Save results to a CSV file
results_df = pd.DataFrame(results)
os.makedirs('results', exist_ok=True)
results_df.to_csv('results/logreg_50_xlsr_es.csv', index=False)

without GPU turned out to achive higher results

In [6]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
import os
import glob

# Function to load hidden states and labels from npy files
def load_test_train(root_dir, speakers, layer, max_files_per_speaker=50):
    hidden_states_list = []
    labels_list = []
    speaker_to_label = {speaker: idx for idx, speaker in enumerate(speakers)}
    
    for speaker in tqdm(speakers):
        layer_path = os.path.join(root_dir, speaker, f'layer_{layer}')
        file_paths = glob.glob(os.path.join(layer_path, '*.npy'))[:max_files_per_speaker]
        for file_path in file_paths:
            hidden_states = np.load(file_path)
            if hidden_states.ndim == 3 and hidden_states.shape[0] == 1 and hidden_states.shape[2] == 1024:
                hidden_states = hidden_states.squeeze(0)  # Remove the unnecessary dimension
                labels = speaker_to_label[speaker]  # Generate labels from speaker ID
                hidden_states_list.append(hidden_states)
                labels_list.append(labels)
            else:
                print(f"Unexpected shape {hidden_states.shape} for file {file_path}, skipping.")
    
    # Pad hidden states to the maximum length
    max_length = max(hidden_states.shape[0] for hidden_states in hidden_states_list)
    padded_hidden_states_list = [np.pad(hidden_states, ((0, max_length - hidden_states.shape[0]), (0, 0)), mode='constant') for hidden_states in hidden_states_list]
    
    return np.array(padded_hidden_states_list), np.array(labels_list)

# Example speakers and number of layers
root_dir = '/home/rag/experimental_trial/data/all_speakers_w2vec_28.05' 
speakers = ['speaker_' + str(i) for i in range(1, 51)]
num_layers = 25  # Example number of layers

results = []

encoder = OneHotEncoder(sparse_output=False)

for i in range(num_layers):
    print(f"Processing layer {i}")
    hidden_states_layer, labels_layer = load_test_train(root_dir, speakers, i)
    
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(hidden_states_layer, labels_layer, test_size=0.2, random_state=42)

    # Flatten the inputs
    X_train = X_train.reshape(X_train.shape[0], -1)
    X_test = X_test.reshape(X_test.shape[0], -1)

    # Initialize and train the logistic regression model
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
    
    # Evaluation
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
   
    results.append({'layer': i, 'accuracy': acc, 'precision': prec, 'recall': rec, 'f1': f1})
    print(f"Layer {i} - Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1: {f1:.4f}")

# Save results to a CSV file
results_df = pd.DataFrame(results)
os.makedirs('results', exist_ok=True)
results_df.to_csv('results/logreg_50_w2v_cpu.csv', index=False)


Processing layer 0


100%|██████████| 50/50 [00:26<00:00,  1.87it/s]


Layer 0 - Accuracy: 0.9020, Precision: 0.9179, Recall: 0.9020, F1: 0.9021
Processing layer 1


100%|██████████| 50/50 [00:26<00:00,  1.91it/s]


Layer 1 - Accuracy: 0.8940, Precision: 0.9150, Recall: 0.8940, F1: 0.8953
Processing layer 2


100%|██████████| 50/50 [00:25<00:00,  1.93it/s]


Layer 2 - Accuracy: 0.9340, Precision: 0.9434, Recall: 0.9340, F1: 0.9345
Processing layer 3


100%|██████████| 50/50 [00:25<00:00,  1.94it/s]


Layer 3 - Accuracy: 0.9260, Precision: 0.9381, Recall: 0.9260, F1: 0.9248
Processing layer 4


100%|██████████| 50/50 [00:25<00:00,  1.94it/s]


Layer 4 - Accuracy: 0.9260, Precision: 0.9403, Recall: 0.9260, F1: 0.9255
Processing layer 5


100%|██████████| 50/50 [00:26<00:00,  1.90it/s]


Layer 5 - Accuracy: 0.9140, Precision: 0.9247, Recall: 0.9140, F1: 0.9142
Processing layer 6


100%|██████████| 50/50 [00:25<00:00,  1.97it/s]


Layer 6 - Accuracy: 0.9260, Precision: 0.9334, Recall: 0.9260, F1: 0.9266
Processing layer 7


100%|██████████| 50/50 [00:25<00:00,  1.94it/s]


Layer 7 - Accuracy: 0.9080, Precision: 0.9156, Recall: 0.9080, F1: 0.9070
Processing layer 8


100%|██████████| 50/50 [00:25<00:00,  1.97it/s]


Layer 8 - Accuracy: 0.8580, Precision: 0.8698, Recall: 0.8580, F1: 0.8555
Processing layer 9


100%|██████████| 50/50 [00:25<00:00,  1.95it/s]


Layer 9 - Accuracy: 0.8260, Precision: 0.8529, Recall: 0.8260, F1: 0.8276
Processing layer 10


100%|██████████| 50/50 [00:25<00:00,  1.93it/s]


Layer 10 - Accuracy: 0.8100, Precision: 0.8285, Recall: 0.8100, F1: 0.8085
Processing layer 11


100%|██████████| 50/50 [00:25<00:00,  1.94it/s]


Layer 11 - Accuracy: 0.7660, Precision: 0.8034, Recall: 0.7660, F1: 0.7665
Processing layer 12


100%|██████████| 50/50 [00:25<00:00,  1.98it/s]


Layer 12 - Accuracy: 0.7220, Precision: 0.7688, Recall: 0.7220, F1: 0.7271
Processing layer 13


100%|██████████| 50/50 [00:26<00:00,  1.91it/s]


Layer 13 - Accuracy: 0.7340, Precision: 0.7888, Recall: 0.7340, F1: 0.7364
Processing layer 14


100%|██████████| 50/50 [00:25<00:00,  1.98it/s]


Layer 14 - Accuracy: 0.6640, Precision: 0.7327, Recall: 0.6640, F1: 0.6683
Processing layer 15


100%|██████████| 50/50 [00:25<00:00,  1.94it/s]


Layer 15 - Accuracy: 0.6460, Precision: 0.7147, Recall: 0.6460, F1: 0.6517
Processing layer 16


100%|██████████| 50/50 [00:24<00:00,  2.02it/s]


Layer 16 - Accuracy: 0.6700, Precision: 0.7137, Recall: 0.6700, F1: 0.6757
Processing layer 17


100%|██████████| 50/50 [00:25<00:00,  1.99it/s]


Layer 17 - Accuracy: 0.6560, Precision: 0.7278, Recall: 0.6560, F1: 0.6628
Processing layer 18


100%|██████████| 50/50 [00:24<00:00,  2.06it/s]


Layer 18 - Accuracy: 0.6440, Precision: 0.7109, Recall: 0.6440, F1: 0.6538
Processing layer 19


100%|██████████| 50/50 [00:14<00:00,  3.46it/s]


Layer 19 - Accuracy: 0.5740, Precision: 0.6293, Recall: 0.5740, F1: 0.5741
Processing layer 20


100%|██████████| 50/50 [00:23<00:00,  2.15it/s]


Layer 20 - Accuracy: 0.4200, Precision: 0.4515, Recall: 0.4200, F1: 0.4153
Processing layer 21


100%|██████████| 50/50 [00:23<00:00,  2.16it/s]


Layer 21 - Accuracy: 0.3420, Precision: 0.3737, Recall: 0.3420, F1: 0.3425
Processing layer 22


100%|██████████| 50/50 [00:16<00:00,  2.96it/s]


Layer 22 - Accuracy: 0.2720, Precision: 0.2875, Recall: 0.2720, F1: 0.2664
Processing layer 23


100%|██████████| 50/50 [00:16<00:00,  2.97it/s]


Layer 23 - Accuracy: 0.2640, Precision: 0.3126, Recall: 0.2640, F1: 0.2669
Processing layer 24


100%|██████████| 50/50 [00:16<00:00,  3.05it/s]


Layer 24 - Accuracy: 0.3040, Precision: 0.3297, Recall: 0.3040, F1: 0.2909
