## ***Semi-Supervised learning :***
    - Noisy student training(NST)
    - Knowledge expansion and distillation

In [1]:
import tensorflow as tf
import numpy as np
import json
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Dropout, LSTM, Dense, Reshape
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt
from google.colab import drive
import librosa

# Mount Google Drive
drive.mount('/content/drive')

# Verify T4 GPU
print("TensorFlow version:", tf.__version__)
print("GPU available:", tf.config.list_physical_devices('GPU'))

# Enable mixed precision for T4 GPU
tf.keras.mixed_precision.set_global_policy('mixed_float16')

# Parameters
batch_size = 64  # Optimized for T4 GPU
data_path = '/content/drive/MyDrive/data.json'  # Your data path
teacher_model_path = '/content/drive/MyDrive/crnn.keras'  # Your crnn.keras path
student_model_save_path = '/content/drive/MyDrive/student_nst.keras'  # For ensemble
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
num_classes = len(genres)

# Data augmentation for unlabeled data (direct MFCC noise)
def augment_mfcc(mfcc, noise_level=0.01):
    # Add Gaussian noise directly to MFCCs
    noise = np.random.normal(0, noise_level, mfcc.shape)
    mfcc_aug = mfcc + noise
    # Clip to prevent non-finite values
    mfcc_aug = np.clip(mfcc_aug, -1e6, 1e6)
    # Ensure shape (130, 13)
    return mfcc_aug[:130, :13]

# Alternative: Audio-based augmentation (stabilized)
def augment_mfcc_audio(mfcc, sr=22050):
    try:
        # Convert MFCC to audio
        audio = librosa.feature.inverse.mfcc_to_audio(mfcc, sr=sr)
        # Clip audio to prevent instability
        audio = np.clip(audio, -1.0, 1.0)
        # Pitch shift (±1 semitone, reduced to avoid instability)
        if np.random.rand() < 0.5:
            n_steps = np.random.uniform(-1, 1)
            audio = librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
            audio = np.clip(audio, -1.0, 1.0)
        # Time stretch (0.9–1.1 rate, reduced)
        if np.random.rand() < 0.5:
            rate = np.random.uniform(0.9, 1.1)
            audio = librosa.effects.time_stretch(audio, rate=rate)
            audio = np.clip(audio, -1.0, 1.0)
        # Add mild Gaussian noise
        audio += np.random.normal(0, 0.005, audio.shape)
        audio = np.clip(audio, -1.0, 1.0)
        # Replace non-finite values
        audio = np.where(np.isfinite(audio), audio, 0.0)
        # Recompute MFCC
        mfcc_aug = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13, n_mels=130, hop_length=512)
        # Ensure shape (130, 13)
        if mfcc_aug.shape[1] > 13:
            mfcc_aug = mfcc_aug[:, :13]
        elif mfcc_aug.shape[1] < 13:
            mfcc_aug = np.pad(mfcc_aug, ((0, 0), (0, 13 - mfcc_aug.shape[1])), mode='constant')
        return mfcc_aug[:130, :13]
    except Exception as e:
        print(f"Error in audio augmentation: {e}")
        # Fallback to direct MFCC noise
        return augment_mfcc(mfcc)

# Load labeled data
print("Loading labeled data...")
with open(data_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Extract MFCCs and labels
mfccs = data['mfcc']
labels = data['genre_num']

# Validate and align data
print(f"Original MFCCs length: {len(mfccs)}, Labels length: {len(labels)}")
min_length = min(len(mfccs), len(labels))
mfccs = mfccs[:min_length]
labels = labels[:min_length]
print(f"Adjusted MFCCs length: {len(mfccs)}, Labels length: {len(labels)}")

# Convert to NumPy arrays
X = np.array(mfccs)
y = np.array(labels)
print(f"Data shape: {X.shape}, Labels shape: {y.shape}")

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Generate pseudo-unlabeled data (using direct MFCC augmentation for stability)
print("Generating pseudo-unlabeled data...")
unlabeled_mfccs = np.array([augment_mfcc(mfcc) for mfcc in X])  # Switch to augment_mfcc_audio if needed

# Train-validation-test split
X_train, X_temp, y_train, y_temp = train_test_split(X, y_encoded, test_size=0.3, stratify=y_encoded, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

# Add channel dimension
X_train = X_train[..., np.newaxis]
X_val = X_val[..., np.newaxis]
X_test = X_test[..., np.newaxis]
unlabeled_mfccs = unlabeled_mfccs[..., np.newaxis]
print(f"X_train shape: {X_train.shape}, X_val shape: {X_val.shape}, X_test shape: {X_test.shape}, Unlabeled shape: {unlabeled_mfccs.shape}")

# Create tf.data datasets
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Step 1: Load Pre-trained Teacher Model
print("Loading pre-trained teacher model...")
try:
    teacher_model = load_model(teacher_model_path, compile=False)
    teacher_model.compile(optimizer=Adam(learning_rate=0.0001),
                         loss='sparse_categorical_crossentropy',
                         metrics=['accuracy'])
    teacher_model.summary()
except Exception as e:
    print(f"Error loading teacher model: {e}")
    raise

# Evaluate teacher
test_loss, test_accuracy = teacher_model.evaluate(test_dataset)
print(f"Teacher Test Loss: {test_loss:.4f}, Accuracy: {test_accuracy:.4f}")

# Step 2: Generate Pseudo-Labels
print("Generating pseudo-labels...")
pseudo_labels = teacher_model.predict(unlabeled_mfccs)
pseudo_labels_soft = pseudo_labels  # Soft labels
pseudo_labels_hard = np.argmax(pseudo_labels, axis=1)  # Hard labels

# Combine labeled and pseudo-labeled data
X_combined = np.concatenate([X_train, unlabeled_mfccs])
y_combined = np.concatenate([y_train, pseudo_labels_hard])  # Using hard labels
print(f"Combined data shape: {X_combined.shape}, Labels shape: {y_combined.shape}")

# Create combined dataset
combined_dataset = tf.data.Dataset.from_tensor_slices((X_combined, y_combined)).batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Step 3: Define and Train Student Model
def build_crnn_student(input_shape=(130, 13, 1), num_classes=10):
    model = Sequential([
        Conv2D(256, (3, 3), activation='relu', padding='same', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.5),
        Conv2D(512, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.5),
        Reshape((-1, 512)),
        LSTM(128, return_sequences=False),
        Dropout(0.5),
        Dense(num_classes, activation='softmax', dtype='float32')
    ])
    return model

print("Training student model...")
student_model = build_crnn_student()
student_model.compile(optimizer=Adam(learning_rate=0.0001),
                     loss='sparse_categorical_crossentropy',
                     metrics=['accuracy'])
student_model.summary()

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5)

history_student = student_model.fit(
    combined_dataset,
    validation_data=val_dataset,
    epochs=250,
    callbacks=[early_stopping, lr_scheduler],
    verbose=1
)

# Evaluate student
test_loss, test_accuracy = student_model.evaluate(test_dataset)
print(f"Student Test Loss: {test_loss:.4f}, Accuracy: {test_accuracy:.4f}")

# Plot results
plt.figure(figsize=(10, 5))
plt.plot(history_student.history['accuracy'], label='Student Train Accuracy')
plt.plot(history_student.history['val_accuracy'], label='Student Val Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.title('Student Accuracy')
plt.show()

# Save student model
student_model.save(student_model_save_path)
print(f"Student model saved to {student_model_save_path}")

Mounted at /content/drive
TensorFlow version: 2.18.0
GPU available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Loading labeled data...
Original MFCCs length: 9986, Labels length: 9986
Adjusted MFCCs length: 9986, Labels length: 9986
Data shape: (9986, 130, 13), Labels shape: (9986,)
Generating pseudo-unlabeled data...
X_train shape: (6990, 130, 13, 1), X_val shape: (1498, 130, 13, 1), X_test shape: (1498, 130, 13, 1), Unlabeled shape: (9986, 130, 13, 1)
Loading pre-trained teacher model...


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.8165 - loss: 0.5854
Teacher Test Loss: 0.5728, Accuracy: 0.8284
Generating pseudo-labels...
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Combined data shape: (16976, 130, 13, 1), Labels shape: (16976,)
Training student model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/250
[1m266/266[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 57ms/step - accuracy: 0.2264 - loss: 2.1995 - val_accuracy: 0.1903 - val_loss: 2.2166 - learning_rate: 1.0000e-04
Epoch 2/250
[1m266/266[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 52ms/step - accuracy: 0.1856 - loss: 2.2320 - val_accuracy: 0.2350 - val_loss: 2.0889 - learning_rate: 1.0000e-04
Epoch 3/250
[1m256/266[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 51ms/step - accuracy: 0.2279 - loss: 2.1318

KeyboardInterrupt: 

In [None]:
Epoch 1/250
266/266 ━━━━━━━━━━━━━━━━━━━━ 23s 57ms/step - accuracy: 0.2156 - loss: 2.2192 - val_accuracy: 0.2150 - val_loss: 2.1923 - learning_rate: 1.0000e-04
Epoch 2/250
266/266 ━━━━━━━━━━━━━━━━━━━━ 14s 54ms/step - accuracy: 0.2021 - loss: 2.1810 - val_accuracy: 0.2530 - val_loss: 2.0986 - learning_rate: 1.0000e-04
Epoch 3/250
266/266 ━━━━━━━━━━━━━━━━━━━━ 14s 54ms/step - accuracy: 0.2375 - loss: 2.1029 - val_accuracy: 0.2590 - val_loss: 2.0483 - learning_rate: 1.0000e-04
Epoch 4/250
266/266 ━━━━━━━━━━━━━━━━━━━━ 21s 55ms/step - accuracy: 0.2625 - loss: 2.0565 - val_accuracy: 0.3231 - val_loss: 1.9873 - learning_rate: 1.0000e-04
Epoch 5/250
266/266 ━━━━━━━━━━━━━━━━━━━━ 20s 54ms/step - accuracy: 0.2929 - loss: 1.9934 - val_accuracy: 0.3371 - val_loss: 1.9093 - learning_rate: 1.0000e-04
Epoch 6/250
266/266 ━━━━━━━━━━━━━━━━━━━━ 21s 54ms/step - accuracy: 0.3000 - loss: 1.9520 - val_accuracy: 0.3585 - val_loss: 1.8674 - learning_rate: 1.0000e-04
Epoch 7/250
266/266 ━━━━━━━━━━━━━━━━━━━━ 14s 54ms/step - accuracy: 0.3279 - loss: 1.8957 - val_accuracy: 0.3652 - val_loss: 1.8122 - learning_rate: 1.0000e-04
Epoch 8/250
266/266 ━━━━━━━━━━━━━━━━━━━━ 14s 54ms/step - accuracy: 0.3443 - loss: 1.8369 - val_accuracy: 0.3879 - val_loss: 1.7659 - learning_rate: 1.0000e-04
Epoch 9/250
266/266 ━━━━━━━━━━━━━━━━━━━━ 14s 54ms/step - accuracy: 0.3684 - loss: 1.7936 - val_accuracy: 0.3798 - val_loss: 1.7553 - learning_rate: 1.0000e-04
...
Epoch 129/250
266/266 ━━━━━━━━━━━━━━━━━━━━ 20s 55ms/step - accuracy: 0.8691 - loss: 0.4178 - val_accuracy: 0.8324 - val_loss: 0.5355 - learning_rate: 3.1250e-06
24/24 ━━━━━━━━━━━━━━━━━━━━ 0s 12ms/step - accuracy: 0.8156 - loss: 0.5878
Student Test Loss: 0.5926, Accuracy: 0.8164