### Import modules

In [3]:
import librosa
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import pandas as pd
import os

### Define Functions

In [4]:


def extract_features(audio_file, frame_length=0.1, hop_length=0.05):
    """Summarise one audio clip as a fixed-length acoustic feature vector.

    The clip is loaded at its native sampling rate, analysed in short
    overlapping frames, and every frame-level descriptor is collapsed into
    summary statistics, so clips of any duration map to the same number of
    features.

    Parameters
    ----------
    audio_file : str or path-like
        Audio file handed to ``librosa.load``.
    frame_length : float, default 0.1
        Analysis window length in seconds.
    hop_length : float, default 0.05
        Step between consecutive windows in seconds.

    Returns
    -------
    numpy.ndarray
        1-D vector of 32 values, in this order: 13 MFCC means, 13 MFCC
        standard deviations, spectral-centroid mean, spectral-bandwidth
        mean, zero-crossing-rate mean and std, frame-energy mean and std.

    Raises
    ------
    ValueError
        If the clip holds no samples, or if ``frame_length`` / ``hop_length``
        is shorter than one sample at the clip's sampling rate.
    """
    # sr=None keeps the file's native sampling rate instead of resampling
    y, sr = librosa.load(audio_file, sr=None)
    if y.size == 0:
        raise ValueError(f"{audio_file!r} contains no audio samples")

    # Convert the window and hop durations from seconds to samples
    frame_len_samples = int(frame_length * sr)
    hop_len_samples = int(hop_length * sr)
    if frame_len_samples < 1 or hop_len_samples < 1:
        raise ValueError(
            "frame_length and hop_length must each span at least one sample "
            f"(got {frame_len_samples} and {hop_len_samples} samples at sr={sr})"
        )

    # Extract MFCCs -> shape (13, n_frames)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13,
                                 hop_length=hop_len_samples,
                                 n_fft=frame_len_samples)

    # Extract spectral features -> each of shape (1, n_frames)
    spectral_centroid = librosa.feature.spectral_centroid(
        y=y, sr=sr, hop_length=hop_len_samples, n_fft=frame_len_samples)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(
        y=y, sr=sr, hop_length=hop_len_samples, n_fft=frame_len_samples)

    # Extract temporal features -> shape (1, n_frames)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(
        y, frame_length=frame_len_samples, hop_length=hop_len_samples)

    # Short-time energy: sum of squared samples per window. Windows start
    # every hop and are simply truncated at the end of the signal. The sum is
    # done by NumPy (accumulating in float64) rather than Python's builtin
    # sum, which would iterate sample by sample.
    energy = np.array([
        np.sum(np.square(y[start:start + frame_len_samples]), dtype=np.float64)
        for start in range(0, len(y), hop_len_samples)
    ])

    # Per-coefficient statistics across time
    mfcc_means = np.mean(mfccs, axis=1)
    mfcc_stds = np.std(mfccs, axis=1)

    # The remaining descriptors are single-row tracks over time; reduce each
    # one to scalar statistics so it can sit next to the MFCC statistics in
    # one flat vector.
    spectral_centroid_mean = np.mean(spectral_centroid)
    spectral_bandwidth_mean = np.mean(spectral_bandwidth)
    zcr_mean = np.mean(zero_crossing_rate)
    zcr_std = np.std(zero_crossing_rate)
    energy_mean = np.mean(energy)
    energy_std = np.std(energy)

    # Combine all features into a single vector (order is part of the
    # contract: a trained model expects exactly this layout)
    feature_vector = np.hstack([
        mfcc_means,
        mfcc_stds,
        spectral_centroid_mean,
        spectral_bandwidth_mean,
        zcr_mean,
        zcr_std,
        energy_mean,
        energy_std
    ])

    return feature_vector

# Train model with labeled data
def train_laughter_detector(audio_files, labels, random_state=42):
    """Train a random-forest laughter classifier from labelled audio clips.

    Features are extracted from every clip with ``extract_features``, 20% of
    the clips are held out, a 100-tree random forest is fitted on the rest,
    and the held-out accuracy is printed.

    Parameters
    ----------
    audio_files : iterable of str
        Paths of the audio clips, one per training example.
    labels : sequence
        Class label for each clip, in the same order as ``audio_files``.
    random_state : int, default 42
        Seed used for both the train/test split and the forest, so repeated
        runs on the same data give the same model and accuracy.

    Returns
    -------
    RandomForestClassifier
        The fitted model.

    Raises
    ------
    ValueError
        If no audio files are given or the number of labels does not match
        the number of audio files.
    """
    audio_files = list(audio_files)

    # Fail fast: feature extraction over many files is slow, so catch
    # mismatched inputs before doing any of that work.
    if not audio_files:
        raise ValueError("audio_files is empty; nothing to train on")
    if len(audio_files) != len(labels):
        raise ValueError(
            f"got {len(audio_files)} audio files but {len(labels)} labels; "
            "they must have the same length"
        )

    # One feature row per clip
    features = np.vstack([extract_features(audio_file)
                          for audio_file in audio_files])

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=random_state)

    # Train model (seeded so the forest itself is reproducible)
    model = RandomForestClassifier(n_estimators=100, random_state=random_state)
    model.fit(X_train, y_train)

    # Evaluate on the held-out clips
    accuracy = model.score(X_test, y_test)
    print(f"Model accuracy: {accuracy}")

    return model


def detect_laughter(model, audio_file, threshold=0.7):
    """Decide whether an audio clip is classified as laughter.

    Parameters
    ----------
    model : fitted classifier
        Object exposing ``predict_proba``; column 1 of its output is taken
        as the laughter probability (assumes a binary model whose second
        class is "laughter" — confirm against the training labels).
    audio_file : str
        Path of the clip, passed to ``extract_features``.
    threshold : float, default 0.7
        The clip counts as laughter only when its probability is strictly
        greater than this value.

    Returns
    -------
    numpy.ndarray
        Boolean array with a single element: the decision for the whole clip.
        No temporal smoothing across clips is performed here.
    """
    # predict_proba expects a 2-D batch, so wrap the one clip as a 1-row matrix
    clip_row = extract_features(audio_file).reshape(1, -1)

    # Probability assigned to the second class for that single row
    positive_prob = model.predict_proba(clip_row)[:, 1]

    # Placeholder kept from the original design: smoothing of isolated
    # detections would happen on the caller's side once per-clip results
    # are collected.
    return positive_prob > threshold

### Load annotations and build file lists

In [6]:
# Annotation table: one row per snippet window, with its onset/offset and a
# binary laughter label (ls_binary).
annotation_df = pd.read_csv(
    '/Volumes/T7 Shield/friends/laughter_classifier/friends_s01e01a_laughter_1.49sec_gemini-2.5-pro-preview-03-25.csv'
)
print(annotation_df.head())

# One snippet file per annotated window; the file name embeds the window's
# onset value.
audio_file_list = [
    '/Volumes/T7 Shield/friends/laughter_classifier/audio_snippets/temp' + str(onset) + '.mp3'
    for onset in annotation_df['onsets']
]
print(audio_file_list[:3])

# Labels in the same row order as the snippet paths above
laughter_labels = list(annotation_df['ls_binary'].values)
print(laughter_labels[:3])

   Unnamed: 0  onsets minutes_seconds  offsets  ls  ls_binary
0           0       0            0:00     1490  no          0
1           1    1490            0:01     2980  no          0
2           2    2980            0:02     4470  no          0
3           3    4470            0:04     5960  no          0
4           4    5960            0:05     7450  no          0
['/Volumes/T7 Shield/friends/laughter_classifier/audio_snippets/temp0.mp3', '/Volumes/T7 Shield/friends/laughter_classifier/audio_snippets/temp1490.mp3', '/Volumes/T7 Shield/friends/laughter_classifier/audio_snippets/temp2980.mp3']
[0, 0, 0]


### Train Laughter Classification Model

In [7]:
# Fit the classifier on every annotated snippet; the held-out accuracy is
# printed by train_laughter_detector as a side effect.
my_classification_model = train_laughter_detector(
    audio_files=audio_file_list,
    labels=laughter_labels,
)

Model accuracy: 0.8739495798319328


### Apply Model to new data

In [8]:
# Score every snippet in the test folder with the trained classifier.
test_dir = '/Volumes/T7 Shield/friends/laughter_classifier/audio_snippets_test/'

# os.listdir returns entries in arbitrary order; sorting makes the order of
# `predictions` reproducible, so predictions[i] always belongs to real_files[i]
# in the same sequence on every run.
files = sorted(os.listdir(test_dir))

# Skip hidden entries; a leading '.' also covers the '._*' sidecar files the
# original filter checked for separately.
real_files = [file for file in files if not file.startswith('.')]

predictions = []
for current_file, file_name in enumerate(real_files):
    # test_dir already ends with '/', so plain concatenation yields the full path
    current_file_path = test_dir + file_name
    current_prediction = detect_laughter(my_classification_model, current_file_path)
    predictions.append(current_prediction)
    print(current_prediction)


[False]
[False]
[ True]
[ True]
[ True]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[ True]
[False]
[False]
[False]
[ True]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[ True]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[ True]
[ True]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[ True]
[ True]
[False]
[False]
[False]
[False]
[ True]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[ True]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[ True]
[False]
[False]
[False]
[False]
[False]
[False]
[False]
[ True]
[ True]
[False]
[False]
[False]
[False]
[False]
[ True]
[ True]
[ True]
[ True]
[False]
[ True]
[False]
[False]
[False]
[False]
[False]
[False]
[False]


In [9]:
# Show the working directory: the relative '../data/...' path used when the
# model is saved further down resolves against it.
os.getcwd()

'/Users/schmaelz/Documents/01_GITHUB/nomcomm/GITHUB_PAPERS_WORKING/friends/scripts'

In [10]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# imports cell at the top so dependencies are visible in one place.
import joblib

# Save it
# Persist the trained classifier; the path is relative to the current working
# directory. joblib.dump returns the list of file names it wrote.
# NOTE(review): the file name contains a space ('laughter_classifier_ model.pkl').
# This looks like a typo, but confirm nothing loads this exact name before
# renaming it.
joblib.dump(my_classification_model, '../data/02_laughter/laughter_classifier_ model.pkl')


['../data/02_laughter/laughter_classifier_ model.pkl']