In [1]:
import os
import pandas as pd
import numpy as np
from google.colab import files
import torch
import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T
from tqdm import tqdm

In [2]:
!curl -L -o /content/common-voice.zip https://www.kaggle.com/api/v1/datasets/download/mozillaorg/common-voice

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 12.0G  100 12.0G    0     0  81.9M      0  0:02:30  0:02:30 --:--:-- 90.1M


In [3]:
!unzip "/content/common-voice.zip"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: cv-valid-train/cv-valid-train/sample-190776.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190777.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190778.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190779.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190780.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190781.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190782.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190783.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190784.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190785.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190786.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190787.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190788.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190789.mp3  
  inflating: cv-valid-train/cv-valid-train/sample-190

In [4]:
!rm "/content/common-voice.zip"

In [83]:
data1 = pd.read_csv("/content/cv-valid-train.csv")
data2 = pd.read_csv("/content/cv-valid-test.csv")
data3 = pd.read_csv("/content/cv-valid-dev.csv")
data4 = pd.read_csv("/content/cv-other-train.csv")
data5 = pd.read_csv("/content/cv-other-test.csv")
data6 = pd.read_csv("/content/cv-other-dev.csv")
data7 = pd.read_csv("/content/cv-invalid.csv")

In [84]:
df = pd.concat([
    data1,
    data2,
    data3,
    data4,
    data5,
    data6,
    data7
])

In [85]:
df

Unnamed: 0,filename,text,up_votes,down_votes,age,gender,accent,duration
0,cv-valid-train/sample-000000.mp3,learn to recognize omens and follow them the o...,1,0,,,,
1,cv-valid-train/sample-000001.mp3,everything in the universe evolved he said,1,0,,,,
2,cv-valid-train/sample-000002.mp3,you came so that you could learn about your dr...,1,0,,,,
3,cv-valid-train/sample-000003.mp3,so now i fear nothing because it was those ome...,1,0,,,,
4,cv-valid-train/sample-000004.mp3,if you start your emails with greetings let me...,3,2,,,,
...,...,...,...,...,...,...,...,...
25398,cv-invalid/sample-025398.mp3,well then we've got a problem,0,4,,,,
25399,cv-invalid/sample-025399.mp3,the boy was surprised at his thoughts,0,6,,,,
25400,cv-invalid/sample-025400.mp3,undefined,1,2,,,,
25401,cv-invalid/sample-025401.mp3,but there was something there in his heart tha...,1,5,,,,


# Feature Extraction Begins Here

In [69]:
# Set up GPU device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [70]:
import os
import torch
import torchaudio
import pandas as pd
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor


In [71]:
!nvidia-smi

Fri May  9 06:42:47 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   31C    P0             50W /  400W |    1319MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [72]:

# 2. Function to extract MFCCs (mean + std)
def extract_mfcc(filepath, n_mfcc=40):
    try:
        waveform, sr = torchaudio.load(filepath)
        if waveform.shape[0] > 1:  # Stereo to mono
            waveform = waveform.mean(dim=0, keepdim=True)

        mfcc_transform = torchaudio.transforms.MFCC(
            sample_rate=sr,
            n_mfcc=n_mfcc,
            melkwargs={"n_fft": 1024, "hop_length": 256, "n_mels": 64}
        )
        mfccs = mfcc_transform(waveform).squeeze()  # Shape: (n_mfcc, time)

        # Compute mean and std across time
        features = {}
        for i in range(n_mfcc):
            features[f"mfcc{i+1}_mean"] = mfccs[i].mean().item()
            features[f"mfcc{i+1}_std"] = mfccs[i].std().item()
        return features

    except Exception as e:
        print(f"Error processing {filepath}: {str(e)}")
        return None

In [73]:
# 3. Construct full paths and extract MFCCs
mfcc_data = []
for filename in tqdm(df["filename"]):
    # Handle path: /content/{dataset}/{dataset}/file.mp3
    dataset_name = filename.split("/")[0]  # e.g., "cv-valid-train"
    full_path = os.path.join("/content", dataset_name, filename)

    mfcc_features = extract_mfcc(full_path, n_mfcc=40)
    if mfcc_features is not None:
        mfcc_data.append(mfcc_features)
    else:
        # Pad with None if extraction fails
        mfcc_data.append({f"mfcc{i+1}_{stat}": None for i in range(40) for stat in ["mean", "std"]})


100%|██████████| 380368/380368 [1:07:49<00:00, 93.47it/s]


In [74]:
# 4. Merge with original DataFrame
mfcc_df = pd.DataFrame(mfcc_data)

In [75]:
mfcc_df

Unnamed: 0,mfcc1_mean,mfcc1_std,mfcc2_mean,mfcc2_std,mfcc3_mean,mfcc3_std,mfcc4_mean,mfcc4_std,mfcc5_mean,mfcc5_std,...,mfcc36_mean,mfcc36_std,mfcc37_mean,mfcc37_std,mfcc38_mean,mfcc38_std,mfcc39_mean,mfcc39_std,mfcc40_mean,mfcc40_std
0,-109.263077,78.854065,100.351173,53.085072,-23.461739,30.664598,16.732719,25.154356,-10.448838,15.435843,...,-1.175668,4.492468,-1.691033,3.388836,-0.573369,3.375101,-0.919570,3.718753,-1.794360,3.468382
1,-391.007935,55.265347,53.875214,46.299213,-11.159916,15.904360,28.885607,12.666367,-13.163248,8.789176,...,-1.706345,2.749205,0.679397,2.673455,-1.329319,2.770438,-0.204571,2.816376,-1.076809,2.276889
2,-138.612091,72.284874,62.830906,42.874897,-17.041483,36.858902,43.759735,26.107368,1.029760,15.474696,...,-0.125119,3.445736,-1.235761,3.325171,-2.057179,3.712299,0.251120,3.185244,-1.354789,3.150853
3,-277.856567,53.554676,74.043274,42.208157,-7.872809,22.241148,27.103874,19.011913,-18.380997,15.086171,...,-0.103494,3.331153,-0.330481,3.340889,-1.102569,2.927078,0.340408,3.120746,-0.645374,2.909502
4,21.527964,61.902905,101.927979,32.316940,-36.192905,25.146778,39.710701,21.193989,-18.424234,16.493565,...,-1.071185,3.622339,-0.227506,3.650861,-0.751787,2.977302,-0.223634,3.334391,0.175214,3.112308
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
380363,-652.963867,24.902529,21.398447,6.870189,-42.070938,5.107707,24.569424,3.761482,-18.896727,2.945974,...,-0.486955,1.426654,-0.547786,1.246857,0.498574,1.409750,-0.021753,1.212668,1.598049,1.852672
380364,-350.469116,74.307617,79.713295,40.328350,-9.287087,17.826509,19.482012,17.557531,-1.852102,13.709083,...,-1.995234,2.701185,-0.846011,2.553930,0.011407,2.536131,0.291027,2.499088,1.072422,2.636961
380365,-225.153946,68.051109,97.434219,59.737129,15.728211,23.122917,-7.198732,16.693689,1.735596,13.622336,...,-0.753062,2.778601,-1.193318,2.676164,-1.855811,2.876522,-0.671394,2.565463,0.009431,2.395160
380366,-279.178833,47.907078,56.430923,35.123997,18.582115,24.789991,1.924102,19.086958,-3.769560,13.776646,...,-1.611724,3.330988,-1.847168,2.841657,-2.033417,2.532349,-0.395191,2.893525,1.378712,2.653137


##Rest of the feature Extraction





In [54]:
import torch
import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from tqdm import tqdm
import os

In [55]:
# Configuration
SAMPLE_RATE = 16000
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
BATCH_SIZE = 256  # Increased batch size for better GPU utilization
TARGET_LENGTH = 3 * SAMPLE_RATE  # 3 seconds
NUM_WORKERS = 8  # For parallel audio loading

In [56]:
# Initialize GPU transforms
mel_spec = T.MelSpectrogram(
    sample_rate=SAMPLE_RATE,
    n_fft=1024,
    win_length=512,
    hop_length=256,
    n_mels=64
).to(DEVICE)

spectrogram = T.Spectrogram(
    n_fft=1024,
    win_length=512,
    hop_length=256,
    power=2
).to(DEVICE)

In [57]:
# Helper function to safely convert to Python scalar
def safe_item(x):
    if torch.is_tensor(x):
        return x.item()
    return float(x)

In [58]:
def preprocess_audio(wav):
    """Resample and convert to mono if needed"""
    if wav.shape[0] > 1:
        wav = wav.mean(dim=0, keepdim=True)
    return wav


In [59]:
def load_and_pad_audio(filename):
    """Load single audio file and pad/trim to target length"""
    try:
        dataset = filename.split('/')[0]
        full_path = f"/content/{dataset}/{filename}"

        wav, sr = torchaudio.load(full_path)
        if sr != SAMPLE_RATE:
            wav = F.resample(wav, sr, SAMPLE_RATE)

        wav = preprocess_audio(wav)

        # Pad or trim to target length
        if wav.shape[-1] < TARGET_LENGTH:
            pad_amount = TARGET_LENGTH - wav.shape[-1]
            wav = torch.nn.functional.pad(wav, (0, pad_amount), mode='constant', value=0)
        else:
            wav = wav[..., :TARGET_LENGTH]

        return wav.squeeze(0)  # Remove channel dimension for processing

    except Exception as e:
        print(f"Error loading {filename}: {str(e)}")
        return None

In [60]:
def process_batch(filenames):
    """Process a batch of filenames"""
    # Load and preprocess all files in batch
    waveforms = []
    valid_files = []

    for filename in filenames:
        wav = load_and_pad_audio(filename)
        if wav is not None:
            waveforms.append(wav)
            valid_files.append(filename)

    if not waveforms:
        return []

    # Stack waveforms on GPU
    waveforms_batch = torch.stack(waveforms).to(DEVICE)
    features_batch = []

    with torch.no_grad():


        # Mel Features (batch processing)
        mel = mel_spec(waveforms_batch.unsqueeze(1))

        # Process each sample in batch
        for i in range(waveforms_batch.shape[0]):
            features = {}



            # Mel features
            features['mel_energy_mean'] = safe_item(mel[i].mean())
            features['mel_energy_std'] = safe_item(mel[i].std())

            # Process remaining features
            waveform = waveforms_batch[i]
            try:
                # Spectral Features
                spec = spectrogram(waveform.unsqueeze(0))
                freqs = torch.linspace(0, SAMPLE_RATE//2, spec.shape[-1], device=DEVICE)
                spec_sum = spec.sum(dim=-1) + 1e-8
                spectral_centroid = (spec * freqs).sum(dim=-1) / spec_sum

                spec_diff = torch.diff(spec.float(), dim=-1)
                spectral_flux = safe_item(torch.mean(spec_diff**2))

                features.update({
                    'spectral_centroid_mean': safe_item(spectral_centroid.mean()),
                    'spectral_centroid_std': safe_item(spectral_centroid.std()),
                    'spectral_flux': spectral_flux
                })

                # Energy Features
                frame_length = 512
                hop_length = 256
                frames = waveform.unfold(0, frame_length, hop_length)
                rms = torch.sqrt(torch.mean(frames**2, dim=1))

                zcr = torch.sum(torch.abs(torch.diff((frames > 0).float(), dim=1)), dim=1) / (frame_length-1)

                features.update({
                    'rms_mean': safe_item(rms.mean()),
                    'rms_std': safe_item(rms.std()),
                    'zero_crossing_rate': safe_item(zcr.mean()),
                    'energy_variability': safe_item(rms.std() / (rms.mean() + 1e-8))
                })

                # Pitch Features
                pitch = F.detect_pitch_frequency(waveform.unsqueeze(0).unsqueeze(0), SAMPLE_RATE)
                pitch = pitch[pitch > 0]
                if len(pitch) > 0:
                    pitch_diff = torch.diff(pitch.float())
                    features.update({
                        'pitch_mean': safe_item(pitch.mean()),
                        'pitch_std': safe_item(pitch.std()),
                        'pitch_range': safe_item(pitch.max() - pitch.min()),
                        'jitter_local': safe_item(torch.mean(torch.abs(pitch_diff)) / (pitch.mean() + 1e-8)),
                        'voiced_fraction': safe_item(len(pitch) / (waveform.shape[-1] // 256))
                    })
                else:
                    features.update({
                        'pitch_mean': 0.0,
                        'pitch_std': 0.0,
                        'pitch_range': 0.0,
                        'jitter_local': 0.0,
                        'voiced_fraction': 0.0
                    })

                features['filename'] = valid_files[i]
                features_batch.append(features)

            except Exception as e:
                print(f"Error processing {valid_files[i]}: {str(e)}")
                continue

    return features_batch


In [61]:
def process_files(df, output_prefix="xtra_features"):
    all_features = []
    filenames = df['filename'].tolist()

    # Process in batches with progress bar
    for i in tqdm(range(0, len(filenames), BATCH_SIZE), desc="Processing"):
        batch_filenames = filenames[i:i+BATCH_SIZE]
        batch_features = process_batch(batch_filenames)
        all_features.extend(batch_features)

        # Save periodically
        if i % (10 * BATCH_SIZE) == 0 and i > 0:
            pd.DataFrame(all_features).to_csv(f"{output_prefix}_partial.csv", index=False)

    # Save final results to a single file
    features_df = pd.DataFrame(all_features)
    features_df.to_csv(f"{output_prefix}_final.csv", index=False)

In [62]:
# 4. Run Pipeline
if __name__ == "__main__":
    # Check GPU memory
    if torch.cuda.is_available():
        print(f"GPU Memory Available: {torch.cuda.get_device_properties(0).total_memory/1e9:.2f}GB")
    process_files(df)

GPU Memory Available: 42.47GB


Processing: 100%|██████████| 1486/1486 [4:30:57<00:00, 10.94s/it]


In [64]:
extra_features=pd.read_csv("/content/xtra_features_final.csv")

In [65]:
extra_features.head()

Unnamed: 0,mel_energy_mean,mel_energy_std,spectral_centroid_mean,spectral_centroid_std,spectral_flux,rms_mean,rms_std,zero_crossing_rate,energy_variability,pitch_mean,pitch_std,pitch_range,jitter_local,voiced_fraction,filename
0,10.911239,61.937744,4075.480469,737.834351,111.735184,0.061758,0.058081,0.143636,0.940466,385.530823,746.59613,2576.27124,0.051303,1.524064,cv-valid-train/sample-000000.mp3
1,0.015948,0.118071,5226.896484,838.178772,0.000485,0.001947,0.002603,0.182459,1.336576,205.800476,176.493576,891.891907,0.073962,1.524064,cv-valid-train/sample-000001.mp3
2,5.916816,67.486755,4314.369141,766.814819,152.761978,0.045321,0.043033,0.183722,0.949513,335.032013,408.392059,2535.519287,0.131158,1.524064,cv-valid-train/sample-000002.mp3
3,0.136712,1.671695,4588.571289,969.868591,0.096589,0.005201,0.007962,0.174663,1.530913,186.870331,46.697372,157.71991,0.040336,1.524064,cv-valid-train/sample-000003.mp3
4,352.064484,2071.311279,3675.279053,848.967041,76599.890625,0.40733,0.254015,0.128401,0.62361,241.868256,159.871399,949.878418,0.023514,1.524064,cv-valid-train/sample-000004.mp3


In [80]:
features_only= pd.concat([extra_features, mfcc_df], axis=1)

In [90]:
features_only.to_csv("features_only.csv")

In [67]:
df.head()

Unnamed: 0,filename,text,up_votes,down_votes,age,gender,accent,duration,mel_energy_mean,mel_energy_std,...,spectral_flux,rms_mean,rms_std,zero_crossing_rate,energy_variability,pitch_mean,pitch_std,pitch_range,jitter_local,voiced_fraction
0,cv-valid-train/sample-000000.mp3,learn to recognize omens and follow them the o...,1,0,,,,,10.911239,61.937744,...,111.735184,0.061758,0.058081,0.143636,0.940466,385.530823,746.59613,2576.27124,0.051303,1.524064
1,cv-valid-train/sample-000001.mp3,everything in the universe evolved he said,1,0,,,,,0.015948,0.118071,...,0.000485,0.001947,0.002603,0.182459,1.336576,205.800476,176.493576,891.891907,0.073962,1.524064
2,cv-valid-train/sample-000002.mp3,you came so that you could learn about your dr...,1,0,,,,,5.916816,67.486755,...,152.761978,0.045321,0.043033,0.183722,0.949513,335.032013,408.392059,2535.519287,0.131158,1.524064
3,cv-valid-train/sample-000003.mp3,so now i fear nothing because it was those ome...,1,0,,,,,0.136712,1.671695,...,0.096589,0.005201,0.007962,0.174663,1.530913,186.870331,46.697372,157.71991,0.040336,1.524064
4,cv-valid-train/sample-000004.mp3,if you start your emails with greetings let me...,3,2,,,,,352.064484,2071.311279,...,76599.890625,0.40733,0.254015,0.128401,0.62361,241.868256,159.871399,949.878418,0.023514,1.524064


In [77]:
final_df= pd.concat([df, mfcc_df], axis=1)

In [78]:
final_df

Unnamed: 0,filename,text,up_votes,down_votes,age,gender,accent,duration,mel_energy_mean,mel_energy_std,...,mfcc36_mean,mfcc36_std,mfcc37_mean,mfcc37_std,mfcc38_mean,mfcc38_std,mfcc39_mean,mfcc39_std,mfcc40_mean,mfcc40_std
0,cv-valid-train/sample-000000.mp3,learn to recognize omens and follow them the o...,1,0,,,,,1.091124e+01,6.193774e+01,...,-1.175668,4.492468,-1.691033,3.388836,-0.573369,3.375101,-0.919570,3.718753,-1.794360,3.468382
1,cv-valid-train/sample-000001.mp3,everything in the universe evolved he said,1,0,,,,,1.594811e-02,1.180711e-01,...,-1.706345,2.749205,0.679397,2.673455,-1.329319,2.770438,-0.204571,2.816376,-1.076809,2.276889
2,cv-valid-train/sample-000002.mp3,you came so that you could learn about your dr...,1,0,,,,,5.916816e+00,6.748676e+01,...,-0.125119,3.445736,-1.235761,3.325171,-2.057179,3.712299,0.251120,3.185244,-1.354789,3.150853
3,cv-valid-train/sample-000003.mp3,so now i fear nothing because it was those ome...,1,0,,,,,1.367119e-01,1.671695e+00,...,-0.103494,3.331153,-0.330481,3.340889,-1.102569,2.927078,0.340408,3.120746,-0.645374,2.909502
4,cv-valid-train/sample-000004.mp3,if you start your emails with greetings let me...,3,2,,,,,3.520645e+02,2.071311e+03,...,-1.071185,3.622339,-0.227506,3.650861,-0.751787,2.977302,-0.223634,3.334391,0.175214,3.112308
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
380363,cv-invalid/sample-025398.mp3,well then we've got a problem,0,4,,,,,9.480533e-09,3.978150e-08,...,-0.486955,1.426654,-0.547786,1.246857,0.498574,1.409750,-0.021753,1.212668,1.598049,1.852672
380364,cv-invalid/sample-025399.mp3,the boy was surprised at his thoughts,0,6,,,,,1.099137e-01,7.406676e-01,...,-1.995234,2.701185,-0.846011,2.553930,0.011407,2.536131,0.291027,2.499088,1.072422,2.636961
380365,cv-invalid/sample-025400.mp3,undefined,1,2,,,,,6.226113e+00,7.599648e+01,...,-0.753062,2.778601,-1.193318,2.676164,-1.855811,2.876522,-0.671394,2.565463,0.009431,2.395160
380366,cv-invalid/sample-025401.mp3,but there was something there in his heart tha...,1,5,,,,,1.727122e-01,2.894363e+00,...,-1.611724,3.330988,-1.847168,2.841657,-2.033417,2.532349,-0.395191,2.893525,1.378712,2.653137


In [88]:
final_df.head(10)

Unnamed: 0,filename,text,up_votes,down_votes,age,gender,accent,duration,mel_energy_mean,mel_energy_std,...,mfcc36_mean,mfcc36_std,mfcc37_mean,mfcc37_std,mfcc38_mean,mfcc38_std,mfcc39_mean,mfcc39_std,mfcc40_mean,mfcc40_std
0,cv-valid-train/sample-000000.mp3,learn to recognize omens and follow them the o...,1,0,,,,,10.911239,61.937744,...,-1.175668,4.492468,-1.691033,3.388836,-0.573369,3.375101,-0.91957,3.718753,-1.79436,3.468382
1,cv-valid-train/sample-000001.mp3,everything in the universe evolved he said,1,0,,,,,0.015948,0.118071,...,-1.706345,2.749205,0.679397,2.673455,-1.329319,2.770438,-0.204571,2.816376,-1.076809,2.276889
2,cv-valid-train/sample-000002.mp3,you came so that you could learn about your dr...,1,0,,,,,5.916816,67.486755,...,-0.125119,3.445736,-1.235761,3.325171,-2.057179,3.712299,0.25112,3.185244,-1.354789,3.150853
3,cv-valid-train/sample-000003.mp3,so now i fear nothing because it was those ome...,1,0,,,,,0.136712,1.671695,...,-0.103494,3.331153,-0.330481,3.340889,-1.102569,2.927078,0.340408,3.120746,-0.645374,2.909502
4,cv-valid-train/sample-000004.mp3,if you start your emails with greetings let me...,3,2,,,,,352.064484,2071.311279,...,-1.071185,3.622339,-0.227506,3.650861,-0.751787,2.977302,-0.223634,3.334391,0.175214,3.112308
5,cv-valid-train/sample-000005.mp3,a shepherd may like to travel but he should ne...,1,0,twenties,female,us,,0.145422,1.002985,...,0.713001,3.564744,-0.381806,2.981815,-0.579758,2.881244,0.485405,2.909983,0.077802,2.991018
6,cv-valid-train/sample-000006.mp3,night fell and an assortment of fighting men a...,3,0,,,,,13.055654,71.468925,...,-3.162733,3.726125,1.042821,3.111831,-2.308166,3.178723,1.11351,2.93134,-2.784063,3.240911
7,cv-valid-train/sample-000007.mp3,i heard a faint movement under my feet,2,1,,,,,34.683773,160.207123,...,-1.249133,4.039364,0.216817,3.555021,0.290781,4.15384,-0.442891,3.73807,-1.152284,3.396523
8,cv-valid-train/sample-000008.mp3,put jackie right on the staff,3,0,seventies,male,us,,0.385244,5.618464,...,-2.040691,3.768293,-0.501126,3.106052,0.682313,3.58716,1.008271,2.845189,-1.269672,2.983612
9,cv-valid-train/sample-000009.mp3,when he speaks in our language i can interpret...,1,0,,,,,35.240356,199.582001,...,-2.480155,3.367556,-0.083058,3.372272,-3.237001,3.988638,-1.17094,3.286232,-0.383051,3.088122


In [91]:
final_df.to_csv("final_dataset.csv")