# Prepare Anesthetized DataFrame

```
Here, we will build a DataFrame from the HDF5 (.h5) files in the original dataset and save it as a .csv file.

Notes
-----
1. Electrode locations are not part of the CRCNS aa-4 dataset and must be acquired separately.
2. We are taking {field L/CML complex} as A1 and {NCM, CMM} as A2 (Robotka et al 2023).
3. For WhiWhi4522M, Site5 uses different stimuli than other sites. Recordings from this site have been dropped.
4. Noise stimuli have been dropped.
```

## Import Modules

In [1]:
import numpy as np
import pandas as pd
import h5py
import os

import warnings
warnings.filterwarnings("ignore")

## Utility Functions

```
In the dataset, there are "raw" electrode files, and spike-sorted single-unit files. We only want to use the spike-sorted files. Electrode location files are not part of the original dataset and must be placed separately in the 'data/anesthetized' directory.
```

In [2]:
def isSpikeSorted(filename):
    """Tell whether `filename` names a spike-sorted single-unit file.

    A name qualifies when it ends with '.h5' and contains '_ss'.
    """
    return filename.endswith('.h5') and '_ss' in filename

In [3]:
def isLocFile(filename):
    """Tell whether `filename` names an electrode-location file.

    A name qualifies when it ends with '.txt' and contains 'Histo'.
    """
    return filename.endswith('.txt') and 'Histo' in filename

## Create DataFrame

### Collect Data

In [4]:
# Root folder of the anesthetized dataset; every sub-directory in it is
# treated as one subject (bird).
data_root = os.path.join('..', '..', 'data', 'anesthetized')
# os.scandir yields entries in arbitrary order, so sort by name to keep the
# subject order (and the row order of everything built from it) reproducible.
# The context manager releases the directory handle once the listing is read.
with os.scandir(data_root) as entries:
    birds = sorted((d for d in entries if d.is_dir()), key=lambda d: d.name)
birds

[<DirEntry 'BlaBro09xxF'>,
 <DirEntry 'GreBlu9508M'>,
 <DirEntry 'LblBlu2028M'>,
 <DirEntry 'WhiBlu5396M'>,
 <DirEntry 'WhiWhi4522M'>,
 <DirEntry 'YelBlu6903F'>]

In [5]:
dict_list = []    # one dict (future DataFrame row) per (single unit, stimulus) pair
print("Collecting data...")
# loop through subjects
for bird in birds:
    print("Currently working on: ", bird.name)
    # loop through data files; sorted by name because os.scandir yields
    # entries in arbitrary order and the row order should be reproducible
    for data_file in sorted(os.scandir(bird.path), key=lambda entry: entry.name):
        # skip over raw electrode files
        if not isSpikeSorted(data_file.name):
            continue

        # Open read-only inside a context manager so the HDF5 handle is
        # closed as soon as this unit has been read (it was never closed before).
        with h5py.File(data_file.path, 'r') as single_unit:
            main_key = list(single_unit.keys())[0]
            assert 'Call' in main_key

            # Fields shared by every stimulus of this single unit.
            unit_fields = {
                'subject': bird.name,
                'site': str(single_unit.attrs['site'], 'utf-8'),
                # electrode file name = unit file name without its '_ss<N>' suffix;
                # splitting on the last '_ss' also copes with multi-digit unit numbers
                'electrode': data_file.name.rsplit('_ss', 1)[0] + '.h5',
                'single_unit': os.path.splitext(data_file.name)[0],
                'sortType': str(single_unit.attrs['sortType'], 'utf-8'),
            }

            # collect single_unit data for all stimuli
            for stim_number in single_unit[main_key].keys():
                stim_attrs = single_unit[main_key][stim_number].attrs

                data_dict = dict(unit_fields)
                data_dict['stim_number'] = stim_number

                # loop through attributes
                for attr in stim_attrs.keys():
                    value = stim_attrs[attr]
                    if not isinstance(value, float):
                        try:
                            # string types are stored as byte-strings in the dataset
                            value = str(value, 'utf-8')
                        except (TypeError, ValueError):
                            # Not decodable as UTF-8 text: keep the raw value.
                            # (UnicodeDecodeError is a ValueError subclass.)
                            pass
                    data_dict[attr] = value

                # keep only the file name of the stored (POSIX-style) wav path;
                # rsplit also handles a value that contains no '/'
                wavefile = str(stim_attrs['original_wavfile'], 'utf-8')
                data_dict['original_wavfile'] = wavefile.rsplit('/', 1)[-1]

                dict_list.append(data_dict)

print("Finished!")

Collecting data...
Currently working on:  BlaBro09xxF
Currently working on:  GreBlu9508M
Currently working on:  LblBlu2028M
Currently working on:  WhiBlu5396M
Currently working on:  WhiWhi4522M
Currently working on:  YelBlu6903F
Finished!


In [6]:
# Turn the collected per-stimulus dicts into a frame, then discard rows that
# are exact duplicates of an earlier row.
df = pd.DataFrame(dict_list)
df = df.drop_duplicates()
df.sample(5)

Unnamed: 0,subject,site,electrode,single_unit,sortType,stim_number,callerAge,callid,original_wavfile,pvalue,stim_class,stim_duration,stim_md5,stim_source,stim_source_sex,stim_type,tdt_wavfile,tstat,zscore,strfid
106592,WhiWhi4522M,Site3,Site3_L1900R1800_e26_s0.h5,Site3_L1900R1800_e26_s0_ss1,single,44,A,Di,BlaBla0506_MAF_Di_1.wav,0.947821,Con,1.674613,225e52a79797aeebdc8d1839f54ad60c,familiar,m,call,/auto/fdata/julie/Stims/WhiWhi4522M/wavfiles/s...,0.067292,0.02128,
56961,GreBlu9508M,Site4,Site4_L1500R1900_e22_s0.h5,Site4_L1500R1900_e22_s0_ss1,single,121,A,Te,BlaLbl8026_FAS_Te_5-9-3.wav,0.008634,Con,2.5,a82b491f640cc358b35cac9321c71dee,unfamiliar,f,call,/auto/fdata/julie/Stims/GreBlu9508M/wavfiles/s...,3.341897,1.056801,
158123,YelBlu6903F,Site3,Site3_L1200R1200_e9_s0.h5,Site3_L1200R1200_e9_s0_ss3,single,122,A,Ne,BlaLbl8026_FAS_Ne_9-2-5.wav,0.976641,Con,2.5,c016714cbf5b566d134572e6a918c08c,unfamiliar,f,call,/auto/fdata/julie/Stims/YelBlu6903F/wavfiles/s...,-0.029954,-0.008647,
99151,WhiWhi4522M,Site2,Site2_L1500R1450_e30_s0.h5,Site2_L1500R1450_e30_s0_ss3,single,70,,,STRFxx0000__Ri_4.wav,0.538679,Syn,2.000041,846945230dbbbd62608f9a356a5ec6ef,,c,mlnoise,/auto/fdata/julie/Stims/WhiWhi4522M/wavfiles/s...,0.639104,0.202102,Call
64667,GreBlu9508M,Site5,Site5_L1800R2100_e17_s0.h5,Site5_L1800R2100_e17_s0_ss1,single,277,,,WhiLbl0010_MAS_So_3.wav,0.166253,Con,2.639141,c469fc5eecc8bd230ff7b404ebd1464d,unfamiliar,m,song,/auto/fdata/julie/Stims/GreBlu9508M/wavfiles/s...,1.523026,0.507675,Call


### Add Hemisphere Data

In [7]:
# Column names applied to the whitespace-separated electrode-location files;
# the names ending in '?' mark columns whose meaning is not known.
loc_columns = ['electrode', 'brain_region', 'x?', 'y?', 'hemisphere', 'unknown?']

df_list = []
for file in os.scandir(data_root):
    if not isLocFile(file.name):
        continue
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True and parses the file the same way.
    data_frame = pd.read_csv(file.path, sep=r'\s+', names=loc_columns)
    # Characters 13..23 of the file name are used as the subject ID
    # (11 characters, the length of folder names such as 'WhiWhi4522M').
    # NOTE(review): assumes a fixed-length file-name prefix -- confirm.
    # .assign() builds a new frame instead of writing into a column subset.
    data_frame = data_frame[['electrode', 'brain_region', 'hemisphere']].assign(
        subject=file.name[13:24]
    )
    df_list.append(data_frame)

loc_df = pd.concat(df_list)
loc_df.sample(5)

Unnamed: 0,electrode,brain_region,hemisphere,subject
3,Site1_L500R400_e13_s0.h5,NCM,L,YelBlu6903F
94,Site3_L1250R1650_e7_s0.h5,NCM,L,GreBlu9508M
60,Site3_L1900R1800_e22_s0.h5,CMM,R,WhiWhi4522M
19,Site1_L500R400_e4_s0.h5,L2B,L,YelBlu6903F
100,Site4_L1500R1500_e23_s0.h5,L1,R,YelBlu6903F


In [8]:
# Left-join the electrode locations onto the unit table (rows without a
# matching location are kept), then discard the join key 'electrode'.
df = (
    df.merge(loc_df, how='left', on=['subject', 'electrode'])
      .drop(columns='electrode')
)
df.sample(5)

Unnamed: 0,subject,site,single_unit,sortType,stim_number,callerAge,callid,original_wavfile,pvalue,stim_class,...,stim_md5,stim_source,stim_source_sex,stim_type,tdt_wavfile,tstat,zscore,strfid,brain_region,hemisphere
43380,GreBlu9508M,Site2,Site2_L1100R1450_e7_s0_ss2,single,60,C,LT,LblBlu1630_FCS_LT_6-2-3.wav,0.121521,Con,...,cdc2c1491670ea2cfc44841676d2a5ea,unfamiliar,f,call,/auto/fdata/julie/Stims/GreBlu9508M/wavfiles/s...,1.709535,0.540603,,HP-NCM,L
32886,GreBlu9508M,Site1,Site1_L750R1100_e9_s0_ss1,single,96,,,STRFxx0000__So_8.wav,0.024896,Con,...,78cb3b8daaac8c4977ef6ba23939a909,unfamiliar,m,song,/auto/fdata/julie/Stims/GreBlu9508M/wavfiles/s...,2.687559,0.849881,Call,NCM,L
41821,GreBlu9508M,Site2,Site2_L1100R1450_e3_s0_ss2,single,61,C,LT,LblBlu1630_FCS_LT_7-8-9.wav,0.003198,Con,...,3ace623f051ba02c9e9fbce997a4d625,unfamiliar,f,call,/auto/fdata/julie/Stims/GreBlu9508M/wavfiles/s...,3.981697,1.259123,,L1,L
53911,GreBlu9508M,Site4,Site4_L1500R1900_e13_s0_ss2,single,61,C,LT,LblBlu1630_FCS_LT_7-8-9.wav,0.099555,Con,...,3ace623f051ba02c9e9fbce997a4d625,unfamiliar,f,call,/auto/fdata/julie/Stims/GreBlu9508M/wavfiles/s...,1.835921,0.580569,,L,L
96647,WhiWhi4522M,Site2,Site2_L1500R1450_e22_s0_ss1,single,55,A,Th,BlaBla0506_MAF_Th_6-5-7.wav,0.645325,Con,...,aae98dbbe085fbd1927ef41d896d1f26,familiar,m,call,/auto/fdata/julie/Stims/WhiWhi4522M/wavfiles/s...,0.474514,0.143071,,CMM,R


In [9]:
# Fraction of rows whose brain_region is missing after the merge.
df['brain_region'].isna().mean()

0.04357837537070032

## Clean DataFrame
### Drop Unnecessary Columns

In [10]:
# Display the column labels of the merged frame.
df.columns

Index(['subject', 'site', 'single_unit', 'sortType', 'stim_number',
       'callerAge', 'callid', 'original_wavfile', 'pvalue', 'stim_class',
       'stim_duration', 'stim_md5', 'stim_source', 'stim_source_sex',
       'stim_type', 'tdt_wavfile', 'tstat', 'zscore', 'strfid', 'brain_region',
       'hemisphere'],
      dtype='object')

In [11]:
# Columns retained from here on, in the order they should appear.
kept_columns = [
    'subject', 'site', 'single_unit', 'sortType', 'stim_number',
    'original_wavfile', 'callerAge', 'callid', 'stim_duration', 'stim_source',
    'stim_source_sex', 'stim_type', 'brain_region', 'hemisphere',
]
df = df[kept_columns]
df.sample(5)

Unnamed: 0,subject,site,single_unit,sortType,stim_number,original_wavfile,callerAge,callid,stim_duration,stim_source,stim_source_sex,stim_type,brain_region,hemisphere
60807,GreBlu9508M,Site4,Site4_L1500R1900_e3_s0_ss1,single,67,LblBlu2028_MCS_LT_9-8-10.wav,C,LT,2.5,unfamiliar,m,call,NCM,L
130559,YelBlu6903F,Site1,Site1_L500R400_e12_s0_ss2,single,289,YelGre5275_MAF_Te_10-3-8.wav,A,Te,2.5,familiar,m,call,NCM,L
62511,GreBlu9508M,Site4,Site4_L1500R1900_e9_s0_ss3,noise,81,STRFxx0000__So_12.wav,,,1.657041,unfamiliar,m,song,NCM,L
101588,WhiWhi4522M,Site3,Site3_L1900R1800_e12_s0_ss1,single,214,WhiBlu4818_MAF_Di_1.wav,A,Di,0.283731,familiar,m,call,NCM,L
111388,WhiWhi4522M,Site4,Site4_L2100R2050_e12_s0_ss3,noise,160,GraLbl0457_MAS_Ag_1.wav,A,Ag,0.581715,unfamiliar,m,call,NCM,L


## Preprocess DataFrame

### vocid = callid + song

In [12]:
# vocid takes the call type (callid) for rows whose stim_type is 'call' and
# the stim_type itself for every other row.
is_call = df['stim_type'].eq('call')
df['vocid'] = np.where(is_call, df['callid'], df['stim_type'])
# Discard the rows whose vocid is 'mlnoise'.
df = df.loc[df['vocid'].ne('mlnoise')]
df.sample(5)

Unnamed: 0,subject,site,single_unit,sortType,stim_number,original_wavfile,callerAge,callid,stim_duration,stim_source,stim_source_sex,stim_type,brain_region,hemisphere,vocid
76119,LblBlu2028M,Site1,Site1_L1400R1400_e19_s0_ss1,singl�,249,WhiLbl0010_MAS_So_2.wav,,,2.313017,unfamiliar,m,song,CM,L,song
166424,YelBlu6903F,Site4,Site4_L1500R1500_e32_s0_ss1,single,38,BlaBla0506_MAF_Ag_1.wav,A,Ag,1.473212,familiar,m,call,NCM,R,Ag
72872,GreBlu9508M,Site5,Site5_L1800R2100_e7_s0_ss1,single,42,BlaBla0506_MAF_DC_4-6-9.wav,A,DC,2.5,familiar,m,call,NCM,L,DC
147486,YelBlu6903F,Site2,Site2_L1000R900_e9_s0_ss3,single,63,LblBlu2028_MCS_Be_2.wav,C,Be,0.811379,unfamiliar,m,call,NCM,L,Be
120967,WhiWhi4522M,Site5,Site5_L2450R2350_e12_s0_ss2,single,205,PurRas20dd_FAS_Ne_9-3-5.wav,A,Ne,2.5,unfamiliar,f,call,NCM,L,Ne


In [13]:
# Number of rows per vocalization type.
df['vocid'].value_counts()

song    33675
Te      23730
DC      21833
Ne      19196
Ag      15095
Be      14970
Th      10279
LT       9654
Di       8163
Wh        392
Name: vocid, dtype: int64

### Filter Noise and Multi-Units

In [14]:
# Number of rows per sortType label, before any filtering on it.
df['sortType'].value_counts()

single    137035
noise      12732
multi       6952
singl        268
Name: sortType, dtype: int64

In [15]:
# List the sortType labels that contain a NUL byte. They are selected by
# content because the position of a label inside unique() depends on the
# order in which the rows were collected.
[label for label in df['sortType'].unique() if '\x00' in label]

'singl\x00'

In [16]:
# Some rows carry the label 'singl\x00' (a NUL byte where 'single' has its
# final 'e'). Map that exact value to 'single'. Naming the value explicitly
# avoids picking it by position in unique(), which depends on row order,
# and keeps the cell safe to re-run after the label has been repaired.
df['sortType'] = df['sortType'].replace({'singl\x00': 'single'})

In [17]:
# Retain only the rows labelled 'single'; all other sortType labels are dropped.
single_mask = df['sortType'].eq('single')
df = df.loc[single_mask]
df.sample(5)

Unnamed: 0,subject,site,single_unit,sortType,stim_number,original_wavfile,callerAge,callid,stim_duration,stim_source,stim_source_sex,stim_type,brain_region,hemisphere,vocid
149052,YelBlu6903F,Site3,Site3_L1200R1200_e14_s0_ss1,single,81,STRFxx0000__So_12.wav,,,1.657041,unfamiliar,m,song,NCM-L3,L,song
47116,GreBlu9508M,Site3,Site3_L1250R1650_e1_s0_ss1,single,276,WhiLbl0010_MAS_So_2.wav,,,2.313017,unfamiliar,m,song,HP-CMM,L,song
121851,WhiWhi4522M,Site5,Site5_L2450R2350_e16_s0_ss1,single,81,STRFxx0000__So_14.wav,,,2.539035,unfamiliar,m,song,NCM,L,song
78925,LblBlu2028M,Site1,Site1_L1400R1400_e5_s0_ss3,single,247,WhiLbl0010_MAS_Ne_9-3-7.wav,A,Ne,2.5,unfamiliar,m,call,NCM,R,Ne
69722,GreBlu9508M,Site5,Site5_L1800R2100_e2_s0_ss2,single,262,WhiGra0114_FAF_Th_5-3-7.wav,A,Th,2.5,familiar,f,call,NCM,L,Th


In [18]:
# Confirm which sortType labels remain after filtering.
df['sortType'].value_counts()

single    137303
Name: sortType, dtype: int64

### Add Column for A1/A2

In [19]:
# Distinct brain_region labels (including NaN) present in the frame.
df['brain_region'].unique()

array(['NCM', 'L3-2b', 'L3', 'L1', 'CM', 'L', 'CMM', 'CML', 'L2b', 'L2B',
       'L2A', 'L2A-L1', 'HP-NCM', 'HP', 'CMM-HP', 'L1-CML', 'HP-L1',
       'HP-CMM', 'NCM-L', 'L1-CMM', 'L3-L2B', nan, 'CML-L1', 'L3-L2A',
       'NCM-HP', 'NCM-L3', 'HP-L2A', 'L2B-NCM', 'CMM-L1', 'L2A-L2B',
       'L1-L2A', 'NCM-L2A'], dtype=object)

In [20]:
'''
Field L regions L1, L2, L3, and lateral caudal mesopallium (CM) form the primary auditory cortex in birds (13, 14). 
Field L2a and L2b are the intermediate (thalamorecipient) regions. 
Field L1 and CML are the superficial regions, and field L3 is the deep region. 
Caudal nidopallium (NC) is a secondary auditory area. 
Area L (13) has cytoarchitecture similar to L2b but is not known to receive thalamic input. 

Reference:
Coding principles of the canonical cortical microcircuit in the avian brain
Ana Calabrese and Sarah M. N. Woolley

Note: Based on above, removed L from A1.
'''
# Region labels assigned to each auditory-cortex class.
a1_regions = [
    'L3-2b', 'L3', 'L1', 'L2b', 'L2B', 'L2A', 'L2A-L1',
    'L3-L2B', 'L3-L2A', 'L2A-L2B', 'L1-L2A',
    'CML', 'L1-CML', 'CML-L1',
]
a2_regions = ['NCM', 'CMM']

# brain_region label -> 'A1' / 'A2'
cortex_dict = {
    **dict.fromkeys(a1_regions, 'A1'),
    **dict.fromkeys(a2_regions, 'A2'),
}

# replace() keeps labels that are not keys of cortex_dict unchanged, so the
# percentages below also list every label that has no A1/A2 assignment.
df['auditory_cortex'] = df['brain_region'].replace(cortex_dict)
df['auditory_cortex'].value_counts().div(len(df)).mul(100)

A2         48.150441
A1         31.653351
L           2.093181
HP-L1       1.938049
L1-CMM      1.837542
HP-NCM      1.670757
HP-CMM      1.579718
NCM-L       1.223571
CM          1.197352
HP          0.873979
CMM-HP      0.701369
CMM-L1      0.694814
NCM-L2A     0.606687
L2B-NCM     0.264379
NCM-L3      0.261466
HP-L2A      0.176253
NCM-HP      0.174796
Name: auditory_cortex, dtype: float64

In [21]:
# map() yields NaN for every label that is not a key of cortex_dict, so only
# 'A1' and 'A2' remain as values; percentages are relative to all rows.
region_to_cortex = df['brain_region'].map(cortex_dict)
df['auditory_cortex'] = region_to_cortex
df['auditory_cortex'].value_counts().div(len(df)).mul(100)

A2    48.150441
A1    31.653351
Name: auditory_cortex, dtype: float64

### WhiWhi: Remove Site5 
```
In WhiWhi4522M, the Site5 stimuli are not used at the other sites (keeping them would shrink the set of stimuli common to all sites). This holds for the stim_number values as well as the original_wavfile names.
```

In [22]:
# Distinct stimulus files for WhiWhi4522M at Site1, in order of first appearance.
site1_rows = (df['subject'] == 'WhiWhi4522M') & (df['site'] == 'Site1')
df.loc[site1_rows, 'original_wavfile'].unique()

array(['WhiWhi1415_FAF_Ne_5-4-2.wav', 'WhiWhi1415_FAF_Ne_7-9-6.wav',
       'WhiWhi1415_FAF_Ne_8-3-10.wav', 'WhiWhi1415_FAF_Te_10-7-2.wav',
       'WhiWhi1415_FAF_Te_8-5-6.wav', 'WhiWhi1415_FAF_Te_9-3-4.wav',
       'WhiWhi1415_FAF_Th_4-9-2.wav', 'WhiWhi1415_FAF_Th_5-8-6.wav',
       'WhiWhi1415_FAF_Th_7-3-10.wav', 'BluRas07dd_FAS_Ag_1.wav',
       'BluRas07dd_FAS_Ag_2.wav', 'BluRas07dd_FAS_Ag_3.wav',
       'BluRas07dd_FAS_DC_10-3-6.wav', 'BluRas07dd_FAS_DC_2-5-7.wav',
       'BluRas07dd_FAS_DC_8-9-4.wav', 'BluRas07dd_FAS_Ne_2-6-8.wav',
       'BluRas07dd_FAS_Ne_3-9-10.wav', 'BluRas07dd_FAS_Ne_7-5-4.wav',
       'BluRas07dd_FAS_Te_2-7-9.wav', 'BluRas07dd_FAS_Te_4-10-5.wav',
       'BluRas07dd_FAS_Te_6-3-8.wav', 'GraGre1001_MCS_Be_1.wav',
       'GraGre1001_MCS_Be_2.wav', 'GraGre1001_MCS_Be_3.wav',
       'GraLbl0457_MAS_Ag_1.wav', 'GraLbl0457_MAS_Ag_2.wav',
       'GraLbl0457_MAS_Ag_3.wav', 'GraLbl0457_MAS_DC_2-4-7.wav',
       'GraLbl0457_MAS_DC_3-10-9.wav', 'GraLbl0457_MAS_DC_8-6-5.

In [23]:
# Distinct stimulus files for WhiWhi4522M at Site5, in order of first appearance.
site5_rows = (df['subject'] == 'WhiWhi4522M') & (df['site'] == 'Site5')
df.loc[site5_rows, 'original_wavfile'].unique()

array(['WhiWhi1415_FAF_Ne_5-4-2.wav', 'WhiWhi1415_FAF_Ne_7-9-6.wav',
       'WhiWhi1415_FAF_Ne_8-3-10.wav', 'WhiWhi1415_FAF_Te_10-7-2.wav',
       'WhiWhi1415_FAF_Te_8-5-6.wav', 'WhiWhi1415_FAF_Te_9-3-4.wav',
       'WhiWhi1415_FAF_Th_4-9-2.wav', 'WhiWhi1415_FAF_Th_5-8-6.wav',
       'WhiWhi1415_FAF_Th_7-3-10.wav', 'GraGre0813_FCS_Be_1.wav',
       'GraGre0813_FCS_Be_2.wav', 'GraGre0813_FCS_Be_3.wav',
       'LblRas1800_MCS_Be_1.wav', 'LblRas1800_MCS_Be_2.wav',
       'LblRas1800_MCS_Be_3.wav', 'LblRas1800_MCS_LT_4-2-6.wav',
       'LblRas1800_MCS_LT_7-5-3.wav', 'PurRas20dd_FAS_Ag_1.wav',
       'PurRas20dd_FAS_Ag_2.wav', 'PurRas20dd_FAS_Ag_3.wav',
       'PurRas20dd_FAS_DC_10-7-3.wav', 'PurRas20dd_FAS_DC_2-6-4.wav',
       'PurRas20dd_FAS_DC_5-9-8.wav', 'PurRas20dd_FAS_Di_1.wav',
       'PurRas20dd_FAS_Di_2.wav', 'PurRas20dd_FAS_Di_3.wav',
       'PurRas20dd_FAS_Ne_10-4-7.wav', 'PurRas20dd_FAS_Ne_6-8-2.wav',
       'PurRas20dd_FAS_Ne_9-3-5.wav', 'PurRas20dd_FAS_Te_5-2-9.wav',
       '

In [24]:
# Element-wise comparison of the Site1 and Site2 stimulus-file lists of WhiWhi4522M.
whiwhi = df[df['subject'] == 'WhiWhi4522M']
site1_wavs = whiwhi.loc[whiwhi['site'] == 'Site1', 'original_wavfile'].unique()
site2_wavs = whiwhi.loc[whiwhi['site'] == 'Site2', 'original_wavfile'].unique()
site1_wavs == site2_wavs

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True])

In [25]:
# Do Site1 and Site5 of WhiWhi4522M use the same stimulus files?
# The two unique() arrays are compared as sets: `==` on arrays of unequal
# length is not an element-wise comparison (newer NumPy versions raise
# instead of returning False), and a set comparison does not depend on the
# order in which the files first appear.
whiwhi = df[df['subject'] == 'WhiWhi4522M']
site1_wavs = set(whiwhi.loc[whiwhi['site'] == 'Site1', 'original_wavfile'])
site5_wavs = set(whiwhi.loc[whiwhi['site'] == 'Site5', 'original_wavfile'])
site1_wavs == site5_wavs

False

In [26]:
# Keep every row that is not (subject WhiWhi4522M AND site Site5).
not_whiwhi = df['subject'] != 'WhiWhi4522M'
not_site5 = df['site'] != 'Site5'
df = df[not_whiwhi | not_site5]

## Verify and Save

In [27]:
# Discard rows that have no auditory_cortex value.
df = df[df['auditory_cortex'].notna()]

In [28]:
# Fraction of missing values in each column.
df.isna().mean()

subject             0.000000
site                0.000000
single_unit         0.000000
sortType            0.000000
stim_number         0.000000
original_wavfile    0.000000
callerAge           0.222987
callid              0.222987
stim_duration       0.000000
stim_source         0.000000
stim_source_sex     0.000000
stim_type           0.000000
brain_region        0.000000
hemisphere          0.000000
vocid               0.000000
auditory_cortex     0.000000
dtype: float64

In [29]:
# Show five randomly chosen rows of the final frame as a visual check.
df.sample(5)

Unnamed: 0,subject,site,single_unit,sortType,stim_number,original_wavfile,callerAge,callid,stim_duration,stim_source,stim_source_sex,stim_type,brain_region,hemisphere,vocid,auditory_cortex
5372,BlaBro09xxF,Site1,Site1_L1500R1500_e4_s0_ss1,single,264,WhiLbl0010_MAS_Ag_2.wav,A,Ag,0.245433,unfamiliar,m,call,NCM,L,Ag,A2
62072,GreBlu9508M,Site4,Site4_L1500R1900_e8_s0_ss1,single,284,YelGre5275_MAF_DC_3-8-2.wav,A,DC,2.5,familiar,m,call,NCM,L,DC,A2
134566,YelBlu6903F,Site1,Site1_L500R400_e30_s0_ss1,single,43,BlaBla0506_MAF_DC_9-3-8.wav,A,DC,2.5,familiar,m,call,NCM,R,DC,A2
92371,WhiWhi4522M,Site1,Site1_L1100R1275_e29_s0_ss1,single,102,WhiWhi1415_FAF_Ne_8-3-10.wav,A,Ne,2.5,familiar,f,call,CML,R,Ne,A1
99582,WhiWhi4522M,Site2,Site2_L1500R1450_e32_s0_ss1,single,108,WhiWhi1415_FAF_Th_7-3-10.wav,A,Th,2.5,familiar,f,call,NCM,R,Th,A2


In [30]:
# Write the final frame to 'aa-4.csv' in the current working directory,
# without the index column.
df.to_csv('aa-4.csv', index=False)