In [1]:
# cgn_determine_durations.ipynb
# H. Muller
# 2023-03-31

# Input: cgn components
# Output: compounds in cgn with information about duration

In [2]:
from os import environ

# Parse arguments
if environ.get('cgn'):
    cgnPath = environ.get('cgn')
else:
    cgnPath = '../DataProcessed/cgn_morph_analysis_comp-o.csv'

if environ.get('outfile'):
    outfile = environ.get('outfile')
else:
    outfile = '../DataProcessed/cgn_durations_comp-o.csv'

In [3]:
import pandas as pd

# Read files
cgn = pd.read_csv(cgnPath, sep='\t', index_col=0)
cgn.head()

Unnamed: 0,FileNameTierName,ID,WordOrtho,WordPhono,Phone,PhoneStart,PhoneEnd,variant,WordStart,WordEnd,...,leftOrtho,Interfix,rightOrtho,IfixPhono,leftPhono,rightPhono,wordPhonoCELEX,leftPhonoCELEX,IfixPhonoCELEX,rightPhonoCELEX
0,fn001092.awdN00551,182-apekool.,apekool,ap@kol,a,57.578,57.7,Netherlandic,57.578,58.176,...,aap,e,kool,@,ap,kol,ap@kol,ap,@,kol
1,fn001092.awdN00551,182-apekool.,apekool,ap@kol,p,57.7,57.771,Netherlandic,57.578,58.176,...,aap,e,kool,@,ap,kol,ap@kol,ap,@,kol
2,fn001092.awdN00551,182-apekool.,apekool,ap@kol,@,57.771,57.822,Netherlandic,57.578,58.176,...,aap,e,kool,@,ap,kol,ap@kol,ap,@,kol
3,fn001092.awdN00551,182-apekool.,apekool,ap@kol,k,57.822,57.953,Netherlandic,57.578,58.176,...,aap,e,kool,@,ap,kol,ap@kol,ap,@,kol
4,fn001092.awdN00551,182-apekool.,apekool,ap@kol,o,57.953,58.136,Netherlandic,57.578,58.176,...,aap,e,kool,@,ap,kol,ap@kol,ap,@,kol


# Clean dataframe

In [4]:
# replace na
cgn = cgn.fillna('')

In [5]:
# some words have no value in the phone column sometimes
cgn.loc[cgn.WordOrtho=='vruchtenbrandewijn',].head()

Unnamed: 0,FileNameTierName,ID,WordOrtho,WordPhono,Phone,PhoneStart,PhoneEnd,variant,WordStart,WordEnd,...,leftOrtho,Interfix,rightOrtho,IfixPhono,leftPhono,rightPhono,wordPhonoCELEX,leftPhonoCELEX,IfixPhonoCELEX,rightPhonoCELEX
3728,fn001262.awdN00624,861-vruchtenbrandewijn.,vruchtenbrandewijn,=frYxt@brAnd@wE+n,,360.753,360.886,Netherlandic,360.753,361.81,...,vrucht,en,brandewijn,@,=frYxt,brAnd@wE+n,vr}xt@brAnd@wKn,vr}xt,@,brAnd@wKn
3729,fn001262.awdN00624,861-vruchtenbrandewijn.,vruchtenbrandewijn,=frYxt@brAnd@wE+n,r,360.886,360.947,Netherlandic,360.753,361.81,...,vrucht,en,brandewijn,@,=frYxt,brAnd@wE+n,vr}xt@brAnd@wKn,vr}xt,@,brAnd@wKn
3730,fn001262.awdN00624,861-vruchtenbrandewijn.,vruchtenbrandewijn,=frYxt@brAnd@wE+n,Y,360.947,361.028,Netherlandic,360.753,361.81,...,vrucht,en,brandewijn,@,=frYxt,brAnd@wE+n,vr}xt@brAnd@wKn,vr}xt,@,brAnd@wKn
3731,fn001262.awdN00624,861-vruchtenbrandewijn.,vruchtenbrandewijn,=frYxt@brAnd@wE+n,x,361.028,361.099,Netherlandic,360.753,361.81,...,vrucht,en,brandewijn,@,=frYxt,brAnd@wE+n,vr}xt@brAnd@wKn,vr}xt,@,brAnd@wKn
3732,fn001262.awdN00624,861-vruchtenbrandewijn.,vruchtenbrandewijn,=frYxt@brAnd@wE+n,t,361.099,361.17,Netherlandic,360.753,361.81,...,vrucht,en,brandewijn,@,=frYxt,brAnd@wE+n,vr}xt@brAnd@wKn,vr}xt,@,brAnd@wKn


In [6]:
cgn.loc[cgn.Phone=='',]

Unnamed: 0,FileNameTierName,ID,WordOrtho,WordPhono,Phone,PhoneStart,PhoneEnd,variant,WordStart,WordEnd,...,leftOrtho,Interfix,rightOrtho,IfixPhono,leftPhono,rightPhono,wordPhonoCELEX,leftPhonoCELEX,IfixPhonoCELEX,rightPhonoCELEX
3728,fn001262.awdN00624,861-vruchtenbrandewijn.,vruchtenbrandewijn,=frYxt@brAnd@wE+n,,360.753,360.886,Netherlandic,360.753,361.81,...,vrucht,en,brandewijn,@,=frYxt,brAnd@wE+n,vr}xt@brAnd@wKn,vr}xt,@,brAnd@wKn
3749,fn001264.awdN00625,374-ogenblik,ogenblik,oG@blIk,,123.888,124.13,Netherlandic,123.383,124.13,...,oog,en,blik,@,oG,blIk,oG@blIk,oG,@,blIk
4142,fn001286.awdN00644,1208-gezondheidstoestand,gezondheidstoestand,x@zOnthE+tstustAnt,,421.7,421.871,Netherlandic,420.528,421.871,...,gezondheid,s,toestand,s,x@zOnthE+t,tustAnt,x@zOnthKtstustAnt,x@zOnthKt,s,tustAnt
7621,fn001475.awdN00780,213-heldendicht,heldendicht,hEld@dIxd,,84.858,84.919,Netherlandic,84.222,84.919,...,held,en,dicht,@,hEld,dIxd,hEld@dIxt,hEld,@,dIxt
8311,fn001508.awdN00796,40-tsarentijd,tsarentijd,tsar@tE+d,,13.182,13.335,Netherlandic,12.565,13.335,...,tsaar,en,tijd,@,tsar,tE+d,tsar@tKt,tsar,@,tKt
10155,fn001033.awdN00519,102-ontwikkelingsland,ontwikkelingsland,OntwIk@lINslAnd,,38.785,38.865,Netherlandic,37.887,38.865,...,ontwikkeling,s,land,s,OntwIk@lIN,lAnd,OntwIk@lINslAnt,OntwIk@lIN,s,lAnt
10998,fn001062.awdN00562,1891-plichtsverzuim,plichtsverzuim,plIxtsf@rzY+m,,623.399,623.47,Netherlandic,623.399,624.138,...,plicht,s,verzuim,s,plIxt,f@rzY+m,plIxtsf@rzLm,plIxt,s,f@rzLm


In [7]:
# some words have a '=' or '-' in their phonemic transcription, but not in the phone column
print(cgn.loc[cgn.WordPhono=='=f@rAnd@rINsprosEs', ['WordOrtho', 'WordPhono', 'leftPhono', 'rightPhono', 'Phone', 'WordStart', 
                                              'PhoneStart', 'PhoneEnd']].drop_duplicates().head())
print(cgn.loc[cgn.WordPhono=='G@rExtsx@bA+-w', ['WordOrtho', 'WordPhono', 'leftPhono', 'rightPhono', 'Phone', 'WordStart', 
                                              'PhoneStart', 'PhoneEnd']].drop_duplicates().head())

               WordOrtho           WordPhono    leftPhono rightPhono Phone  \
8762  veranderingsproces  =f@rAnd@rINsprosEs  =f@rAnd@rIN     prosEs     @   
8763  veranderingsproces  =f@rAnd@rINsprosEs  =f@rAnd@rIN     prosEs     r   
8764  veranderingsproces  =f@rAnd@rINsprosEs  =f@rAnd@rIN     prosEs     A   
8765  veranderingsproces  =f@rAnd@rINsprosEs  =f@rAnd@rIN     prosEs     n   
8766  veranderingsproces  =f@rAnd@rINsprosEs  =f@rAnd@rIN     prosEs     d   

      WordStart  PhoneStart  PhoneEnd  
8762    279.295     279.295   279.335  
8763    279.295     279.335   279.386  
8764    279.295     279.386   279.457  
8765    279.295     279.457   279.498  
8766    279.295     279.498   279.528  
            WordOrtho       WordPhono leftPhono rightPhono Phone  WordStart  \
11855  gerechtsgebouw  G@rExtsx@bA+-w    G@rExt    x@bA+-w     G      7.367   
11856  gerechtsgebouw  G@rExtsx@bA+-w    G@rExt    x@bA+-w     @      7.367   
11857  gerechtsgebouw  G@rExtsx@bA+-w    G@rExt    x@b

In [8]:
# some phones are transcribed with two symbols
print(cgn.loc[cgn.Phone.str.len()>1, 'Phone'].drop_duplicates())

8       A+
43      E+
88      Y+
1807    O:
Name: Phone, dtype: object


In [9]:
# replace '' symbol, 2-symbol transcriptions and '='
cgn[['WordPhono', 'Phone', 'leftPhono', 'IfixPhono', 'rightPhono']] = cgn[
    ['WordPhono', 'Phone', 'leftPhono', 'IfixPhono', 'rightPhono']].replace(
    {'A\+': '1', 'E\+': '2', 'Y\+': '3', 'O\:': '4', '':'5', '\=':'', '\-':''}, regex=True)
cgn.loc[cgn.WordOrtho=='vrouwenstem', ].head()

Unnamed: 0,FileNameTierName,ID,WordOrtho,WordPhono,Phone,PhoneStart,PhoneEnd,variant,WordStart,WordEnd,...,leftOrtho,Interfix,rightOrtho,IfixPhono,leftPhono,rightPhono,wordPhonoCELEX,leftPhonoCELEX,IfixPhonoCELEX,rightPhonoCELEX
6,fn001092.awdN00551,251-vrouwenstem,vrouwenstem,vr1w@stEm,v,79.356,79.448,Netherlandic,79.356,80.219,...,vrouw,en,stem,@,vr1w,stEm,vrMw@stEm,vrMw,@,stEm
7,fn001092.awdN00551,251-vrouwenstem,vrouwenstem,vr1w@stEm,r,79.448,79.51,Netherlandic,79.356,80.219,...,vrouw,en,stem,@,vr1w,stEm,vrMw@stEm,vrMw,@,stEm
8,fn001092.awdN00551,251-vrouwenstem,vrouwenstem,vr1w@stEm,1,79.51,79.664,Netherlandic,79.356,80.219,...,vrouw,en,stem,@,vr1w,stEm,vrMw@stEm,vrMw,@,stEm
9,fn001092.awdN00551,251-vrouwenstem,vrouwenstem,vr1w@stEm,w,79.664,79.726,Netherlandic,79.356,80.219,...,vrouw,en,stem,@,vr1w,stEm,vrMw@stEm,vrMw,@,stEm
10,fn001092.awdN00551,251-vrouwenstem,vrouwenstem,vr1w@stEm,@,79.726,79.798,Netherlandic,79.356,80.219,...,vrouw,en,stem,@,vr1w,stEm,vrMw@stEm,vrMw,@,stEm


In [10]:
# some words have multiple duplicated rows
duplis = cgn.duplicated(subset=['FileNameTierName', 'WordOrtho', 'WordPhono', 'Phone', 'PhoneStart', 'PhoneEnd'], keep=False)
cgn[duplis]

Unnamed: 0,FileNameTierName,ID,WordOrtho,WordPhono,Phone,PhoneStart,PhoneEnd,variant,WordStart,WordEnd,...,leftOrtho,Interfix,rightOrtho,IfixPhono,leftPhono,rightPhono,wordPhonoCELEX,leftPhonoCELEX,IfixPhonoCELEX,rightPhonoCELEX
3378,fn001234.awdN00607,711-pikkedonker.,pikkedonker,pIk@dONk@r,p,205.099,205.160,Netherlandic,205.099,205.739,...,pik,e,donker,@,pIk,dONk@r,pIk@dONk@r,pIk,@,dONk@r
3379,fn001234.awdN00607,711-pikkedonker.,pikkedonker,pIk@dONk@r,p,205.099,205.160,Netherlandic,205.099,205.739,...,pik,e,donker,@,pIk,dONk@r,pIk@dONk@r,pIk,@,dONk@r
3380,fn001234.awdN00607,711-pikkedonker.,pikkedonker,pIk@dONk@r,p,205.099,205.160,Netherlandic,205.099,205.739,...,pik,e,donker,@,pIk,dONk@r,pIk@dONk@r,pIk,@,dONk@r
3381,fn001234.awdN00607,711-pikkedonker.,pikkedonker,pIk@dONk@r,p,205.099,205.160,Netherlandic,205.099,205.739,...,pik,e,donker,@,pIk,dONk@r,pIk@dONk@r,pIk,@,dONk@r
3382,fn001234.awdN00607,711-pikkedonker.,pikkedonker,pIk@dONk@r,I,205.160,205.221,Netherlandic,205.099,205.739,...,pik,e,donker,@,pIk,dONk@r,pIk@dONk@r,pIk,@,dONk@r
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19376,fv800036.awdV80004,12-levenswijze.,levenswijze,lev@nsw2z@,z,4.570,4.690,Belgian,3.899,4.713,...,leven,s,wijze,s,lev@n,w2z@,lev@swKz@,lev@,s,wKz@
19377,fv800036.awdV80004,12-levenswijze.,levenswijze,lev@nsw2z@,@,4.690,4.713,Belgian,3.899,4.713,...,leven,s,wijs,s,lev@n,w2z@,lev@swKz@,lev@,s,wKz@
19378,fv800036.awdV80004,12-levenswijze.,levenswijze,lev@nsw2z@,@,4.690,4.713,Belgian,3.899,4.713,...,leven,s,wijs,s,lev@n,w2z@,lev@swKz@,lev@,s,wKz@
19379,fv800036.awdV80004,12-levenswijze.,levenswijze,lev@nsw2z@,@,4.690,4.713,Belgian,3.899,4.713,...,leven,s,wijze,s,lev@n,w2z@,lev@swKz@,lev@,s,wKz@


In [11]:
# fix those tokens
prior=len(cgn)
cgn = cgn.drop_duplicates(subset=['FileNameTierName', 'WordOrtho', 'WordPhono', 'Phone', 'PhoneStart', 'PhoneEnd'])
post=len(cgn)
print(f'{prior-post} rows removed. New number of rows: {post}')

493 rows removed. New number of rows: 19204


In [12]:
# correct not matching boundaries
WordStart = (cgn.groupby(['FileNameTierName', 'ID'])['PhoneStart'].agg(lambda x: min(x)).reset_index(name='WordStart'))
WordEnd = (cgn.groupby(['FileNameTierName', 'ID'])['PhoneEnd'].agg(lambda x: max(x)).reset_index(name='WordEnd'))

# delete old columns
del cgn['WordStart']
del cgn['WordEnd']

# merge with new columns
cgn = cgn.merge(WordStart[['FileNameTierName', 'ID', 'WordStart']], on=['FileNameTierName', 'ID'], how='left')
cgn = cgn.merge(WordEnd[['FileNameTierName', 'ID', 'WordEnd']], on=['FileNameTierName', 'ID'], how='left')

In [13]:
# sometimes the word's first or last phone were not entered into the phone tier and thus rows with the respective phones are missing
# i.e., the first phone of trappenhuis is r and not t
duplis = cgn.duplicated(subset=['FileNameTierName', 'ID', 'leftPhono'], keep='first')
firstRowsLeft = cgn[~duplis].copy()
missingPhonesLeft = firstRowsLeft.apply(lambda x: x['leftPhono'][0]!=x['Phone'], axis=1)
firstMissing = firstRowsLeft[missingPhonesLeft].copy()
firstMissing[['FileNameTierName', 'ID', 'rightPhono', 'Phone', 'PhoneStart', 'PhoneEnd']].head()

Unnamed: 0,FileNameTierName,ID,rightPhono,Phone,PhoneStart,PhoneEnd
182,fn001104.awdN00559,863-trappenhuis.,h3s,r,318.749,318.81
208,fn001108.awdN00564,487-toetsenbord.,bOrt,u,186.952,187.012
296,fn001115.awdN00615,1419-trappenhuis,h3s,r,543.216,543.267
514,fn001134.awdN00634,100-warenhuis,h3s,a,35.548,35.679
520,fn001134.awdN00634,216-sigarenwinkel,wINk@l,i,79.277,79.337


In [14]:
# sometimes the word's first or last phone were not entered into the phone tier and thus rows with the respective phones are missing
# i.e., the last phone of levenslust is s and not d
duplis = cgn.duplicated(subset=['FileNameTierName', 'ID', 'rightPhono'], keep='last')
firstRowsRight = cgn[~duplis].copy()
missingPhonesRight = firstRowsRight.apply(lambda x: x['rightPhono'][-1]!=x['Phone'], axis=1)
lastMissing = firstRowsRight[missingPhonesRight].copy()
lastMissing[['FileNameTierName', 'ID', 'rightPhono', 'Phone', 'PhoneStart', 'PhoneEnd']].head()

Unnamed: 0,FileNameTierName,ID,rightPhono,Phone,PhoneStart,PhoneEnd
2637,fn001202.awdN00702,678-levenslust,lYsd,s,246.984,247.025
3321,fn001225.awdN00572,1378-handenarbeid,Arb2t,2,459.382,459.524
3719,fn001264.awdN00625,374-ogenblik,blIk,5,123.888,124.13
3752,fn001264.awdN00625,1367-ogenblik,blIk,I,471.546,471.607
4112,fn001286.awdN00644,1208-gezondheidstoestand,tustAnt,5,421.7,421.871


In [15]:
# find missing phones
firstMissing['Phone'] = firstMissing['leftPhono'].apply(lambda x: x[0])
firstMissing['PhoneStart'] = firstMissing['PhoneEnd']

lastMissing['Phone'] = lastMissing['rightPhono'].apply(lambda x: x[-1:])
lastMissing['PhoneStart'] = lastMissing['PhoneEnd']

lastMissing[['FileNameTierName', 'ID', 'leftPhono', 'rightPhono', 'Phone', 'PhoneStart', 'PhoneEnd']].head()

Unnamed: 0,FileNameTierName,ID,leftPhono,rightPhono,Phone,PhoneStart,PhoneEnd
2637,fn001202.awdN00702,678-levenslust,lev@,lYsd,d,247.025,247.025
3321,fn001225.awdN00572,1378-handenarbeid,hAnd,Arb2t,t,459.524,459.524
3719,fn001264.awdN00625,374-ogenblik,oG,blIk,k,124.13,124.13
3752,fn001264.awdN00625,1367-ogenblik,oG,blIk,k,471.607,471.607
4112,fn001286.awdN00644,1208-gezondheidstoestand,x@zOnth2t,tustAnt,t,421.871,421.871


In [16]:
#add columns to CGN
cgn = pd.concat([cgn, firstMissing, lastMissing], ignore_index=True)
cgn.loc[cgn.WordOrtho=='gevarenzone', ['WordPhono', 'leftPhono', 'IfixPhono', 'rightPhono']]

Unnamed: 0,WordPhono,leftPhono,IfixPhono,rightPhono
1800,x@var@z4n@,x@var,@,z4n@
1801,x@var@z4n@,x@var,@,z4n@
1802,x@var@z4n@,x@var,@,z4n@
1803,x@var@z4n@,x@var,@,z4n@
1804,x@var@z4n@,x@var,@,z4n@
1805,x@var@z4n@,x@var,@,z4n@
1806,x@var@z4n@,x@var,@,z4n@
1807,x@var@z4n@,x@var,@,z4n@
1808,x@var@z4n@,x@var,@,z4n@
1809,x@var@z4n@,x@var,@,z4n@


# Compute durations

In [17]:
# determine constituents' length in number of characters
leftPhonoLen = (cgn.groupby(['FileNameTierName', 'ID'])['leftPhono'].agg(lambda x: x.str.len().drop_duplicates()).reset_index(name='leftPhonoLen'))
rightPhonoLen = (cgn.groupby(['FileNameTierName', 'ID'])['rightPhono'].agg(lambda x: x.str.len().drop_duplicates()).reset_index(name='rightPhonoLen'))

In [18]:
# determine length of interfixes and constituents in number of characters
cgn = cgn.merge(leftPhonoLen[['FileNameTierName', 'ID', 'leftPhonoLen']], on=['FileNameTierName', 'ID'], how='left')
cgn = cgn.merge(rightPhonoLen[['FileNameTierName', 'ID', 'rightPhonoLen']], on=['FileNameTierName', 'ID'], how='left')
cgn['IfixLen'] = cgn.IfixPhono.str.len()
cgn['PhoneDuration'] = cgn['PhoneEnd'] - cgn['PhoneStart']
cgn['WordDuration'] = cgn['WordEnd'] - cgn['WordStart']

In [19]:
# write functions to determine left and right constituents' durations
def sum_left(df):
    x = df.iloc[0]['leftPhonoLen']
    return(df.iloc[:x]['PhoneDuration'].sum())

def sum_right(df):
    x = df.iloc[0]['rightPhonoLen']
    return(df.iloc[-x:]['PhoneDuration'].sum())

# add durations to dataframe
LeftDuration = cgn.groupby(['FileNameTierName', 'ID']).apply(sum_left).reset_index(name='LeftDuration')
RightDuration = cgn.groupby(['FileNameTierName', 'ID']).apply(sum_right).reset_index(name='RightDuration')
cgn = cgn.merge(LeftDuration[['FileNameTierName', 'ID', 'LeftDuration']], on=['FileNameTierName', 'ID'], how='left')
cgn = cgn.merge(RightDuration[['FileNameTierName', 'ID', 'RightDuration']], on=['FileNameTierName', 'ID'], how='left')

# compute interfix duration
cgn['IfixDuration'] = cgn['WordDuration'] - cgn['LeftDuration'] - cgn['RightDuration']

In [20]:
import numpy as np

# change duration if no interfix
cgn['IfixDuration'] = np.where(cgn['IfixPhono']=='', 0, cgn['IfixDuration'])

In [21]:
# check if any items have zero duration
cgn.loc[cgn.IfixDuration<=0, ['WordOrtho', 'WordPhono', 'leftPhono', 'IfixPhono', 'rightPhono', 'Phone', 'PhoneStart', 'PhoneEnd', 
                              'PhoneDuration', 'WordDuration', 'LeftDuration', 'RightDuration']].drop_duplicates()

Unnamed: 0,WordOrtho,WordPhono,leftPhono,IfixPhono,rightPhono,Phone,PhoneStart,PhoneEnd,PhoneDuration,WordDuration,LeftDuration,RightDuration


In [22]:
# undo replacement of transcriptions
cgn[['WordPhono', 'Phone', 'leftPhono', 'IfixPhono', 'rightPhono']] = cgn[
    ['WordPhono', 'Phone', 'leftPhono', 'IfixPhono', 'rightPhono']].replace(
    {'1':'A+', '2':'E+', '3':'Y+', '4':'O:', '':'5'}, regex=True)
cgn.loc[cgn.WordOrtho=='vrouwenstem', ].head()

Unnamed: 0,FileNameTierName,ID,WordOrtho,WordPhono,Phone,PhoneStart,PhoneEnd,variant,UtteranceBorder,UtteranceStart,...,WordStart,WordEnd,leftPhonoLen,rightPhonoLen,IfixLen,PhoneDuration,WordDuration,LeftDuration,RightDuration,IfixDuration
6,fn001092.awdN00551,251-vrouwenstem,vrouwenstem,vrA+w@stEm,v,79.356,79.448,Netherlandic,False,79.233,...,79.356,80.219,4,4,1,0.092,0.863,0.37,0.421,0.072
7,fn001092.awdN00551,251-vrouwenstem,vrouwenstem,vrA+w@stEm,r,79.448,79.51,Netherlandic,False,79.233,...,79.356,80.219,4,4,1,0.062,0.863,0.37,0.421,0.072
8,fn001092.awdN00551,251-vrouwenstem,vrouwenstem,vrA+w@stEm,A+,79.51,79.664,Netherlandic,False,79.233,...,79.356,80.219,4,4,1,0.154,0.863,0.37,0.421,0.072
9,fn001092.awdN00551,251-vrouwenstem,vrouwenstem,vrA+w@stEm,w,79.664,79.726,Netherlandic,False,79.233,...,79.356,80.219,4,4,1,0.062,0.863,0.37,0.421,0.072
10,fn001092.awdN00551,251-vrouwenstem,vrouwenstem,vrA+w@stEm,@,79.726,79.798,Netherlandic,False,79.233,...,79.356,80.219,4,4,1,0.072,0.863,0.37,0.421,0.072


# Write dataframe to file

In [23]:
cgn.to_csv(outfile, sep='\t')