In [2]:
import json
import regex as re
from unidecode import unidecode
import yaml
import logging
import numpy as np 
import pandas as pd
from IPython.display import Markdown
from datetime import datetime as dt
from time import sleep
from importlib import reload
from ast import literal_eval

import spacy
from spacy import displacy
spacy.require_gpu()

import files, configs, logs
import twitter_data as td
import tweets as t

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Configure logging for this session through the project-local `logs` module
# (what setup_logger does with these arguments is not visible in this notebook).
logs.setup_logger(
    file_name='adding-verb-spacing', 
    desc='After meeting with Dr Jacqueline Serigos. Decided to instead add column indicating V2 and the distance from CCOMP VOI'
)

In [4]:
# Distinct values of the `verb` column of the project's conjugation table; the
# rest of the notebook uses this set as the "verbs of interest" lookup.
verbs = {*files.get_verb_conjugations()['verb'].to_numpy()}
verbs

{'acordar',
 'adivinar',
 'admitir',
 'afirmar',
 'apostar',
 'asegurar',
 'comprobar',
 'confesar',
 'confirmar',
 'conseguir',
 'considerar',
 'contar',
 'creer',
 'decir',
 'demostrar',
 'desear',
 'dudar',
 'entender',
 'esperar',
 'gritar',
 'imaginar',
 'jurar',
 'lamentar',
 'lograr',
 'mandar',
 'mencionar',
 'mostrar',
 'negar',
 'ojala',
 'ordenar',
 'parecer',
 'pedir',
 'pensar',
 'predecir',
 'prever',
 'prometer',
 'querer',
 'reclamar',
 'recomendar',
 'recordar',
 'responder',
 'rogar',
 'saber',
 'sentir',
 'solicitar',
 'suplicar',
 'suponer',
 'suspirar',
 'temer',
 'ver'}

In [5]:
# Read configuration through the project-local `configs` helper: once with no
# argument and once with 'p' (the meaning of 'p' is not visible here — TODO confirm).
gen_conf = configs.read_conf()
conf = configs.read_conf('p')

In [6]:
# Load the spaCy model named under conf['spacy']['es'], disabling the pipeline
# components listed under conf['spacy']['pipeline']['disable'].
nlp = spacy.load(conf['spacy']['es'], disable=conf['spacy']['pipeline']['disable'])

In [7]:
# Pick the data folder(s) through the project helper. NOTE(review): the recorded
# output shows an interactive prompt (answered with "9"), so the value of
# `data_dir` depends on what is typed when the cell runs.
data_dir = files.choose_save_path('p')

Choose from the available folders, comma-separated (or "a" for all):
0. twitter-es-esperar-43769-postagged-20-7-2021
1. 2022-04-19 15:41:08
2. backup
3. 2022-07-07 15:16:47
4. 2022-03-08 15:15:06
5. 2022-04-22 21:18:32
6. 2022-06-30 15:48:37
7. 20210726
8. 2022-02-08 16:34:36
9. samples
10. 07112021-at-2210
Return folder(s): 9


In [12]:
# Path of the annotated spreadsheet inside the first selected folder; the bare
# last expression displays whether the file actually exists.
data_path = data_dir[0]/'FOR ADRIAN twitter-es-creer-5-912 (1)CLEAR ANNOTATION.xlsx'
data_path.is_file()

True

In [98]:
# Load the annotated spreadsheet into a DataFrame and preview its first three rows.
data = pd.read_excel(data_path)
data.head(3)

Unnamed: 0,verbs,tweet_id,ccomp_head,text_orig,in comp,normalized,dependencies,pos,details
0,"['entender', 'creer']",1453514708506525952,"['entender', 'creer', 'hacer']",@nabbp08 @IvanCepedaCast No entiendo porqué al...,que,@nabbp08 @ivancepedacast no ENTIENDO porqué a...,@nabbp08[ROOT] @ivancepedacast[flat] no[advmo...,@nabbp08(PROPN) @IvanCepedaCast(PROPN) No(ADV)...,"<@nabbp08>(@nabbp08,False) <@IvanCepedaCast>(@..."
1,"['mostrar', 'creer']",1455655561773690880,['creer'],@alferdez habla de país y su deuda. Muestra in...,que,@alferdez habla de país y su deuda. MUESTRA i...,@alferdez[nsubj] habla[ROOT] de[case] país[ob...,@alferdez(PROPN) habla(VERB) de(ADP) país(NOUN...,"<@alferdez>(@alferdez,False) <habla>(hablar,Tr..."
2,"['pedir', 'entender', 'creer']",1455544578203913984,"['creer', 'entender', 'refirio', 'preferir', '...",@diego_espacio @E_FLEISCHMAN Creo q ni tú has ...,que,@diego_espacio @e_fleischman CREO Q ni tú has...,@diego_espacio[ROOT] @e_fleischman[flat] CREO...,@diego_espacio(PROPN) @E_FLEISCHMAN(PROPN) Cre...,"<@diego_espacio>(@diego_espacio,False) <@E_FLE..."


In [164]:
# Parse every original tweet with spaCy. `nlp.pipe` streams the texts through the
# pipeline in batches instead of invoking the full pipeline once per row (which is
# what `Series.apply(nlp)` does); the resulting Docs are put back into a Series
# that keeps the index and name of the `text_orig` column.
procd = pd.Series(
    list(nlp.pipe(data['text_orig'])),
    index=data.index,
    name='text_orig',
    dtype=object,
)

In [83]:
# Part-of-speech labels treated as "verb" when scanning tokens: the Universal
# POS label 'VERB' plus the Penn Treebank verb tags.
VERB_POS = {
    'VERB',
    'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ',
}

In [76]:
def get_data(token):
    """Collect per-token annotations into a plain dict keyed by token index.

    Parameters
    ----------
    token : iterable
        Iterable of token-like objects exposing ``i``, ``text``, ``lemma_``,
        ``dep_``, ``tag_`` and ``morph`` (in this notebook each element of
        ``procd`` is passed in).

    Returns
    -------
    dict
        ``{index: {'text', 'lemma', 'dep', 'pos', 'morph'}}``. Note that the
        ``'pos'`` entry is filled from ``tag_``.

    If an element lacks one of the attributes, a hint is printed and whatever
    was collected up to that point is returned.
    """
    collected = {}

    try:
        for tok in token:
            collected[tok.i] = dict(
                text=tok.text,
                lemma=tok.lemma_,
                dep=tok.dep_,
                pos=tok.tag_,
                morph=tok.morph,
            )
    except AttributeError:
        # Best effort: report the problem but still hand back the partial result.
        print(
            'Rerun the spacy processing; something went wrong and some elements '
            'converted to dict instead of Token'
        )

    return collected

In [165]:
def ccomp_head_detail(token):
    """Map each verb-of-interest that governs a ``ccomp`` to the pair involved.

    Every token whose dependency label is ``'ccomp'`` is inspected; it is kept
    only when its head has a ``pos_`` in ``VERB_POS`` and a lemma in ``verbs``.

    Returns
    -------
    dict
        ``{head lemma: ((head text, head index), (ccomp text, ccomp index))}``.
        When the same head lemma qualifies more than once, the later
        occurrence replaces the earlier one.
    """
    heads = {}

    for tok in token:
        if tok.dep_ != 'ccomp':
            continue

        governor = tok.head
        if governor.pos_ not in VERB_POS or governor.lemma_ not in verbs:
            continue

        heads[governor.lemma_] = ((governor.text, governor.i), (tok.text, tok.i))

    return heads

In [166]:
# Per-tweet token annotations: one dict per tweet, keyed by token index.
details = procd.apply(get_data).rename('details')
print(details.head(3))

# Extract the ccomp heads once and reuse the result under both names that later
# cells read (`head` gets exploded into one row per key, `ccomp_head_detail`
# keeps the full dict). The original recomputed the same Series a second time.
head = procd.apply(ccomp_head_detail).rename('head')
print(head.head(3))

# NOTE: this rebinds `ccomp_head_detail` from the function to its result Series,
# so the function's definition cell has to be re-run before this cell can be
# executed again.
ccomp_head_detail = head.rename('ccomp_head_detail')
# Previously printed `ccomp_head`, a name that is never defined (NameError on a
# fresh kernel).
print(ccomp_head_detail.head(3))

0    {0: {'text': '@nabbp08', 'lemma': '@nabbp08', ...
1    {0: {'text': '@alferdez', 'lemma': '@alferdez'...
2    {0: {'text': '@diego_espacio', 'lemma': '@dieg...
Name: details, dtype: object
0    {'entender': (('entiendo', 3), ('creen', 6)), ...
1         {'creer': (('Creerá', 32), ('idiotas', 38))}
2    {'creer': (('Creo', 2), ('entendido', 7)), 'en...
Name: head, dtype: object
0    {'entender': (('entiendo', 3), ('creen', 6)), ...
1         {'creer': (('Creerá', 32), ('idiotas', 38))}
2    {'creer': (('Creo', 2), ('entendido', 7)), 'en...
Name: ccomp_head, dtype: object


In [78]:
print(details[0])

{0: {'text': '@nabbp08', 'lemma': '@nabbp08', 'dep': 'ROOT', 'pos': 'PROPN', 'morph': }, 1: {'text': '@IvanCepedaCast', 'lemma': '@IvanCepedaCast', 'dep': 'flat', 'pos': 'PROPN', 'morph': }, 2: {'text': 'No', 'lemma': 'no', 'dep': 'advmod', 'pos': 'ADV', 'morph': Polarity=Neg}, 3: {'text': 'entiendo', 'lemma': 'entender', 'dep': 'ROOT', 'pos': 'VERB', 'morph': Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin}, 4: {'text': 'porqué', 'lemma': 'porqué', 'dep': 'obl', 'pos': 'PRON', 'morph': PronType=Ind}, 5: {'text': 'algunos', 'lemma': 'alguno', 'dep': 'nsubj', 'pos': 'PRON', 'morph': Gender=Masc|Number=Plur|PronType=Ind}, 6: {'text': 'creen', 'lemma': 'creer', 'dep': 'ccomp', 'pos': 'VERB', 'morph': Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin}, 7: {'text': 'que', 'lemma': 'que', 'dep': 'mark', 'pos': 'SCONJ', 'morph': }, 8: {'text': 'cualquier', 'lemma': 'cualquiera', 'dep': 'det', 'pos': 'DET', 'morph': Number=Sing|PronType=Ind}, 9: {'text': 'personaje', 'lemma': 'person

In [99]:
# Tidy the annotation sheet's columns:
#   * rename 'in comp ' (key kept exactly as written, trailing space included)
#     to the identifier-friendly 'in_comp';
#   * discard the spreadsheet's `ccomp_head` column. The column used to be parsed
#     with literal_eval immediately before being dropped, which was wasted work.
# errors='ignore' keeps the cell re-runnable once the column is already gone.
data = (
    data
    .rename(columns={'in comp ': 'in_comp'})
    .drop(columns=['ccomp_head'], errors='ignore')
)

In [215]:
# Assemble the working frame column-wise: identifying columns from the sheet,
# the two ccomp-head Series, the text columns, then the per-token details.
ndata = pd.concat(
    [
        data[['tweet_id','in_comp','verbs']],
        head,
        ccomp_head_detail,
        data[['text_orig','dependencies']],
        details,
    ],
    axis='columns',
)
ndata.head(3)

Unnamed: 0,tweet_id,in_comp,verbs,head,ccomp_head_detail,text_orig,dependencies,details
0,1453514708506525952,que,"['entender', 'creer']","{'entender': (('entiendo', 3), ('creen', 6)), ...","{'entender': (('entiendo', 3), ('creen', 6)), ...",@nabbp08 @IvanCepedaCast No entiendo porqué al...,@nabbp08[ROOT] @ivancepedacast[flat] no[advmo...,"{0: {'text': '@nabbp08', 'lemma': '@nabbp08', ..."
1,1455655561773690880,que,"['mostrar', 'creer']","{'creer': (('Creerá', 32), ('idiotas', 38))}","{'creer': (('Creerá', 32), ('idiotas', 38))}",@alferdez habla de país y su deuda. Muestra in...,@alferdez[nsubj] habla[ROOT] de[case] país[ob...,"{0: {'text': '@alferdez', 'lemma': '@alferdez'..."
2,1455544578203913984,que,"['pedir', 'entender', 'creer']","{'creer': (('Creo', 2), ('entendido', 7)), 'en...","{'creer': (('Creo', 2), ('entendido', 7)), 'en...",@diego_espacio @E_FLEISCHMAN Creo q ni tú has ...,@diego_espacio[ROOT] @e_fleischman[flat] CREO...,"{0: {'text': '@diego_espacio', 'lemma': '@dieg..."


In [216]:
# One row per detected ccomp head: exploding the dict-valued `head` column leaves
# a single head lemma (a dict key) per row, as the preview below shows, and
# ignore_index=True renumbers the rows from 0.
ndata = ndata.explode('head', ignore_index=True)
ndata.head(3)

Unnamed: 0,tweet_id,in_comp,verbs,head,ccomp_head_detail,text_orig,dependencies,details
0,1453514708506525952,que,"['entender', 'creer']",entender,"{'entender': (('entiendo', 3), ('creen', 6)), ...",@nabbp08 @IvanCepedaCast No entiendo porqué al...,@nabbp08[ROOT] @ivancepedacast[flat] no[advmo...,"{0: {'text': '@nabbp08', 'lemma': '@nabbp08', ..."
1,1453514708506525952,que,"['entender', 'creer']",creer,"{'entender': (('entiendo', 3), ('creen', 6)), ...",@nabbp08 @IvanCepedaCast No entiendo porqué al...,@nabbp08[ROOT] @ivancepedacast[flat] no[advmo...,"{0: {'text': '@nabbp08', 'lemma': '@nabbp08', ..."
2,1455655561773690880,que,"['mostrar', 'creer']",creer,"{'creer': (('Creerá', 32), ('idiotas', 38))}",@alferdez habla de país y su deuda. Muestra in...,@alferdez[nsubj] habla[ROOT] de[case] país[ob...,"{0: {'text': '@alferdez', 'lemma': '@alferdez'..."


In [217]:
# Select (not yet drop) the rows whose exploded `head` is not one of the verbs
# of interest, report how many there are, and display them for inspection.
not_in = ndata[~ndata['head'].isin(verbs)]
print(not_in.shape[0])
not_in

32


Unnamed: 0,tweet_id,in_comp,verbs,head,ccomp_head_detail,text_orig,dependencies,details
73,1455486694006960128,,['creer'],,{},Yo creo q d la brisa del mar xd,yo[nsubj] CREO[ROOT] q[mark] D[CCOMP] la[det]...,"{0: {'text': 'Yo', 'lemma': 'yo', 'dep': 'nsub..."
88,1456958790293614080,,['creer'],,{},@keito_sanchez Jaja no creo en el fuego se coc...,@keito_sanchez[ROOT] jaja[flat] no[advmod] CR...,"{0: {'text': '@keito_sanchez', 'lemma': '@keit..."
107,1454156646947692032,,"['ver', 'creer']",,{},Yo creo que eso no va pasar es solo un deseo; ...,yo[nsubj] CREO[ROOT] que[mark] eso[nsubj] no[...,"{0: {'text': 'Yo', 'lemma': 'yo', 'dep': 'nsub..."
145,1449192019574672896,,"['parecer', 'creer']",,{},@ArturoAroni Me parece que esa tapa es trucha....,@arturoaroni[ROOT] me[iobj] PARECE[ROOT] que[...,"{0: {'text': '@ArturoAroni', 'lemma': '@Arturo..."
150,1454928340004090112,,"['parecer', 'creer']",,{},"@RoyalCaribbean Mi primer crucero, viajé el #...",@royalcaribbean[obj] mi[det] primer[amod] cru...,"{0: {'text': '@RoyalCaribbean', 'lemma': '@Roy..."
154,1454900755404623872,,"['parecer', 'creer']",,{},@ActualidadRT Como se parecen a Jesús ! empeza...,@actualidadrt[ROOT] como[mark] se[expl:pv] PA...,"{0: {'text': '@ActualidadRT', 'lemma': '@Actua..."
177,1451926101287129088,,['creer'],,{},"@SinEmbargoMX Estos poliquiteros ""sabelotodo""\...",@sinembargomx[ROOT] estos[det] poliquiteros[n...,"{0: {'text': '@SinEmbargoMX', 'lemma': '@SinEm..."
301,1456835186818260992,,['creer'],,{},@oscar_canton 😂😂😂 hora los diputados mamarrac...,@oscar_canton[obl] [dep] 😂[nsubj] 😂[fixed] 😂...,"{0: {'text': '@oscar_canton', 'lemma': '@oscar..."
315,1450445453741498112,,"['mandar', 'creer']",,{},@KKazzari @LulaBarba @beltrandelrio @AndyFayri...,@kkazzari[ROOT] @lulabarba[flat] @beltrandelr...,"{0: {'text': '@KKazzari', 'lemma': '@KKazzari'..."
316,1448340889651322880,,"['pedir', 'recordar', 'creer']",,{},@virgi_m13 Yo creo recordar de ir a un miting ...,@virgi_m13[ROOT] yo[nsubj] CREO[ROOT] RECORDA...,"{0: {'text': '@virgi_m13', 'lemma': '@virgi_m1..."


In [218]:
# Remove the rows collected in `not_in` and renumber from 0. drop=True discards
# the old index instead of inserting it as a stray 'index' column (which no later
# cell reads, and which made a second reset_index() on this frame fail).
ndata = ndata.drop(index=not_in.index).reset_index(drop=True)
ndata.shape[0]

1125

In [184]:
def get_range_info(tweet):
    """Measure how far the ccomp and the next verb sit from the ccomp head.

    Parameters
    ----------
    tweet : mapping
        Row providing ``'head'`` (head lemma), ``'ccomp_head_detail'``
        (``{lemma: ((head text, head index), (ccomp text, ccomp index))}``)
        and ``'details'`` (``{token index: {'text', 'pos', ...}}``).

    Returns
    -------
    dict
        ``'ccomp'``      -- ``(text, index)`` of the ccomp token.
        ``'ccomp_dist'`` -- number of non-``PUNCT`` tokens after the head whose
                            index is below the ccomp index.
        ``'v2'``         -- ``(text, index)`` of the first token after the head
                            whose ``'pos'`` is in ``VERB_POS``, or ``None``.
        ``'v2_dist'``    -- number of non-``PUNCT`` tokens after the head and
                            before ``v2``; ``None`` when no ``v2`` was found.
    """
    head_entry = tweet['ccomp_head_detail'][tweet['head']]
    _, v1_idx = head_entry[0]
    ccomp, ccomp_idx = head_entry[1]

    v2 = None
    words_before_ccomp = 0
    words_before_v2 = 0

    for idx, tok in tweet['details'].items():
        # Only tokens after the ccomp head take part in either count.
        if idx <= v1_idx:
            continue

        is_word = tok['pos'] != 'PUNCT'

        if v2 is None:
            if tok['pos'] in VERB_POS:
                # First verb after the head; it is not counted itself.
                v2 = (tok['text'], idx)
            elif is_word:
                words_before_v2 += 1

        if is_word and idx < ccomp_idx:
            words_before_ccomp += 1

    return {
        'ccomp': (ccomp, ccomp_idx),
        'ccomp_dist': words_before_ccomp,
        'v2': v2,
        'v2_dist': None if v2 is None else words_before_v2,
    }

In [152]:
# Take one row (position 12) as a worked example; it is passed to
# get_range_info a few cells further down.
s = ndata.iloc[12]
display(s)

tweet_id                                           1454942423797305088
in_comp                                                            NaN
verbs                                    ['decir', 'mostrar', 'creer']
ccomp_head                                                       decir
ccomp_head_detail          {'decir': (('dicen', 4), ('seguiría', 27))}
text_orig            @rodrigolussich y @DaniAmbrosino ustedes dicen...
details              {0: {'text': '@rodrigolussich', 'lemma': '@rod...
Name: 12, dtype: object

In [153]:
print(s['text_orig'])

@rodrigolussich y @DaniAmbrosino ustedes dicen q la @chinasuarez sería aún más T R O L A de lo q ya le mostró al 🌍🌎y le seguiría escribiendo a Icardi (a pesar de q creo ya se enteró de q el sigue casado)? Yo No creo...tiene hij@s!
#ElShowDeLosEscandalones


In [154]:
get_range_info(s)

Head: decir, v1 index: 4, ccomp: seguiría, ccomp index: 27

0 {'text': '@rodrigolussich', 'lemma': '@rodrigolussich', 'dep': 'ROOT', 'pos': 'PROPN', 'morph': }
1 {'text': 'y', 'lemma': 'y', 'dep': 'cc', 'pos': 'CCONJ', 'morph': }
2 {'text': '@DaniAmbrosino', 'lemma': '@DaniAmbrosino', 'dep': 'conj', 'pos': 'PROPN', 'morph': }
3 {'text': 'ustedes', 'lemma': 'tú', 'dep': 'nsubj', 'pos': 'PRON', 'morph': Case=Acc,Nom|Number=Plur|Person=2|Polite=Form|PronType=Prs}
4 {'text': 'dicen', 'lemma': 'decir', 'dep': 'ROOT', 'pos': 'VERB', 'morph': Mood=Ind|Number=Plur|Person=3|Tense=Pres|VerbForm=Fin}
5 {'text': 'q', 'lemma': 'q', 'dep': 'mark', 'pos': 'SCONJ', 'morph': }
6 {'text': 'la', 'lemma': 'el', 'dep': 'det', 'pos': 'DET', 'morph': Definite=Def|Gender=Fem|Number=Sing|PronType=Art}
7 {'text': '@chinasuarez', 'lemma': '@chinasuarez', 'dep': 'nsubj', 'pos': 'PROPN', 'morph': }
8 {'text': 'sería', 'lemma': 'ser', 'dep': 'cop', 'pos': 'AUX', 'morph': Mood=Cnd|Number=Sing|Person=3|VerbForm=Fin}


{'v2': ('mostró', 21), 'v2_dist': 16, 'ccomp_dist': 22}

In [219]:
# Apply get_range_info row by row (axis=1), giving a Series with one info dict per row.
range_info = ndata.apply(get_range_info, axis=1)

In [220]:
# Split the per-row info dicts into one Series per field, each named after its key.
ccomp, ccomp_dist, v2, v2_dist = (
    range_info.apply(lambda info, key=key: info[key]).rename(key)
    for key in ('ccomp', 'ccomp_dist', 'v2', 'v2_dist')
)

In [221]:
# Final layout: identifying columns, the four derived distance fields, then the
# text / dependency / detail columns. The index is renumbered from 0.
fdata = pd.concat(
    [
        ndata[['tweet_id','in_comp','verbs','head']],
        ccomp, ccomp_dist, v2, v2_dist,
        ndata[['text_orig','dependencies','ccomp_head_detail','details']],
    ],
    axis='columns',
).reset_index(drop=True)

In [222]:
fdata.head()

Unnamed: 0,tweet_id,in_comp,verbs,head,ccomp,ccomp_dist,v2,v2_dist,text_orig,dependencies,ccomp_head_detail,details
0,1453514708506525952,que,"['entender', 'creer']",entender,"(creen, 6)",2,"(creen, 6)",2.0,@nabbp08 @IvanCepedaCast No entiendo porqué al...,@nabbp08[ROOT] @ivancepedacast[flat] no[advmo...,"{'entender': (('entiendo', 3), ('creen', 6)), ...","{0: {'text': '@nabbp08', 'lemma': '@nabbp08', ..."
1,1453514708506525952,que,"['entender', 'creer']",creer,"(tiene, 21)",10,"(tiene, 21)",10.0,@nabbp08 @IvanCepedaCast No entiendo porqué al...,@nabbp08[ROOT] @ivancepedacast[flat] no[advmo...,"{'entender': (('entiendo', 3), ('creen', 6)), ...","{0: {'text': '@nabbp08', 'lemma': '@nabbp08', ..."
2,1455655561773690880,que,"['mostrar', 'creer']",creer,"(idiotas, 38)",5,"(hacía, 43)",10.0,@alferdez habla de país y su deuda. Muestra in...,@alferdez[nsubj] habla[ROOT] de[case] país[ob...,"{'creer': (('Creerá', 32), ('idiotas', 38))}","{0: {'text': '@alferdez', 'lemma': '@alferdez'..."
3,1455544578203913984,que,"['pedir', 'entender', 'creer']",creer,"(entendido, 7)",4,"(entendido, 7)",4.0,@diego_espacio @E_FLEISCHMAN Creo q ni tú has ...,@diego_espacio[ROOT] @e_fleischman[flat] CREO...,"{'creer': (('Creo', 2), ('entendido', 7)), 'en...","{0: {'text': '@diego_espacio', 'lemma': '@dieg..."
4,1455544578203913984,que,"['pedir', 'entender', 'creer']",entender,"(preguntado, 11)",3,"(preguntado, 11)",3.0,@diego_espacio @E_FLEISCHMAN Creo q ni tú has ...,@diego_espacio[ROOT] @e_fleischman[flat] CREO...,"{'creer': (('Creo', 2), ('entendido', 7)), 'en...","{0: {'text': '@diego_espacio', 'lemma': '@dieg..."


In [183]:
def get_norm(tweet):
    """Render a parsed tweet as one string with the interesting tokens highlighted.

    ``tweet`` is iterated token by token (each token must expose ``text``,
    ``is_punct``, ``pos_``, ``lemma_`` and ``dep_``). Each token is rendered by
    the first rule that matches:

    * punctuation: text unchanged;
    * a 'que' spelling (``que``, ``q``, ``k``, ``ke`` after unidecode + lower):
      upper-cased;
    * a ``VERB`` whose lemma is in ``verbs``: upper-cased, and additionally
      wrapped as ``<<TEXT>>`` when its dependency label is ``ccomp``;
    * anything else: lower-cased.

    Returns the rendered tokens concatenated, each preceded by a single space.
    """
    que_variations = {'que','q','k','ke'}
    pieces = []

    for tok in tweet:
        # ASCII-folded, lower-cased form used only for the 'que' comparison.
        folded = unidecode(tok.text).lower()

        if tok.is_punct:
            rendered = tok.text
        elif folded in que_variations:
            rendered = tok.text.upper()
        elif tok.pos_ == 'VERB' and tok.lemma_ in verbs:
            rendered = tok.text.upper()
            if tok.dep_ == 'ccomp':
                rendered = f'<<{rendered}>>'
        else:
            rendered = tok.text.lower()

        pieces.append(' ' + rendered)

    return ''.join(pieces)

In [197]:
def normalize(tweet):
    """Render one row's tokens as a string with head, V2 and ccomp marked.

    Parameters
    ----------
    tweet : mapping
        Row providing ``'head'``, ``'ccomp_head_detail'``, ``'v2'``
        (``(text, index)`` or ``None``), ``'ccomp'`` (``(text, index)``) and
        ``'details'`` (``{token index: {'text', 'pos', ...}}``).

    Each token is rendered by the first rule that matches:

    * ``'pos'`` is ``'PUNCT'``: text unchanged;
    * the ccomp head: ``'>>'`` + upper-cased text;
    * V2, only when it is a different token from the ccomp: upper-cased
      text + ``'[V2]'``;
    * the ccomp: upper-cased text + ``'<<'``;
    * a 'que' spelling (``que``, ``q``, ``k``, ``ke`` after unidecode + lower):
      upper-cased;
    * anything else: lower-cased.

    Returns the rendered tokens concatenated, each preceded by a single space.
    """
    QUE_VARS = {'que','q','k','ke'}

    head_idx = tweet['ccomp_head_detail'][tweet['head']][0][1]
    v2_info = tweet['v2']
    _, v2_idx = v2_info if v2_info is not None else (None, None)
    _, ccomp_idx = tweet['ccomp']

    pieces = []
    for idx, tok in tweet['details'].items():
        raw = tok['text']
        # ASCII-folded, lower-cased form used only for the 'que' comparison.
        folded = unidecode(raw).lower()

        if tok['pos'] == 'PUNCT':
            marked = raw
        elif idx == head_idx:
            marked = '>>' + raw.upper()
        elif idx == v2_idx and v2_idx != ccomp_idx:
            # V2 gets its own tag only when it is not the ccomp itself.
            marked = raw.upper() + '[V2]'
        elif idx == ccomp_idx:
            marked = raw.upper() + '<<'
        elif folded in QUE_VARS:
            marked = raw.upper()
        else:
            marked = raw.lower()

        pieces.append(' ' + marked)

    return ''.join(pieces)

In [210]:
# Build the marked-up text for every row (axis=1) and name the resulting Series
# 'text_norm'; the bare last expression previews it.
norm_new = fdata.apply(normalize, axis=1).rename('text_norm')
norm_new.head()

0     @nabbp08 @ivancepedacast no >>ENTIENDO porqué...
1     @nabbp08 @ivancepedacast no entiendo porqué a...
2     @alferdez habla de país y su deuda . muestra ...
3     @diego_espacio @e_fleischman >>CREO Q ni tú h...
4     @diego_espacio @e_fleischman creo Q ni tú has...
Name: text_norm, dtype: object

In [223]:
# Insert `text_norm` immediately after `text_orig`. The split position is looked
# up by column name rather than hard-coded as 9, and any `text_norm` column left
# over from a previous run is removed first, so re-running the cell does not
# create a duplicate column.
_without_norm = fdata.drop(columns='text_norm', errors='ignore')
_split = _without_norm.columns.get_loc('text_orig') + 1
fdata = pd.concat([
    _without_norm.iloc[:, :_split],
    norm_new,
    _without_norm.iloc[:, _split:]
], axis=1)

In [224]:
fdata.head()

Unnamed: 0,tweet_id,in_comp,verbs,head,ccomp,ccomp_dist,v2,v2_dist,text_orig,text_norm,dependencies,ccomp_head_detail,details
0,1453514708506525952,que,"['entender', 'creer']",entender,"(creen, 6)",2,"(creen, 6)",2.0,@nabbp08 @IvanCepedaCast No entiendo porqué al...,@nabbp08 @ivancepedacast no >>ENTIENDO porqué...,@nabbp08[ROOT] @ivancepedacast[flat] no[advmo...,"{'entender': (('entiendo', 3), ('creen', 6)), ...","{0: {'text': '@nabbp08', 'lemma': '@nabbp08', ..."
1,1453514708506525952,que,"['entender', 'creer']",creer,"(tiene, 21)",10,"(tiene, 21)",10.0,@nabbp08 @IvanCepedaCast No entiendo porqué al...,@nabbp08 @ivancepedacast no entiendo porqué a...,@nabbp08[ROOT] @ivancepedacast[flat] no[advmo...,"{'entender': (('entiendo', 3), ('creen', 6)), ...","{0: {'text': '@nabbp08', 'lemma': '@nabbp08', ..."
2,1455655561773690880,que,"['mostrar', 'creer']",creer,"(idiotas, 38)",5,"(hacía, 43)",10.0,@alferdez habla de país y su deuda. Muestra in...,@alferdez habla de país y su deuda . muestra ...,@alferdez[nsubj] habla[ROOT] de[case] país[ob...,"{'creer': (('Creerá', 32), ('idiotas', 38))}","{0: {'text': '@alferdez', 'lemma': '@alferdez'..."
3,1455544578203913984,que,"['pedir', 'entender', 'creer']",creer,"(entendido, 7)",4,"(entendido, 7)",4.0,@diego_espacio @E_FLEISCHMAN Creo q ni tú has ...,@diego_espacio @e_fleischman >>CREO Q ni tú h...,@diego_espacio[ROOT] @e_fleischman[flat] CREO...,"{'creer': (('Creo', 2), ('entendido', 7)), 'en...","{0: {'text': '@diego_espacio', 'lemma': '@dieg..."
4,1455544578203913984,que,"['pedir', 'entender', 'creer']",entender,"(preguntado, 11)",3,"(preguntado, 11)",3.0,@diego_espacio @E_FLEISCHMAN Creo q ni tú has ...,@diego_espacio @e_fleischman creo Q ni tú has...,@diego_espacio[ROOT] @e_fleischman[flat] CREO...,"{'creer': (('Creo', 2), ('entendido', 7)), 'en...","{0: {'text': '@diego_espacio', 'lemma': '@dieg..."


In [225]:
# Write the final frame to a new workbook inside the first selected data folder.
fdata.to_excel(
    data_dir[0] / 'added-distances-(FOR ADRIAN twitter-es-creer-5-912 (1)CLEAR ANNOTATION).xlsx'
)