# Overlap metrics

This section evaluates the 5-sequence generations of the large and small models.

In [None]:
from IPython.display import clear_output

#!pip install transformers==3.3.1
!pip install -U git+https://github.com/huggingface/transformers.git
!pip install datasets==1.0.2
#!pip install datasets==1.5.0

#clear_output()

In [None]:
# Mount Google Drive so the CSV files under drive/MyDrive can be read and written.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
# Default highlight colour passed to the pandas Styler calls in the result cells.
c =  '#7eca9c'

In [None]:
from datasets import load_dataset, Dataset
from itertools import product
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds
import transformers
import datasets
from transformers import AutoTokenizer, TFT5ForConditionalGeneration
import datetime
import os
import pandas
import warnings
from functools import reduce
warnings.filterwarnings('ignore')

In [None]:
# Print the library versions in use, for reproducibility of the scores.
tf_version = tf.__version__
print("Tensorflow: ", tf_version)
print("Transformers: ", transformers.__version__)
print("Datasets: ", datasets.__version__)


In [None]:
!pip install rouge_score
!pip install bleu

# Load the ROUGE and BLEU metric objects once at notebook level.
# NOTE(review): evaluator() loads its own copies, so these two globals look unused by it.
from datasets import load_metric
rouge = load_metric("rouge", seed=0)
bleu = load_metric("bleu", seed = 0)
import string

#things necessary to compute bleurt
import sys
# Keep only the program name so later code does not see the notebook's own arguments.
sys.argv = sys.argv[:1]
# Hide the pip/install output produced above.
clear_output()

## **Setup**

In [None]:
#row wise
# Supported BLEU variants and the n-gram order passed to bleu.compute for each.
_BLEU_MAX_ORDER = {'bleu-1': 1, 'bleu-3': 3, 'bleu-4': 4}


def _rouge_scores(data, cols):
  """Return {column: [rougeL score per row]} for every generation column in cols."""
  rouge = load_metric("rouge", seed=0)
  scores = {col: [] for col in cols}
  predictions = {col: data[col].to_list() for col in cols}
  for row, reference in enumerate(data['reference_CN'].to_list()):
    for col in cols:
      rouge.add(prediction=predictions[col][row], reference=reference)
      # NOTE(review): 'use_agregator' is kept exactly as the notebook spelled it;
      # confirm the keyword against the installed datasets release before upgrading.
      rouge_output = rouge.compute(rouge_types=['rougeL'], use_agregator=False)
      scores[col].append(rouge_output['rougeL'][0][2]) #fmeasure
  return scores


def _bleu_scores(data, cols, max_order):
  """Return {column: [BLEU score per row]} computed with the given max n-gram order."""
  bleu = load_metric("bleu", seed=0)
  # Built once: deletes every ASCII punctuation character from a token.
  strip_punct = str.maketrans('', '', string.punctuation)

  def tokenize(text):
    # Whitespace split first, then clean each token (a pure-punctuation token becomes '').
    return [tok.translate(strip_punct).lower() for tok in text.split()]

  # The references do not depend on the generation column, so tokenise them once.
  references = [[tokenize(text)] for text in data['reference_CN'].to_list()]
  scores = {}
  for col in cols:
    col_scores = []
    for text, refs in zip(data[col].to_list(), references):
      prediction = tokenize(text)
      if not prediction:
        # An empty generation gets score 0 instead of being sent to the metric.
        col_scores.append(0)
      else:
        bleu.add(prediction=prediction, reference=refs)
        col_scores.append(bleu.compute(max_order=max_order)['bleu'])
    scores[col] = col_scores
  return scores


def evaluator(somedata, metric): #takes data
  """
  Score every generation of every row against the reference of that row.

  input
    somedata: input data, column names should be of the type MODEL_decodingnr (i.e. DIALO_kp0, GPT2_tk0,...)
    can take also a dataset with different models generations at once.
    Every column other than 'index', 'HS', 'reference_CN' and 'decoding' is scored;
    'index' and 'reference_CN' must be present. Missing values are treated as ''.
    metric: a string, either 'rouge', 'bleu-1', 'bleu-3' or 'bleu-4'

  output
    the input dataframe is extended with one column per generation containing the related rouge/bleu score
    (named '<metric>_<generation column>'); the result is joined back on 'index'.

  raises
    ValueError if metric is not one of the supported names.
  """
  if metric != 'rouge' and metric not in _BLEU_MAX_ORDER:
    raise ValueError(
        "metric must be 'rouge', 'bleu-1', 'bleu-3' or 'bleu-4', got {!r}".format(metric))

  # Work on a copy with a fresh 0..n-1 row index so that a filtered or re-indexed
  # input frame is scored row by row just like an untouched one.
  data = somedata.copy().fillna('').reset_index(drop=True)
  tf.random.set_seed(0)
  cols = [el for el in data.columns if el not in ('index', 'HS', 'reference_CN', 'decoding')]

  if metric == 'rouge':
    diz = _rouge_scores(data, cols)
  else:
    diz = _bleu_scores(data, cols, _BLEU_MAX_ORDER[metric])
  diz['index'] = data['index'].to_list()

  out_data = pandas.DataFrame(diz)
  out_data.columns = [metric+'_'+el if el!='index' else el for el in list(out_data.columns)]
  return data.merge(out_data, on='index')


In [None]:
def _best_generation(data, score_cols, metric):
  """For each row, find the highest-scoring column among score_cols.

  Returns (winners, selected, scores): the winning score-column name per row (a Series
  sharing data's index), the text found in the matching generation column (the score
  column name with 'metric_' removed), and the winning score itself.
  """
  winners = data[score_cols].idxmax(axis=1)
  prefix = metric + '_'
  selected = []
  scores = []
  # Positional access keeps this independent of the row labels of the frame.
  for pos, winner in enumerate(winners):
    selected.append(data[winner.replace(prefix, '')].iloc[pos])
    scores.append(data[winner].iloc[pos])
  return winners, selected, scores


def selecter(evaluated_data, metric, by):
  """
  Pick, row by row, the generation with the highest score.

  input
    evaluated_data: data evaluated by evaluator; must provide 'index', 'HS', 'generated_CN',
    'reference_CN' and 'decoding' ('index' may also be the index of the frame)
    metric: a string, the prefix of the score columns (e.g. 'rouge', 'bleu-1')
    by: select the generation with highest score among 'all' rows, 'model', 'decoding',
    'model_decoding' ('model-decoding' is accepted as an alias)

  output
    dataframe containing the selected columns and related scores

  raises
    ValueError if by is not one of the supported values.
  """
  by = by.replace('-', '_')
  if by not in ('all', 'model', 'decoding', 'model_decoding'):
    raise ValueError(
        "by must be 'all', 'model', 'decoding' or 'model_decoding', got {!r}".format(by))

  data = evaluated_data.copy()
  if 'index' not in data.columns:
    # e.g. a csv read with index_col='index': bring the index back as a column.
    data = data.reset_index()

  #keep only numeric columns with scores
  score_cols = [col for col in data.columns if col.startswith(metric)]
  # Explicit copy: new columns are added below and must not write into a view of data.
  out_data = data[['index', 'HS', 'generated_CN', 'reference_CN', 'decoding']].copy()

  #select generation with highest score row-wise, amongst all generations
  if by == 'all':
    winners, selected, scores = _best_generation(data, score_cols, metric)
    out_data[metric+'_max'] = winners
    out_data['selected'] = selected
    out_data[metric+'_all_score'] = scores
    return out_data

  #the highest generation is chosen for each decoding (two characters before the final number)
  if by == 'decoding':
    # dict.fromkeys de-duplicates while keeping first-seen order, so the output
    # column order is the same on every run.
    for deco in dict.fromkeys(col[-3:-1] for col in score_cols):
      members = [col for col in score_cols if deco in col]
      winners, selected, scores = _best_generation(data, members, metric)
      label = metric + '_' + deco
      out_data['max_'+label] = winners
      out_data[label+'_selected'] = selected
      out_data[label+'_score'] = scores
    return out_data

  #the highest generation is chosen for each model (e.g. highest T5, highest DIALO, ecc)
  #or for each model-decoding (e.g. highest T5_tk, highest DIALO_tp, ecc)
  # 'model' strips the last four characters of the score column name,
  # 'model_decoding' only the final number.
  cut = 4 if by == 'model' else 1
  for group in dict.fromkeys(col[:-cut] for col in score_cols):
    members = [col for col in score_cols if col.startswith(group)]
    winners, selected, scores = _best_generation(data, members, metric)
    out_data['max_'+group] = winners
    out_data[group+'_selected'] = selected
    out_data[group+'_score'] = scores
  return out_data

In [None]:
def tabler(selected_data, metric, digits = 5):
  """
  Summarise every '<metric>..._score' column of a selection table.

  input
    selected_data: data produced by selecter
    metric: a string, the prefix of the score columns to summarise
    digits: number of decimals the statistics are rounded to

  output
    dataframe with one row per score column and the columns
    'subset', 'min', 'max', 'mean', 'median', 'std'
  """
  data = selected_data.copy()
  prefix = metric + '_'
  score_cols = [name for name in data.columns
                if name.startswith(metric) and name.endswith('score')]
  series_list = [data[name] for name in score_cols]

  # The subset label is the column name without the metric prefix and the '_score' suffix.
  summary = {'subset': [name.replace(prefix, '').replace('_score', '') for name in score_cols]}
  for stat in ('min', 'max', 'mean', 'median', 'std'):
    summary[stat] = [round(getattr(series, stat)(), digits) for series in series_list]
  return pandas.DataFrame(summary)

In [None]:
import re

def tabler2(selected_data, digits = 5):
  """
  Summarise every '<something>_generated_CN' column of an evaluated table.

  input
    selected_data: dataframe holding columns named '<prefix>_generated_CN'
    (e.g. the score columns evaluator adds for a 'generated_CN' column)
    digits: number of decimals the statistics are rounded to

  output
    dataframe with one row per matching column; 'subset' is the column name with
    '_generated_CN' replaced by '_score', followed by min, max, mean, median and std
  """
  data = selected_data.copy()
  # Raw string: the backslash classes must reach the regex engine unchanged.
  score_col = re.compile(r"[\w\-\d]+_generated_CN")
  filter_col = [col for col in data.columns if score_col.match(col)]
  diz = {'subset':[], 'min':[], 'max':[], 'mean':[], 'median':[], 'std':[]}
  for c in filter_col:
    values = data[c]
    diz['subset'].append(c.replace('_generated_CN', '_score'))
    diz['min'].append(round(values.min(),digits))
    diz['max'].append(round(values.max(),digits))
    diz['mean'].append(round(values.mean(),digits))
    diz['median'].append(round(values.median(),digits))
    diz['std'].append(round(values.std(),digits))
  return pandas.DataFrame(diz)

In [None]:
def tabler3(selected_data, metric, what_to_calculate, digits = 5):
  """
  Build a decoding x model cross-table of one statistic.

  it can be visualised one thing at a time (what_to_calculate= mean, min, max, median, std, count)
  and as cross-table of model and decoding

  input
    selected_data: data produced by selecter; every column starting with 'max' is expected
    to hold winning score-column names shaped like '<metric>_<model>_<decoding><number>',
    with the scores in the column '<name without max_>_score'
    metric: kept for signature compatibility, not used
    what_to_calculate: 'mean', 'min', 'max', 'median', 'std' or 'count'
    digits: number of decimals the statistics are rounded to

  output
    dataframe with one row per decoding ('tp', 'tk', 'kp', 'bs') and one column per model;
    cells that receive no value stay at 0

  raises
    ValueError if what_to_calculate is not one of the supported names.
  """
  statistics_ = ('mean', 'min', 'max', 'median', 'std')
  if what_to_calculate != 'count' and what_to_calculate not in statistics_:
    raise ValueError(
        "what_to_calculate must be one of {} or 'count', got {!r}".format(
            statistics_, what_to_calculate))

  data = selected_data.copy()
  # Float zeros: the cells are later overwritten with float statistics.
  diz={'decoding':['tp','tk','kp','bs',],'T5':[0.0]*4,'dialoGPT':[0.0]*4,'gpt2':[0.0]*4,'BART':[0.0]*4,
     'BERT':[0.0]*4}
  d = pandas.DataFrame(diz)
  for col in data.columns:
    if not col.startswith('max'):
      continue
    score_col = col.replace('max_','')+'_score'
    # Drop the trailing generation number; sorted() makes the processing order stable.
    for s in sorted({x[:-1] for x in data[col]}):
      model=s.split('_')[1]
      deco = s.split('_')[2]
      # Literal substring match: the subset name is not meant to be a regex.
      data_sub = data[data[col].str.contains(s, regex=False)]
      if what_to_calculate == 'count':
        # Number of rows won by this model/decoding pair.
        value = len(data_sub)
      else:
        value = round(getattr(data_sub[score_col], what_to_calculate)(), digits)
      d.loc[d['decoding']==deco,model]=value

  return d

In [None]:
def summaryr(collection_of_tables,
             size_of_models = 'small',
             include_median = False):

  """
  Stack four summary tables into one comparison table.

  input: a collection of 4 tables, created with tabler
  output: a "stacked" table with comparisons
  important: follow this order 'ROUGE', 'BLEU-1', 'BLEU-3', 'BLEU-4'

  size_of_models: label written into the 'size' column of every row
  include_median: when True, a boolean 'highest_median' column (best median within
  each metric) is added next to 'highest_mean'
  """

  summary = [el.copy() for el in collection_of_tables] # careful: follow the order
  metric_name = ('ROUGE', 'BLEU-1', 'BLEU-3', 'BLEU-4')
  assert len(summary) == len(metric_name)

  for i, name in enumerate(metric_name):
    ctab = summary[i]
    ctab['metric'] = name

    # boolean column for winner
    ctab['highest_mean'] = ctab['mean'] >= ctab['mean'].max()

    # int column for rank
    ctab = ctab.sort_values(by='mean', ascending = False)
    ctab['rank_mean'] = np.arange(1,len(ctab)+1)

    if include_median:
      # boolean column for median winner
      ctab['highest_median'] = ctab['median'] >= ctab['median'].max()

    summary[i] = ctab

  done = pandas.concat(summary)
  done = done.reset_index(drop=True)
  done = done.replace({'gpt2': 'GPT-2', 'dialoGPT': 'DialoGPT'})
  done = done.sort_values(by=['rank_mean', 'metric']).reset_index(drop = True)
  done['size'] = size_of_models
  done = done.rename(columns = {'subset':'model'})

  columns = ['size', 'model', 'metric', 'highest_mean', 'rank_mean', 'mean', 'std', 'min', 'median', 'max']
  if include_median:
    # Keep the requested column: reindex would otherwise drop it.
    columns.insert(columns.index('highest_mean') + 1, 'highest_median')
  done = done.reindex(columns=columns)
  return done

In [None]:
def metrics_(collection_of_tables,
             size_of_models = 'small'):

  """
  Join four summary tables side by side, one group of columns per metric.

  input: a collection of 4 tables (columns subset, min, max, mean, median, std),
  given in the order 'ROUGE', 'BLEU-1', 'BLEU-3', 'BLEU-4'
  output: one row per subset with the columns 'size', 'model', 'decoding' followed by
  min/max/mean/median/std of every metric, sorted by descending 'meanROUGE'
  """
  from functools import reduce

  metric_name = ('ROUGE', 'BLEU-1', 'BLEU-3', 'BLEU-4')
  stat_names = ('min', 'max', 'mean', 'median', 'std')

  tables = [el.copy() for el in collection_of_tables] # careful: follow the order
  assert len(tables) == len(metric_name)

  renamed = []
  for met, table in zip(metric_name, tables):
    table = table.sort_values(by = 'subset').reset_index(drop=True)
    # Suffix every statistic with the metric so the joined columns stay distinct.
    table.columns = ['subset'] + ['{}{}'.format(stat, met) for stat in stat_names]
    renamed.append(table)

  df = reduce(lambda left, right: pandas.merge(left, right, on='subset'), renamed)

  # The subset label is split on '_': first part is the model, second the decoding.
  df['model']  = df.subset.apply(lambda label: label.split('_')[0])
  df['decoding']  = df.subset.apply(lambda label: label.split('_')[1])
  df['size'] = size_of_models

  ordered = ['size', 'model', 'decoding']
  for met in metric_name:
    ordered.extend('{}{}'.format(stat, met) for stat in stat_names)
  df = df.reindex(columns = ordered)

  df = df.replace({'gpt2': 'GPT-2', 'dialoGPT': 'DialoGPT'})
  return df.sort_values(by='meanROUGE', ascending=False).reset_index(drop=True)


def displayres(master, what ='mean'):

  """
  this takes the result of metrics_ and display it

  For every model in master, prints "<model> (<size>)" and displays a table with one row
  per decoding and the '<what>ROUGE', '<what>BLEU-1', '<what>BLEU-3', '<what>BLEU-4'
  columns, the best value of each column highlighted.

  what: statistic prefix of the columns to show ('mean', 'max', ...); 'mean' and 'max'
  have their own highlight colour, any other value falls back to 'yellow'
  """

  # Fallback colour so that e.g. what='median' no longer fails on an unassigned name.
  c = {'mean': '#e28413', 'max': '#de3c4b'}.get(what, 'yellow')

  d_col = ['{}ROUGE'.format(what), '{}BLEU-1'.format(what),
           '{}BLEU-3'.format(what), '{}BLEU-4'.format(what)]

  for mod in master.model.unique():
    curr = master[master.model == mod].copy()
    print('{} ({})'.format(curr['model'].unique()[0], curr['size'].unique()[0]))
    curr = curr[['decoding'] + d_col]
    curr = curr.sort_values(by='decoding').reset_index(drop=True)
    display(curr.style.highlight_max(d_col, color = c, axis = 0))
    print('\n')

## Compute scores

In [None]:
# path = path to_generated_data

# Read the generations to score from the mounted Drive; the file is ';'-separated.
generations = pandas.read_csv('drive/MyDrive/master/output_.csv', delimiter=';')

# Show the first row as a quick check of the loaded columns.
generations.head(1)
# len(generations.columns)

In [None]:
%%time
# Score every generation column with ROUGE and with BLEU-1 (two separate result frames).
df_eval = evaluator(somedata=generations, metric='rouge')
df_eval_bsingle = evaluator(somedata=generations, metric='bleu-1')
# BLEU-3 and BLEU-4 are currently switched off.
#df_eval_b3 = evaluator(somedata=generations, metric='bleu-3')
#df_eval_b4 = evaluator(somedata=generations, metric='bleu-4')

In [None]:
# Outer-join the ROUGE and BLEU-1 frames on the shared, non-score columns.
# NOTE(review): the key here is 'Decoding' (capital D) while evaluator and selecter use
# 'decoding' — confirm which spelling the csv header really has.
df_eval_merged = reduce(lambda left,right: pandas.merge(left,right,on=['index', 'HS', 'generated_CN', 'reference_CN', 'Decoding'],
                                            how='outer'), [df_eval, df_eval_bsingle])
df_eval_merged.head(5)

In [None]:
# Persist the merged scores on Drive (the row index is written as the first csv column).
df_eval_merged.to_csv('drive/MyDrive/master/metrics_output_gpt3.5.csv')

In [None]:
# Summary statistics of the '*_generated_CN' score columns over every row.
tabler2(df_eval_merged) #over all decodings

In [None]:
# Same summary restricted to the beam-search rows.
# NOTE(review): this cell filters on 'Decoding' while the next three use 'decoding' —
# confirm the actual column name; one of the two spellings raises a KeyError.
tabler2(df_eval_merged[df_eval_merged['Decoding']=='beam-search'])

In [None]:
# Same summary restricted to the rows whose 'decoding' value is 'top-p'.
tabler2(df_eval_merged[df_eval_merged['decoding']=='top-p'])

In [None]:
# Same summary restricted to the rows whose 'decoding' value is 'top-k'.
tabler2(df_eval_merged[df_eval_merged['decoding']=='top-k'])

In [None]:
# Same summary restricted to the rows whose 'decoding' value is 'k-p'.
tabler2(df_eval_merged[df_eval_merged['decoding']=='k-p'])

In [None]:
# filename = 'TEST_LARGE_5seq_rouge.csv'
# you_want_to_save = True
# if you_want_to_save:
#   df_eval.to_csv(filename, index = False, encoding = 'utf-8')
#   from google.colab import files
#   files.download(filename)

In [None]:
# filename = 'TEST_LARGE_5seq_bleu-1.csv'
# you_want_to_save = True
# if you_want_to_save:
#   df_eval_bsingle.to_csv(filename, index = False, encoding = 'utf-8')
#   from google.colab import files
#   files.download(filename)

In [None]:
# filename = 'TEST_LARGE_5seq_bleu-3.csv'
# you_want_to_save = True
# if you_want_to_save:
#   df_eval_b3.to_csv(filename, index = False, encoding = 'utf-8')
#   from google.colab import files
#   files.download(filename)

In [None]:
# filename = 'TEST_LARGE_5seq_bleu-4.csv'
# you_want_to_save = True
# if you_want_to_save:
#   df_eval_b4.to_csv(filename, index = False, encoding = 'utf-8')
#   from google.colab import files
#   files.download(filename)

## Select best CN among all decoding mechanisms for each model

#### Rouge

In [None]:
# Load previously computed ROUGE scores, select the best generation and summarise it.
# NOTE(review): index_col='index' turns 'index' into the frame index, but selecter reads an
# 'index' column — confirm this combination works on the real file.
# NOTE(review): the heading says "for each model" but by='model_decoding' is used here,
# unlike the BLEU cells below which use by='model' — confirm which is intended.
df_eval = pandas.read_csv('drive/MyDrive/master/trial_set_metrics.csv', delimiter=';', index_col='index')
df_sel = selecter(evaluated_data= df_eval, metric='rouge', by='model_decoding')
df_tab = tabler(selected_data= df_sel, metric='rouge')
# Highlight the best 'max' and 'mean' value with the notebook-level colour c.
df_tab.style.highlight_max(['max', 'mean'], color = c, axis = 0)

In [None]:
# Decoding x model cross-table of the mean ROUGE score of the selected generations.
df_tab3 = tabler3(selected_data= df_sel, metric='rouge', what_to_calculate='mean')
df_tab3.style.highlight_max(color = c, axis = 0)

#### Bleu-1

In [None]:
# Load the BLEU-1 scores from a local csv, keep the best generation per model, summarise.
# NOTE(review): the file name matches the one in the commented-out export cell above;
# the file must exist in the working directory.
df_eval_bsingle = pandas.read_csv('TEST_LARGE_5seq_bleu-1.csv')
df_sel_bsingle = selecter(evaluated_data= df_eval_bsingle, metric='bleu-1', by='model')
df_tab_bsingle = tabler(selected_data= df_sel_bsingle, metric='bleu-1')
df_tab_bsingle.style.highlight_max(['max', 'mean'], color = c, axis = 0)

In [None]:
# Decoding x model cross-table of the mean BLEU-1 score of the selected generations.
df_tab3_bsingle = tabler3(selected_data= df_sel_bsingle, metric='bleu-1', what_to_calculate='mean')
df_tab3_bsingle.style.highlight_max(color = c, axis = 0)

#### Bleu-3

In [None]:
# Load the BLEU-3 scores from a local csv, keep the best generation per model, summarise.
df_eval_b3 = pandas.read_csv('TEST_LARGE_5seq_bleu-3.csv')
df_sel_b3 = selecter(evaluated_data= df_eval_b3, metric='bleu-3', by='model')
df_tab_b3 = tabler(selected_data= df_sel_b3, metric='bleu-3')
df_tab_b3.style.highlight_max(['max', 'mean'], color = c, axis = 0)

In [None]:
# Decoding x model cross-table of the mean BLEU-3 score of the selected generations.
df_tab3_b3 = tabler3(selected_data= df_sel_b3, metric='bleu-3', what_to_calculate='mean')
df_tab3_b3.style.highlight_max(color = c, axis = 0)

#### Bleu-4

In [None]:
# Load the BLEU-4 scores from a local csv, keep the best generation per model, summarise.
df_eval_b4 = pandas.read_csv('TEST_LARGE_5seq_bleu-4.csv')
df_sel_b4 = selecter(evaluated_data= df_eval_b4, metric='bleu-4', by='model')
df_tab_b4 = tabler(selected_data= df_sel_b4, metric='bleu-4')
df_tab_b4.style.highlight_max(['max', 'mean'], color = c, axis = 0)

In [None]:
# Decoding x model cross-table of the mean BLEU-4 score of the selected generations.
df_tab3_b4 = tabler3(selected_data= df_sel_b4, metric='bleu-4', what_to_calculate='mean')
df_tab3_b4.style.highlight_max(color = c, axis = 0)

### *🡲 summary*

In [None]:
# Stack the four per-metric tables; the order must be ROUGE, BLEU-1, BLEU-3, BLEU-4.
avgl = summaryr((df_tab, df_tab_bsingle, df_tab_b3, df_tab_b4), size_of_models='large')
# avgl.to_csv('eval_summary1_large.csv', index = False)
avgl.style.highlight_max(['max', 'median', 'mean'], color = c, axis = 0)

In [None]:
# Keep only the rows flagged as having the highest mean, rounded to two decimals.
avgl[avgl.highest_mean].round(2)

## Select best CN for each ***combination*** decoding mechanism-model

#### Rouge

In [None]:
# Load the ROUGE scores and keep the best generation per model/decoding combination.
df_eval = pandas.read_csv('TEST_LARGE_5seq_rouge.csv')
df_sel_md = selecter(evaluated_data= df_eval, metric='rouge', by='model_decoding')
df_tab_md = tabler(selected_data= df_sel_md, metric='rouge')

In [None]:
# Recomputes the same selection as the previous cell, then overwrites df_tab_md with the
# decoding x model cross-table of mean ROUGE scores.
df_sel_md = selecter(evaluated_data= df_eval, metric='rouge', by='model_decoding')
df_tab_md = tabler3(selected_data= df_sel_md, metric='rouge', what_to_calculate='mean')
df_tab_md

#### Bleu-1

In [None]:
# BLEU-1: best generation per model/decoding combination and its mean cross-table.
df_eval_lg_bsingle= pandas.read_csv('TEST_LARGE_5seq_bleu-1.csv')
df_sel_lg_md_bsingle = selecter(evaluated_data= df_eval_lg_bsingle, metric='bleu-1', by='model_decoding')
df_tab_lg_md2_bsingle = tabler3(selected_data= df_sel_lg_md_bsingle, metric='bleu-1', what_to_calculate='mean')
df_tab_lg_md2_bsingle

#### Bleu-3

In [None]:
# BLEU-3: best generation per model/decoding combination and its mean cross-table.
df_eval_lg_b3= pandas.read_csv('TEST_LARGE_5seq_bleu-3.csv')
df_sel_lg_md_b3 = selecter(evaluated_data= df_eval_lg_b3, metric='bleu-3', by='model_decoding')
df_tab_lg_md2_b3 = tabler3(selected_data= df_sel_lg_md_b3, metric='bleu-3', what_to_calculate='mean')
df_tab_lg_md2_b3

#### Bleu-4

In [None]:
# BLEU-4: best generation per model/decoding combination and its mean cross-table.
df_eval_lg_b4= pandas.read_csv('TEST_LARGE_5seq_bleu-4.csv')
df_sel_lg_md_b4 = selecter(evaluated_data= df_eval_lg_b4, metric='bleu-4', by='model_decoding')
df_tab_lg_md2_b4 = tabler3(selected_data= df_sel_lg_md_b4, metric='bleu-4', what_to_calculate='mean')
df_tab_lg_md2_b4

### *🡲 summary*

In [None]:
# One summary table (min/max/mean/median/std) per metric for the model/decoding selections.
df_tab_lg_md = tabler(selected_data= df_sel_md, metric='rouge')
df_tab_lg_md_bsingle = tabler(selected_data= df_sel_lg_md_bsingle, metric='bleu-1')
df_tab_lg_md_b3 = tabler(selected_data= df_sel_lg_md_b3, metric='bleu-3')
df_tab_lg_md_b4 = tabler(selected_data= df_sel_lg_md_b4, metric='bleu-4')

In [None]:
# Join the four tables side by side; the list order must be ROUGE, BLEU-1, BLEU-3, BLEU-4.
tabs = [df_tab_lg_md, df_tab_lg_md_bsingle, df_tab_lg_md_b3, df_tab_lg_md_b4]
evall = metrics_(tabs, size_of_models='large')
# evall.to_csv('evaluation_large_models.csv', index=False)
# Highlight the best value of every 'mean' column in colour c and of every 'max' column in yellow.
evall.style.highlight_max([el for el in evall.columns if 'mean' in el], color = c, axis = 0).highlight_max([el for el in evall.columns if 'max' in el], color = 'yellow', axis = 0)

In [None]:
# Per-model tables, first for the mean columns and then for the max columns.
displayres(evall, 'mean')
displayres(evall, 'max')

# [syntactic complexity](https://spacy.io/usage/linguistic-features)

- Maximum Syntactic Depth (MSD): the maximum depth among the dependency trees calculated over each sentence composing a CN.
- Average Syntactic Depth (ASD): the average depth of the dependency trees of the sentences in each CN.
- Number of Sentences (NST)

In [None]:
import spacy
import statistics

# Pipeline used by the syntactic-depth helpers to split sentences and reach each sentence root.
en_nlp = spacy.load('en_core_web_sm')

In [None]:
def walk_tree(node, depth):
    """Return the depth of the deepest leaf below node, with node itself at depth."""
    has_children = (node.n_lefts + node.n_rights) > 0
    if not has_children:
        # A leaf ends the path: its own depth is the answer.
        return depth
    child_depths = [walk_tree(child, depth + 1) for child in node.children]
    return max(child_depths)

def get_max_sd(data):
  """Maximum Syntactic Depth: the deepest dependency tree among the sentences of data['text'].

  Returns 0 when the parsed text yields no sentence (e.g. an empty string).
  """
  docu = en_nlp(data['text'])
  return max((walk_tree(sent.root, 0) for sent in docu.sents), default=0)
def get_avg_sd(data):
  """Average Syntactic Depth: mean dependency-tree depth over the sentences of data['text'].

  Returns 0 when the parsed text yields no sentence (e.g. an empty string).
  """
  docu = en_nlp(data['text'])
  depths = [walk_tree(sent.root, 0) for sent in docu.sents]
  # statistics.mean raises on an empty list, so guard the no-sentence case.
  return statistics.mean(depths) if depths else 0
def get_nst(data):
  """Number of Sentences: how many sentences the pipeline finds in data['text']."""
  parsed = en_nlp(data['text'])
  return sum(1 for _ in parsed.sents)

In [None]:
# Add the three syntactic-complexity columns row by row (each helper reads row['text']).
# NOTE(review): no notebook-level `df` is assigned in the visible cells — confirm which
# frame (with a 'text' column) is meant here.
df['msd'] = df.apply(get_max_sd, axis=1)
df['asd'] = df.apply(get_avg_sd, axis=1)
df['nst'] = df.apply(get_nst, axis=1)