# Lexical Variety 

In [16]:
import pandas as pd
import string
import numpy as np
import math

In [17]:
# Creating functions for analyzing lexical variety.
def K(speech):
    '''
    Return Yule's K, a measure of how repetitive the vocabulary of a text is.

    The text is tokenized with ``str.split()``, i.e. on whitespace only, so
    tokens are case-sensitive and keep any punctuation attached to them.

    With N the total number of tokens and V_i the number of distinct tokens
    that occur exactly i times, the statistic is

        K = 10**4 * (sum_i(i**2 * V_i) - N) / N**2

    A text in which every token is unique gives 0; more repetition gives a
    larger value.

    Parameters
    ----------
    speech : str
        The text to analyze.

    Returns
    -------
    float
        Yule's K for the text, or ``nan`` when the text contains no tokens
        (the statistic is undefined for N == 0; the formula would otherwise
        divide by zero).
    '''
    words = speech.split()
    total_tokens = len(words)

    # Empty or whitespace-only text: K is undefined, so report nan instead of
    # raising ZeroDivisionError in the final division.
    if total_tokens == 0:
        return float("nan")

    word_frequencies = {}
    for word in words:
        word_frequencies[word] = word_frequencies.get(word, 0) + 1

    # sum_i(i**2 * V_i) groups the distinct words by how often they occur.
    # Summing the squared count of every distinct word yields the same integer
    # without building the intermediate frequency-of-frequencies table.
    sum_squared_frequencies = sum(count**2 for count in word_frequencies.values())

    return 10**4 * (sum_squared_frequencies - total_tokens) / total_tokens**2

def entropy(speech):
    '''
    Return the word-level entropy of a text, in base 10 and scaled by 100.

    The text is tokenized with ``str.split()``, i.e. on whitespace only, so
    tokens are case-sensitive and keep any punctuation attached to them.

    With p_w the relative frequency of each distinct token w, the value is

        H = -100 * sum_w(p_w * log10(p_w))

    Parameters
    ----------
    speech : str
        The text to analyze.

    Returns
    -------
    float
        The scaled entropy. A text with no tokens yields 0.0, as does a text
        that consists of a single distinct token.
    '''
    words = speech.split()
    total_tokens = len(words)

    # Nothing to sum over; return a float so the result type does not depend
    # on the input.
    if total_tokens == 0:
        return 0.0

    word_frequencies = {}
    for word in words:
        word_frequencies[word] = word_frequencies.get(word, 0) + 1

    word_probabilities = [count / total_tokens for count in word_frequencies.values()]

    # math.log10 is the dedicated base-10 logarithm; it is usually more
    # accurate than the two-argument form math.log(p, 10), which divides two
    # natural logarithms.
    return -100 * sum(p * math.log10(p) for p in word_probabilities)

In [3]:
# Load the poem corpus; the relative path is resolved against the current
# working directory.
POEMS_CSV = "span_eng_poems_correct.csv"
poems = pd.read_csv(POEMS_CSV)

In [10]:
# The analysis below uses four text columns of this frame:
#   orig_span   - the original Spanish poems
#   hum_trans   - the human translations
#   goog_trans  - the Google translations
#   deepl_trans - the DeepL translations
poems

Unnamed: 0.1,Unnamed: 0,orig_span,hum_trans,goog_trans,deepl_trans,translator_name,time_period
0,0,"¿Qué farayu o qué serád de mibi? \nHabibi, \nn...","What shall I do? What will become of me? \nOh,...",What farayu or what will become of mibi?\nHabi...,"What farayu or what will bed of mibi?\nHabibi,...",anon,med
1,1,"De los sos ojos tan fuertemientre llorando, \n...","His eyes flooding with bitter tears, the Cid c...","From your eyes so strong while crying,\nHe tur...","Of your eyes so strongly weeping,\nI turned my...",John A. Crow,med
2,2,"¡Merced, ya rey señor por amor de caridad! \nL...","Justice and mercy, my Lord the King, I beseech...","Mercy, now king, sir, for the love of charity!...","Merced, ya rey señor por amor de caridad!\nThe...",John Hookham Frere,med
3,3,Sant Migael de la Tunba es un grand monesterio...,San Miguel de la Tumba is a convent vast and w...,"Sant Migael de la Tunba is a great monastery,\...","Sant Migael de la Tunba is a great monastery,\...",Henry Wadsworth Longfellow,med
4,4,"Colgava delant ella un buen aventadero, \nen e...",Descending low before her face a screen of fea...,There was a good window hanging in front of he...,"It hung in front of it a good adventadero,\nin...",Henry Wadsworth Longfellow,med
...,...,...,...,...,...,...,...
260,260,"Éste, que ves, engaño colorido, \nque del arte...",This artifice of colors that you see \nWhich b...,"This one, what do you see, colorful deception,...","This, that you see, colorful deception,\nthat ...",John A. Crow,baroque
261,261,De buscar a Narciso fatigada \nsin permitir so...,"Seeking Narcissus in my weariness, \nWith neve...",Searching for Narcissus tired\nwithout allowin...,Of searching for Narcissus weary\nwithout allo...,Roderick Gill,baroque
262,262,"Esta tarde, mi bien, cuando te hablaba, \ncomo...","This afternoon. my Love, as I pled weeping, \n...","This afternoon, my good, when I spoke to you,\...","This afternoon, my good, when I was talking to...",John A. Crow,baroque
263,263,"Detente, sombra de mi bien esquivo, \nimagen d...","Stay, fleeting shadow of my love whose chain \...","Stop, shadow of my elusive good,\nimage of the...","Stop, shadow of my elusive good,\nimage of the...",John A. Crow,baroque


In [14]:
# For Yule's K Value: print the mean over all poems for each text column.
# Each column is paired with the exact message printed in front of its mean.
yules_k_messages = {
    "orig_span": "The mean Yule's K Value across all observations for the original spanish poems is: ",
    "hum_trans": "The mean Yule's K Value across all observations for the human translated poems is: ",
    "goog_trans": "The mean Yule's K Value across all observations for the Google translated poems is: ",
    "deepl_trans": "The mean Yule's K Value across all observations for the DeepL translated poems is: ",
}
for text_column, message in yules_k_messages.items():
    per_poem_k = poems[text_column].apply(K)
    print(message, np.mean(per_poem_k))

The mean Yule's K Value across all observations for the original spanish poems is:  118.53804770355354
The mean Yule's K Value across all observations for the human translated poems is:  96.00100776129915
The mean Yule's K Value across all observations for the Google translated poems is:  139.71406232063867
The mean Yule's K Value across all observations for the DeepL translated poems is:  148.88276458039417


In [18]:
# For entropy: print the mean over all poems for each text column.
# Each column is paired with the exact message printed in front of its mean.
entropy_messages = {
    "orig_span": "The mean entropy value across all observations for the original spanish poems is: ",
    "hum_trans": "The mean entropy value across all observations for the human translated poems is: ",
    "goog_trans": "The mean entropy value across all observations for the Google translated poems is: ",
    "deepl_trans": "The mean entropy value across all observations for the DeepL translated poems is: ",
}
for text_column, message in entropy_messages.items():
    per_poem_entropy = poems[text_column].apply(entropy)
    print(message, np.mean(per_poem_entropy))

The mean entropy value across all observations for the original spanish poems is:  182.52516510606466
The mean entropy value across all observations for the human translated poems is:  192.29929923606062
The mean entropy value across all observations for the Google translated poems is:  182.8303332210397
The mean entropy value across all observations for the DeepL translated poems is:  182.2127800931713
