# Metadata

```yaml
Course:    DS 5001 
Module:    09 Lab
Topic:     Using SVD
Author:    R.C. Alvarado
Date:      28 March 2023 (revised)
```

**Purpose:** We create word vectors by applying a singular value decomposition to a pointwise mutual information word-word matrix. 

# Configuration

In [1]:
data_in = "../data/novels"
data_prefix = 'novels'

In [2]:
OHCO = ['genre', 'author', 'book', 'chapter', 'para_num', 'sent_num', 'token_num']
BAG = OHCO[2:5] # Paragraphs

# Word Embedding
window = 3

# Libraries

In [3]:
import pandas as pd
import numpy as np
import scipy as sp

# Process

In [4]:
# pd.read_csv(f'{data_in}/{data_prefix}-CORPUS.csv')

In [5]:
TOKENS = pd.read_csv(f'{data_in}/{data_prefix}-TOKENS.csv')

In [6]:
TOKENS

Unnamed: 0,book,chapter,para_num,sent_num,token_num,pos,term_str,term_id
0,secretadversary,1,0,1,0,DT,the,24127
1,secretadversary,1,0,1,1,NNP,young,27354
2,secretadversary,1,0,1,2,NNP,adventurers,399
3,secretadversary,1,0,1,3,NNP,ltd,14406
4,secretadversary,1,1,0,0,JJ,tommy,24529
...,...,...,...,...,...,...,...,...
1500412,baskervilles,11,114,1,7,RBR,more,15586
1500413,baskervilles,11,114,1,8,JJ,comfortable,4529
1500414,baskervilles,11,114,1,9,IN,outside,16771
1500415,baskervilles,11,114,1,10,IN,than,24112


In [7]:
PARAS = TOKENS.groupby(BAG).apply(lambda x: x.term_str.tolist()).reset_index(drop=True)

In [8]:
PARAS.head()

0                            [a, scandal, in, bohemia]
1                                                  [i]
2    [to, sherlock, holmes, she, is, always, the, w...
3    [i, had, seen, little, of, holmes, lately, my,...
4    [one, night, it, was, on, the, twentieth, of, ...
dtype: object

In [67]:
def get_context_words(x):
    data = []
    id  = x.name
    row = x[0]
    for i in range(len(row)):
        data2 = []
        for j in range(-2,3):
            a = i + j
            if a >= 0 and a < len(row):
                data2.append((j, row[a])) 
        data.append(data2)
    return data

In [68]:
TEST = PARAS.to_frame(0).apply(get_context_words, 1)

In [72]:
TEST

0        [[(0, a), (1, scandal), (2, in)], [(-1, a), (0...
1                                               [[(0, i)]]
2        [[(0, to), (1, sherlock), (2, holmes)], [(-1, ...
3        [[(0, i), (1, had), (2, seen)], [(-1, i), (0, ...
4        [[(0, one), (1, night), (2, it)], [(-1, one), ...
                               ...                        
27336    [[(0, and), (1, now), (2, the)], [(-1, and), (...
27337    [[(0, no), (1, sooner), (2, had)], [(-1, no), ...
27338    [[(0, not), (1, hear), (2, it)], [(-1, not), (...
27339    [[(0, as), (1, if), (2, in)], [(-1, as), (0, i...
27340    [[(0, from), (1, that), (2, chamber)], [(-1, f...
Length: 27341, dtype: object

In [70]:
TEST2 = pd.DataFrame([(i, j, item[0], item[1]) 
     for i, row in enumerate(TEST)
        for j, row2 in enumerate(row)
            for item in row2])

In [71]:
TEST2

Unnamed: 0,0,1,2,3
0,0,0,0,a
1,0,0,1,scandal
2,0,0,2,in
3,0,1,-1,a
4,0,1,0,scandal
...,...,...,...,...
7339002,27340,174,0,of
7339003,27340,174,1,usher
7339004,27340,175,-2,house
7339005,27340,175,-1,of


In [73]:
TEST2.columns = ['bag_id', 'window_id', 'offset', 'term_str']

In [74]:
A = TEST2[TEST2.offset == 0].reset_index(drop=True)
B = TEST2[TEST2.offset != 0].reset_index(drop=True)
skipgrams = A.merge(B, on=['bag_id','window_id'], how='left')\
    .rename(columns={'term_str_x':'target','term_str_y':'probe','offset_y':'dist'})

In [75]:
skipgrams = skipgrams[['target','probe','dist']].dropna().sort_values('target').reset_index(drop=True)
skipgrams['dist'] = skipgrams['dist'].astype('int')

In [76]:
skipgrams.head()

Unnamed: 0,target,probe,dist
0,a,scandal,1
1,a,curious,1
2,a,extraordinary,-1
3,a,was,-2
4,a,pace,2


In [16]:
# skipgrams.set_index(['target','probe'])#.unstack()

In [17]:
# TEST2 = TEST2.set_index(['bag_id','window_id', 'offset'])
# TEST2.unstack().fillna('')

# Add Skigram weights (as GloVe does)

In [18]:
# skipgrams['glove_weight'] = np.round(np.abs(1 / skipgrams['dist']), 2)

In [19]:
# skipgrams.head(10)

# Get Unigram Probabilities

We have already computed these in the vocab table.

# Import vocab table

In [77]:
VOCAB = pd.read_csv(f'{data_in}/{data_prefix}-VOCAB.csv')
# VOCAB = VOCAB[VOCAB.stop == 0]

In [78]:
# vocab = tx.get_table('vocab', db_file, index_col=['term_id'])
# vocab = vocab[vocab.stop == 0]

In [79]:
VOCAB.sort_values('p', ascending=False).head()

Unnamed: 0,term_id,term_str,n,p,port_stem,stop,df,idf,tfidf_sum,tfidf_mean,tfidf_max,pos_max
24127,24127,the,85329,0.05687,the,1,320,0.0,0.0,0.0,0.0,DT
24470,24470,to,45176,0.030109,to,1,320,0.0,0.0,0.0,0.0,TO
862,862,and,44991,0.029986,and,1,320,0.0,0.0,0.0,0.0,CC
16459,16459,of,42638,0.028417,of,1,320,0.0,0.0,0.0,0.0,IN
11947,11947,i,32985,0.021984,i,1,316,0.005463,180.193615,0.563105,3.403384,PRP


# Get $P(x)$

In [80]:
p_x = VOCAB[['term_str','p']].reset_index().set_index('term_str')['p']

In [81]:
p_x.sort_values(ascending=False).head()

term_str
the    0.056870
to     0.030109
and    0.029986
of     0.028417
i      0.021984
Name: p, dtype: float64

In [None]:
# skipgrams.groupby('target').target.count() / skipgrams.target.sum()

# Compute Normalized PMI for Skipgrams

**PMI**

$log \dfrac{P(x,y)}{P(x)P(y)}$

**NMPI**

$\dfrac{log\dfrac{P(x,y)}{P(x)P(y)}}{-log P(x,y)}$

See [G. Bouma 2009, eq. 7](https://pdfs.semanticscholar.org/1521/8d9c029cbb903ae7c729b2c644c24994c201.pdf)

# Create compressed skipgram table

In [26]:
skipgrams2 = skipgrams.groupby(['target','probe']).probe.count()\
    .to_frame().rename(columns={'probe':'n'})\
    .reset_index().set_index(['target','probe'])

In [27]:
skipgrams2.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,n
target,probe,Unnamed: 2_level_1
a,a,246
a,aback,1
a,abandon,2
a,abandons,1
a,abated,1
a,abatement,1
a,abbess,4
a,abbey,2
a,abbot,3
a,abc,2


# Compute $P(x,y)$

In [28]:
N = skipgrams2.n.sum() # Might smooth by adding value to pairs with 0-value

In [29]:
skipgrams2['p_xy'] = skipgrams2.n / N

In [30]:
skipgrams2.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,n,p_xy
target,probe,Unnamed: 2_level_1,Unnamed: 3_level_1
a,a,246,4.213358e-05
a,aback,1,1.712747e-07
a,abandon,2,3.425494e-07
a,abandons,1,1.712747e-07
a,abated,1,1.712747e-07
a,abatement,1,1.712747e-07
a,abbess,4,6.850988e-07
a,abbey,2,3.425494e-07
a,abbot,3,5.138241e-07
a,abc,2,3.425494e-07


# Compute $PMI(x;y)$

In [31]:
skipgrams2['pmi_xy'] = skipgrams2.apply(lambda row: np.log(row.p_xy / (p_x.loc[row.name[0]] * p_x.loc[row.name[1]])), 1)

In [32]:
skipgrams2.sort_values('pmi_xy', ascending=False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,n,p_xy,pmi_xy
target,probe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
twarnt,crows,2,3.425494e-07,12.86251
cest,loeuvre,2,3.425494e-07,12.86251
rien,cest,2,3.425494e-07,12.86251
charly,magne,2,3.425494e-07,12.86251
patria,nunc,2,3.425494e-07,12.86251
crows,twarnt,2,3.425494e-07,12.86251
wholeman,marquand,2,3.425494e-07,12.86251
loeuvre,cest,2,3.425494e-07,12.86251
marquand,wholeman,2,3.425494e-07,12.86251
nunc,fracto,2,3.425494e-07,12.86251


In [33]:
skipgrams2['npmi_xy'] = skipgrams2.pmi_xy / -( np.log(skipgrams2.p_xy) )

In [34]:
skipgrams2.sort_values('npmi_xy', ascending=False).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,n,p_xy,pmi_xy,npmi_xy
target,probe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
smack,smack,6,1.027648e-06,12.457045,0.903454
ry,ve,3,5.138241e-07,12.574828,0.868344
ve,ry,3,5.138241e-07,12.574828,0.868344
cest,rien,2,3.425494e-07,12.86251,0.864018
marquand,wholeman,2,3.425494e-07,12.86251,0.864018


# Keep only positives

Changed since lab.

In [35]:
skipgrams2.loc[skipgrams2.npmi_xy < 0, 'pnpmi_xy'] = 0
skipgrams2.loc[skipgrams2.npmi_xy >= 0, 'pnpmi_xy'] =  skipgrams2.npmi_xy

In [36]:
skipgrams2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,n,p_xy,pmi_xy,npmi_xy,pnpmi_xy
target,probe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
a,a,246,4.213358e-05,-2.149792,-0.213386,0.0
a,aback,1,1.712747e-07,0.406469,0.026089,0.026089
a,abandon,2,3.425494e-07,-0.487349,-0.032737,0.0
a,abandons,1,1.712747e-07,1.910546,0.122628,0.122628
a,abated,1,1.712747e-07,0.118787,0.007624,0.007624


# Create PNPMI Matrix

In [37]:
SGM = skipgrams2.npmi_xy.unstack().fillna(0)

In [38]:
SGM.head()

probe,a,aback,abaft,abandon,abandoned,abandoning,abandons,abasement,abashed,abate,...,zoöphagy,zufalle,zum,zuniga,zusammen,à,æt,ætat,ça,émeutes
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
a,-0.213386,0.026089,0.0,-0.032737,0.0,0.0,0.122628,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
aback,0.026089,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
abaft,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
abandon,-0.032737,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
abandoned,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [39]:
SGM.loc['man'].sort_values(ascending=False).head()

probe
legged         0.340504
wounding       0.326470
personating    0.326470
incites        0.326470
unpractical    0.326470
Name: man, dtype: float64

In [40]:
skipgrams2.loc['prussian'].sort_values('n', ascending=False)

Unnamed: 0_level_0,n,p_xy,pmi_xy,npmi_xy,pnpmi_xy
probe,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
franco,1,1.712747e-07,12.86251,0.825578,0.825578
it,1,1.712747e-07,3.039961,0.19512,0.19512
the,1,1.712747e-07,1.50824,0.096806,0.096806
war,1,1.712747e-07,8.59983,0.551979,0.551979


# SVD

In [41]:
from scipy import sparse
import scipy.sparse.linalg as linalg

In [42]:
sparse = sparse.csr_matrix(SGM.values)

In [43]:
SVD = linalg.svds(sparse, k=256)

In [44]:
U, S, V = SVD

In [45]:
U.shape, S.shape, V.shape

((27378, 256), (256,), (256, 27378))

In [46]:
word_vecs = U + V.T
word_vecs_norm = word_vecs / np.sqrt(np.sum(word_vecs * word_vecs, axis=1, keepdims=True))

In [47]:
WE = pd.DataFrame(word_vecs_norm, index=SGM.index)
WE.index.name = 'word_str'

In [48]:
WE.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,255
word_str,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
a,0.013849,0.027852,9.117356e-15,0.008469,1.951358e-15,-9.059876e-16,-4.819776e-17,0.023903,0.001839,-3.755517e-15,...,-0.128068,0.16447,-0.305037,-0.297994,-6.607001e-15,-0.05798,-0.102613,-0.276027,-8.232699e-16,0.073095
aback,0.000202,-0.025893,-1.432905e-14,0.058885,8.453735e-15,6.200973e-15,-8.478915e-15,0.121579,0.136744,1.922402e-14,...,0.033902,-0.088168,-0.139799,-0.058193,-3.408507e-15,-0.036545,-0.049069,0.025502,-1.376502e-16,0.055713
abaft,0.042357,-0.043332,1.2854510000000002e-17,-0.06056,-1.418736e-14,-1.24046e-15,-1.728931e-15,0.021453,0.067742,1.922713e-14,...,-0.032025,-0.024272,0.087306,-0.050848,-4.884714e-16,-0.004592,0.002073,-0.07961,-8.034068e-17,0.022465
abandon,0.007568,-0.014853,2.175405e-14,-0.123054,-2.934644e-14,-3.685461e-15,4.45437e-15,0.004444,-0.007751,-2.64642e-14,...,0.076529,0.142699,0.133869,0.063106,1.322586e-15,0.011911,-0.121623,0.153128,-4.9908910000000005e-17,0.123737
abandoned,-0.013846,0.122148,5.677193e-14,-0.117773,-2.345948e-15,-1.354161e-14,-7.636808e-15,0.032982,-0.0354,-1.330202e-14,...,0.021848,0.005364,0.008472,0.062229,-1.477448e-15,-0.019922,-0.086218,0.093767,1.197931e-16,0.132631


In [49]:
def word_sims(word, n=10):
    try:
        sims = SGM.loc[word].sort_values(ascending=False).head(n).reset_index().values
        return sims
    except KeyError as e:
        print('Word "{}" not in vocabulary.'.format(word))
        return None

In [50]:
print(word_sims('happy'))

[['transit' 0.45355928441921106]
 ['anniversary' 0.45355928441921106]
 ['prosperous' 0.413127131611673]
 ['supremely' 0.4090697268137657]
 ['swain' 0.38155540016356587]
 ['prospero' 0.338555446335461]
 ['compleatly' 0.338555446335461]
 ['thankfulness' 0.32866130589539844]
 ['bygone' 0.32866130589539844]
 ['dauntless' 0.3200906116028747]]


In [51]:
def word_sim_report(word):
    sims = word_sims(word)
    for sim_word, score in sims:
        context = ' '.join(skipgrams2.loc[sim_word].index.values.tolist()[:5])
        print("{} ({}) {}".format(sim_word.upper(), score, context))
        print('-'*80)

In [52]:
word_sim_report('woman')

UNMENTIONABLE (0.4188501650732945) presence some stood woman
--------------------------------------------------------------------------------
SHOD (0.4188501650732945) a elderly slip woman
--------------------------------------------------------------------------------
JACKONET (0.4188501650732945) is or the woman
--------------------------------------------------------------------------------
GRABS (0.4188501650732945) at her married woman
--------------------------------------------------------------------------------
PROSING (0.4188501650732945) of old this woman
--------------------------------------------------------------------------------
REFORMED (0.4188501650732945) a be but woman
--------------------------------------------------------------------------------
SILHOUETTED (0.4188501650732945) against the was woman
--------------------------------------------------------------------------------
LAUNDRY (0.4111158023726) and as at brought folded
---------------------------------

In [53]:
word_sim_report('man')

LEGGED (0.3405039403413688) a alone an and are
--------------------------------------------------------------------------------
WOUNDING (0.32646954828747865) aim at man the
--------------------------------------------------------------------------------
PERSONATING (0.32646954828747865) a man mechanic the
--------------------------------------------------------------------------------
INCITES (0.32646954828747865) man other rogue the
--------------------------------------------------------------------------------
UNPRACTICAL (0.32646954828747865) an and man myself
--------------------------------------------------------------------------------
DANDIFIED (0.32646954828747865) little man quaint this
--------------------------------------------------------------------------------
COARSELY (0.32646954828747865) as clad man sized
--------------------------------------------------------------------------------
CLOYS (0.32646954828747865) man only possession which
---------------------------

In [54]:
word_sim_report('young')

ADVENTURERS (0.4218204316609113) a after all and as
--------------------------------------------------------------------------------
LTD (0.416433825110785) adventurers downwards is responded that
--------------------------------------------------------------------------------
CRATCHITS (0.40303898373373426) about again and as at
--------------------------------------------------------------------------------
BRIGHAM (0.39704423413499956) great has himself our religion
--------------------------------------------------------------------------------
BERESFORDS (0.39704423413499956) ah being in letter to
--------------------------------------------------------------------------------
WOMANHOOD (0.3793799118085404) had her i young
--------------------------------------------------------------------------------
SMIRKING (0.3793799118085404) at drebber his young
--------------------------------------------------------------------------------
OPENSHAWS (0.3793799118085404) of young
---------

# Define some semantic functions

Added after lecture.

In [55]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import euclidean_distances

In [56]:
def get_word_vector(term_str):
    """Get a numpy array from the glove matrix and shape for input into cosine function"""
    return SGM.loc[term_str].values.reshape(-1, 1).T

def get_nearest_vector(wv, method='cosine', n=1):
    """Get the nearest word vectors to a given word vector"""
    if method == 'cosine':
        sims = cosine_similarity(SGM.values, wv)
    elif method == 'euclidean':
        eds = euclidean_distances(SGM.values, wv)
        sims = 1 - (eds/eds.max())
    else:
        print('Invalid method {}; defaulting to cosine.'.format(method))
        sims = cosine_similarity(SGM.values, wv)
    return pd.DataFrame(sims, index=SGM.index, columns=['score']).sort_values('score',ascending=False).head(n+1).iloc[1:]

def get_sims(term_str, method='cosine', n=10):
    """Get the top n words for a given word based on cosine similarity"""
    wv = get_word_vector(term_str)
    sims =  get_nearest_vector(wv, method=method, n=n) 
    return sims

def get_analogy(a, b, c, method='cosine'):
    """Infer missing analogical term"""
    print()
    try:
        A = get_word_vector(a)
        B = get_word_vector(b)
        C = get_word_vector(c)
        D = np.add(np.subtract(B, A), C)
        X = get_nearest_vector(C, method=method, n=1)
        return X.iloc[0].name
    except ValueError as e:
        print(e)
        return None

In [57]:
get_nearest_vector(get_word_vector('woman'),  n=10)

Unnamed: 0_level_0,score
word_str,Unnamed: 1_level_1
man,0.136766
gentleman,0.122441
girl,0.122036
fellow,0.100054
lady,0.096727
enough,0.092133
women,0.090621
young,0.086759
creature,0.086439
men,0.084672


In [58]:
def get_opposite(a, b, method='cosine'):
    A = get_word_vector(a)
    B = get_word_vector(b)
    C = np.subtract(A, B)
    X = get_nearest_vector(C, n=1, method=method)
    return X
#     return X.iloc[0].name

In [59]:
get_sims('woman')

Unnamed: 0_level_0,score
word_str,Unnamed: 1_level_1
man,0.136766
gentleman,0.122441
girl,0.122036
fellow,0.100054
lady,0.096727
enough,0.092133
women,0.090621
young,0.086759
creature,0.086439
men,0.084672


In [60]:
test = get_nearest_vector(get_word_vector('king'), n=10)

In [61]:
test

Unnamed: 0_level_0,score
word_str,Unnamed: 1_level_1
versus,0.170774
wargrave,0.167875
rents,0.140268
longitudinal,0.128922
smollet,0.126644
felstein,0.115455
intuitions,0.114274
litre,0.112416
hanover,0.107851
duddings,0.101666


In [62]:
get_sims('love')

Unnamed: 0_level_0,score
word_str,Unnamed: 1_level_1
loved,0.104928
affection,0.103414
your,0.090075
friendship,0.083776
tenderness,0.083345
esteem,0.082778
pity,0.080782
florentine,0.078317
transylvanian,0.074951
murdrer,0.074201


In [63]:
get_opposite('man','beard')

Unnamed: 0_level_0,score
word_str,Unnamed: 1_level_1
woman,0.115028


In [64]:
get_analogy('man','boy','girl')




'woman'

In [65]:
get_analogy('male', 'king', 'female')




'garbed'

In [66]:
SGM

probe,a,aback,abaft,abandon,abandoned,abandoning,abandons,abasement,abashed,abate,...,zoöphagy,zufalle,zum,zuniga,zusammen,à,æt,ætat,ça,émeutes
word_str,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
a,-0.213386,0.026089,0.0,-0.032737,0.0,0.0,0.122628,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
aback,0.026089,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
abaft,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
abandon,-0.032737,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
abandoned,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
à,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
æt,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ætat,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ça,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
