# Feature Engineering and Syntactic Similarity

## Remark<div class='tocSkip'/>

The code in this notebook differs slightly from the printed book. 

Several layout and formatting commands, like `figsize` to control figure size or subplot commands, are removed in the book.

All of this is done to simplify the code in the book and put the focus on the important parts instead of formatting.

## Setup<div class='tocSkip'/>

Set directory locations. If working on Google Colab: copy files and install required libraries.

In [4]:
# Detect Google Colab by checking whether its module has already been loaded.
import sys, os
ON_COLAB = 'google.colab' in sys.modules

if ON_COLAB:
    # On Colab, download this chapter's setup script from the book's GitHub repository.
    GIT_ROOT = 'https://github.com/blueprints-for-text-analytics-python/blueprints-text/raw/master'
    os.system(f'wget {GIT_ROOT}/ch05/setup.py')

# Execute setup.py in the notebook's own namespace (-i).
%run -i setup.py

You are working on a local system.
Files will be searched relative to "..".


## Load Python Settings<div class="tocSkip"/>

Common imports, defaults for formatting in Matplotlib, Pandas etc.

In [5]:
# Run the shared settings script (common imports and formatting defaults).
%run "settings.py"

# Reload imported modules automatically before code is executed.
%reload_ext autoreload
%autoreload 2
# Render inline figures as PNG images.
%config InlineBackend.figure_format = 'png'

# Data preparation

In [6]:
# Four example sentences used for the hand-made one-hot encoding.
sentences = ["It was the best of times",
             "it was the worst of times",
             "it was the age of wisdom",
             "it was the age of foolishness"]

# Whitespace tokenization; case is preserved, so "It" and "it" remain distinct tokens.
tokenized_sentences = [sentence.split() for sentence in sentences]

# All distinct tokens. A set has no guaranteed order, so the positions
# shown below can differ from one kernel session to the next.
vocabulary = {token for tokens in tokenized_sentences for token in tokens}

import pandas as pd
# Pair every vocabulary word with its position in the set's iteration order.
[[word, position] for position, word in enumerate(vocabulary)]

[['worst', 0],
 ['foolishness', 1],
 ['best', 2],
 ['times', 3],
 ['it', 4],
 ['the', 5],
 ['It', 6],
 ['of', 7],
 ['was', 8],
 ['wisdom', 9],
 ['age', 10]]

# One-hot by hand

In [7]:
def onehot_encode(tokenized_sentence, vocab=None):
    """Return a binary presence vector for a tokenized sentence.

    Args:
        tokenized_sentence: iterable of (hashable) tokens, e.g. a list of words.
        vocab: iterable of vocabulary words that defines the vector positions.
            Defaults to the notebook-level ``vocabulary``.

    Returns:
        A list with one entry per vocabulary word, in the iteration order of
        ``vocab``: 1 if the word occurs in the sentence, otherwise 0. Tokens
        that are not in the vocabulary are ignored.
    """
    if vocab is None:
        # Looked up at call time so the function follows the current global vocabulary.
        vocab = vocabulary
    # Set membership avoids rescanning the token list for every vocabulary word.
    present = set(tokenized_sentence)
    return [1 if w in present else 0 for w in vocab]

# Encode every tokenized sentence against the shared vocabulary.
onehot = list(map(onehot_encode, tokenized_sentences))

# Print each vector next to the sentence it represents.
for vector, text in zip(onehot, sentences):
    print("%s: %s" % (vector, text))

[0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0]: It was the best of times
[1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0]: it was the worst of times
[0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1]: it was the age of wisdom
[0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1]: it was the age of foolishness


In [8]:
# Tabular view of the one-hot vectors: one row per sentence, one column per vocabulary word.
pd.DataFrame(onehot, columns=list(vocabulary))

Unnamed: 0,worst,foolishness,best,times,it,the,It,of,was,wisdom,age
0,0,0,1,1,0,1,1,1,1,0,0
1,1,0,0,1,1,1,0,1,1,0,0
2,0,0,0,0,1,1,0,1,1,1,1
3,0,1,0,0,1,1,0,1,1,0,1


In [9]:
# Element-wise AND of the first two one-hot vectors flags the words both sentences contain.
sim = [first & second for first, second in zip(onehot[0], onehot[1])]
# The number of shared words is the sum of those flags.
sum(sim)

4

In [10]:
import numpy as np
# For 0/1 vectors the dot product counts the positions where both vectors are 1.
np.dot(onehot[0], onehot[1])

np.int64(4)

In [11]:
# Dot product of every sentence vector with the second sentence's vector.
np.dot(onehot, onehot[1])

array([4, 6, 4, 4])

## Out of vocabulary

In [12]:
# Encode an unseen sentence; tokens that are not in the vocabulary get no position in the vector.
onehot_encode("the age of wisdom is the best of times".split())

[0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1]

In [13]:
# A sentence made up only of out-of-vocabulary tokens encodes to an all-zero vector.
onehot_encode("John likes to watch movies. Mary likes movies too.".split())

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

## document term matrix

In [14]:
# The list of one-hot vectors (one row per sentence) serves as the document-term matrix.
onehot

[[0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0],
 [1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0],
 [0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1],
 [0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1]]

## similarities

In [15]:
import numpy as np
# Pairwise shared-word counts: multiply the matrix of one-hot vectors by its transpose.
onehot_matrix = np.asarray(onehot)
onehot_matrix @ onehot_matrix.T

array([[6, 4, 3, 3],
       [4, 6, 4, 4],
       [3, 4, 6, 5],
       [3, 4, 5, 6]])

# scikit-learn one-hot vectorization

In [16]:
from sklearn.preprocessing import MultiLabelBinarizer
# Learn the label set from the vocabulary (passed as a single sample),
# then binarize every tokenized sentence against it.
lb = MultiLabelBinarizer().fit([vocabulary])
lb.transform(tokenized_sentences)

array([[1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1],
       [0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0],
       [0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0]])

# CountVectorizer

In [17]:
from sklearn.feature_extraction.text import CountVectorizer
# Count vectorizer with default settings.
cv = CountVectorizer()

In [18]:
# Extend the example corpus with two further sentences.
more_sentences = [
    *sentences,
    "John likes to watch movies. Mary likes movies too.",
    "Mary also likes to watch football games.",
]
pd.DataFrame(more_sentences)

Unnamed: 0,0
0,It was the best of times
1,it was the worst of times
2,it was the age of wisdom
3,it was the age of foolishness
4,John likes to watch movies. Mary likes movies too.
5,Mary also likes to watch football games.


In [19]:
# Learn the vocabulary from the extended corpus.
cv.fit(more_sentences)

0,1,2
,input,'content'
,encoding,'utf-8'
,decode_error,'strict'
,strip_accents,
,lowercase,True
,preprocessor,
,tokenizer,
,stop_words,
,token_pattern,'(?u)\\b\\w\\w+\\b'
,ngram_range,"(1, ...)"


In [20]:
# Feature names (vocabulary) learned by the vectorizer.
print(cv.get_feature_names_out())

['age' 'also' 'best' 'foolishness' 'football' 'games' 'it' 'john' 'likes'
 'mary' 'movies' 'of' 'the' 'times' 'to' 'too' 'was' 'watch' 'wisdom'
 'worst']


In [21]:
# Transform the sentences into a document-term matrix using the fitted vectorizer.
dt = cv.transform(more_sentences)

In [22]:
# Display the summary of the document-term matrix.
dt

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 38 stored elements and shape (6, 20)>

In [23]:
# Dense, labeled view of the matrix: one column per feature name.
pd.DataFrame(dt.toarray(), columns=cv.get_feature_names_out())

Unnamed: 0,age,also,best,foolishness,football,games,it,john,likes,mary,movies,of,the,times,to,too,was,watch,wisdom,worst
0,0,0,1,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,0
1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,1,0,0,1
2,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,0
3,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0
4,0,0,0,0,0,0,0,1,2,1,2,0,0,0,1,1,0,1,0,0
5,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0


In [24]:
from sklearn.metrics.pairwise import cosine_similarity
# Cosine similarity between the first and the second document.
cosine_similarity(dt[0], dt[1])

array([[0.83333333]])

In [25]:
# Number of documents in the extended corpus.
len(more_sentences)

6

In [26]:
# Pairwise cosine similarity between all documents.
pd.DataFrame(cosine_similarity(dt, dt))

Unnamed: 0,0,1,2,3,4,5
0,1.0,0.83,0.67,0.67,0.0,0.0
1,0.83,1.0,0.67,0.67,0.0,0.0
2,0.67,0.67,1.0,0.83,0.0,0.0
3,0.67,0.67,0.83,1.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.52
5,0.0,0.0,0.0,0.0,0.52,1.0


# TF/IDF

In [27]:
from sklearn.feature_extraction.text import TfidfTransformer
# Re-weight the count matrix dt with TF-IDF.
tfidf = TfidfTransformer()
tfidf_dt = tfidf.fit_transform(dt)

In [28]:
# Dense, labeled view of the TF-IDF weights.
pd.DataFrame(tfidf_dt.toarray(), columns=cv.get_feature_names_out())

Unnamed: 0,age,also,best,foolishness,football,games,it,john,likes,mary,movies,of,the,times,to,too,was,watch,wisdom,worst
0,0.0,0.0,0.57,0.0,0.0,0.0,0.34,0.0,0.0,0.0,0.0,0.34,0.34,0.47,0.0,0.0,0.34,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.34,0.0,0.0,0.0,0.0,0.34,0.34,0.47,0.0,0.0,0.34,0.0,0.0,0.57
2,0.47,0.0,0.0,0.0,0.0,0.0,0.34,0.0,0.0,0.0,0.0,0.34,0.34,0.0,0.0,0.0,0.34,0.0,0.57,0.0
3,0.47,0.0,0.0,0.57,0.0,0.0,0.34,0.0,0.0,0.0,0.0,0.34,0.34,0.0,0.0,0.0,0.34,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.31,0.5,0.25,0.61,0.0,0.0,0.0,0.25,0.31,0.0,0.25,0.0,0.0
5,0.0,0.42,0.0,0.0,0.42,0.42,0.0,0.0,0.34,0.34,0.0,0.0,0.0,0.0,0.34,0.0,0.0,0.34,0.0,0.0


In [29]:
# Pairwise cosine similarity computed on the TF-IDF weighted matrix.
pd.DataFrame(cosine_similarity(tfidf_dt, tfidf_dt))

Unnamed: 0,0,1,2,3,4,5
0,1.0,0.68,0.46,0.46,0.0,0.0
1,0.68,1.0,0.46,0.46,0.0,0.0
2,0.46,0.46,1.0,0.68,0.0,0.0
3,0.46,0.46,0.68,1.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.43
5,0.0,0.0,0.0,0.0,0.43,1.0


In [30]:
# Load the corpus from CSV (path is relative to the working directory)
# and parse the "Fecha" column as dates.
file = "datos/lima-andina-articulos-texto-fecha.csv"
anuncios = pd.read_csv(file, parse_dates=["Fecha"])
anuncios.head()


Unnamed: 0,Fecha,Texto
0,1906-08-02,"SOCIEDAD HIJOS DE ANCASH - Anteanoche tuvo lugar una reunión de estudiantes ancashinos residentes en esta capital, con el objeto de propender al desarrollo de los intereses del departamento de Anc..."
1,1906-08-12,"SOCIEDAD HIJOS DE ANCASH - A las dos de la tarde de hoy celebrará sesión esta sociedad, en el local de la calle del Tigre, número 173, con el objeto de ocuparse de la incorporación de los Ancashin..."
2,1906-09-15,"SOCIEDAD HIJOS DE ANCASH - El día de mañana domingo, a las 2 de la tarde, celebrará sesión esta sociedad en el local de la calle del Tigre número 173."
3,1906-10-14,"SOCIEDAD HIJOS DE ANCASH - Hoy a las 3 de la tarde celebrará esta sociedad sesión de junta general, en la casa número 137 de la calle del Tigre."
4,1906-10-28,"HIJOS DE ANCASH - Mañana a las dos de la tarde, celebrará sesión de junta general la sociedad de este nombre, en el local de la calle del Tigre número 173."


In [31]:
# Replace missing texts with a placeholder string.
# The result is assigned back to the column: calling fillna(inplace=True) on a
# column selection is deprecated chained assignment and leaves the DataFrame
# unchanged once pandas Copy-on-Write is active.
# NOTE(review): the placeholder spelling 'unkown' is kept as in the original.
anuncios['Texto'] = anuncios['Texto'].fillna('unkown')
anuncios.head()

Unnamed: 0,Fecha,Texto
0,1906-08-02,"SOCIEDAD HIJOS DE ANCASH - Anteanoche tuvo lugar una reunión de estudiantes ancashinos residentes en esta capital, con el objeto de propender al desarrollo de los intereses del departamento de Anc..."
1,1906-08-12,"SOCIEDAD HIJOS DE ANCASH - A las dos de la tarde de hoy celebrará sesión esta sociedad, en el local de la calle del Tigre, número 173, con el objeto de ocuparse de la incorporación de los Ancashin..."
2,1906-09-15,"SOCIEDAD HIJOS DE ANCASH - El día de mañana domingo, a las 2 de la tarde, celebrará sesión esta sociedad en el local de la calle del Tigre número 173."
3,1906-10-14,"SOCIEDAD HIJOS DE ANCASH - Hoy a las 3 de la tarde celebrará esta sociedad sesión de junta general, en la casa número 137 de la calle del Tigre."
4,1906-10-28,"HIJOS DE ANCASH - Mañana a las dos de la tarde, celebrará sesión de junta general la sociedad de este nombre, en el local de la calle del Tigre número 173."


In [32]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Build the TF-IDF document-term matrix directly from the raw texts.
# Note: tfidf and dt are rebound here and no longer refer to the toy example above.
tfidf = TfidfVectorizer()
dt = tfidf.fit_transform(anuncios["Texto"])

In [33]:
# Display the summary of the corpus document-term matrix.
dt

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 228634 stored elements and shape (6575, 14165)>

In [34]:
# Bytes occupied by the array of stored values (dt.data).
dt.data.nbytes

1829072

In [35]:
%%time
# Time the full pairwise similarity. The slice bound 10000 exceeds the number of rows
# reported for dt above, so the slices select every document.
cosine_similarity(dt[0:10000], dt[0:10000])

CPU times: user 1.12 s, sys: 258 ms, total: 1.38 s
Wall time: 1.42 s


array([[1.        , 0.31114423, 0.25800858, ..., 0.11339386, 0.19595568,
        0.15608257],
       [0.31114423, 1.        , 0.5417709 , ..., 0.1672854 , 0.19733392,
        0.12933128],
       [0.25800858, 0.5417709 , 1.        , ..., 0.26003889, 0.23261051,
        0.1618944 ],
       ...,
       [0.11339386, 0.1672854 , 0.26003889, ..., 1.        , 0.21845166,
        0.08319015],
       [0.19595568, 0.19733392, 0.23261051, ..., 0.21845166, 1.        ,
        0.14464607],
       [0.15608257, 0.12933128, 0.1618944 , ..., 0.08319015, 0.14464607,
        1.        ]], shape=(6575, 6575))

## Stopwords

In [36]:
from spacy.lang.es.stop_words import STOP_WORDS as stopwords
print(len(stopwords))
# Rebuild the TF-IDF matrix with spaCy's Spanish stop words passed as stop_words.
tfidf = TfidfVectorizer(stop_words=list(stopwords))
dt = tfidf.fit_transform(anuncios["Texto"])
dt

521


<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 155397 stored elements and shape (6575, 13769)>

## min_df

In [37]:
# Integer min_df: drop terms that occur in fewer than 2 documents.
tfidf = TfidfVectorizer(stop_words=list(stopwords), min_df=2)
dt = tfidf.fit_transform(anuncios["Texto"])
dt

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 148877 stored elements and shape (6575, 7249)>

In [38]:
# Float min_df: the threshold is a proportion of the documents instead of an absolute count.
tfidf = TfidfVectorizer(stop_words=list(stopwords), min_df=.0001)
dt = tfidf.fit_transform(anuncios["Texto"])
dt

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 155397 stored elements and shape (6575, 13769)>

## max_df

In [39]:
# max_df=0.1: drop terms that occur in more than 10% of the documents.
tfidf = TfidfVectorizer(stop_words=list(stopwords), max_df=0.1)
dt = tfidf.fit_transform(anuncios["Texto"])
dt

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 112885 stored elements and shape (6575, 13740)>

In [40]:
# Same max_df threshold, but without a stop word list, for comparison.
tfidf = TfidfVectorizer(max_df=0.1)
dt = tfidf.fit_transform(anuncios["Texto"])
dt

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 128192 stored elements and shape (6575, 14113)>

## n-grams

In [41]:
# Compare matrix shape and size of the stored values for
# n-grams up to length 2 and up to length 3.
for max_n in (2, 3):
    tfidf = TfidfVectorizer(stop_words=list(stopwords), ngram_range=(1, max_n), min_df=2)
    dt = tfidf.fit_transform(anuncios["Texto"])
    print(dt.shape)
    print(dt.data.nbytes)

(6575, 22001)
2023832
(6575, 34885)
2542520


## Lemmas

In [42]:
from tqdm.auto import tqdm
import spacy
nlp = spacy.load("es_core_news_sm")
# Part-of-speech tags kept for the "nav" column
# (besides nouns, adjectives and verbs this includes proper nouns and adverbs).
nouns_adjectives_verbs = ["NOUN", "PROPN", "ADJ", "ADV", "VERB"]

# Stream the texts through nlp.pipe, which processes them in batches, instead of
# calling nlp() once per row inside iterrows(); collect the results in lists and
# assign each column once rather than writing cell by cell with .at.
texts = anuncios["Texto"].map(str)
lemmas = []
nav = []
for doc in tqdm(nlp.pipe(texts), total=len(texts)):
    # all lemmas of the document, space-separated
    lemmas.append(" ".join(token.lemma_ for token in doc))
    # only the lemmas of tokens whose POS tag is in the list above
    nav.append(" ".join(token.lemma_ for token in doc
                        if token.pos_ in nouns_adjectives_verbs))
anuncios["lemmas"] = lemmas
anuncios["nav"] = nav

100%|██████████| 6575/6575 [01:20<00:00, 81.33it/s] 


In [43]:
# Inspect the new "lemmas" and "nav" columns.
anuncios.head()

Unnamed: 0,Fecha,Texto,lemmas,nav
0,1906-08-02,"SOCIEDAD HIJOS DE ANCASH - Anteanoche tuvo lugar una reunión de estudiantes ancashinos residentes en esta capital, con el objeto de propender al desarrollo de los intereses del departamento de Anc...","sociedad hijo DE ANCASH - Anteanoche tener lugar uno reunión de estudiante ancashino residente en este capital , con el objeto de propender al desarrollo de el interés del departamento de Ancash ,...",sociedad hijo ANCASH Anteanoche tener lugar reunión estudiante ancashino residente capital objeto propender desarrollo interés departamento Ancash excluir absoluto propaganda índole político inici...
1,1906-08-12,"SOCIEDAD HIJOS DE ANCASH - A las dos de la tarde de hoy celebrará sesión esta sociedad, en el local de la calle del Tigre, número 173, con el objeto de ocuparse de la incorporación de los Ancashin...","sociedad hijo DE ANCASH - A el dos de el tarde de hoy celebrar sesión este sociedad , en el local de el calle del Tigre , número 173 , con el objeto de ocupar él de el incorporación de el Ancashin...",sociedad hijo ANCASH tarde hoy celebrar sesión sociedad local calle Tigre número objeto ocupar él incorporación Ancashinos llamar discutir proyecto reglamento
2,1906-09-15,"SOCIEDAD HIJOS DE ANCASH - El día de mañana domingo, a las 2 de la tarde, celebrará sesión esta sociedad en el local de la calle del Tigre número 173.","sociedad hijo DE ANCASH - el día de mañana domingo , a el 2 de el tarde , celebrar sesión este sociedad en el local de el calle del Tigre número 173 .",sociedad hijo ANCASH día mañana domingo tarde celebrar sesión sociedad local calle Tigre número
3,1906-10-14,"SOCIEDAD HIJOS DE ANCASH - Hoy a las 3 de la tarde celebrará esta sociedad sesión de junta general, en la casa número 137 de la calle del Tigre.","sociedad hijo DE ANCASH - hoy a el 3 de el tarde celebrar este sociedad sesión de junta general , en el casa número 137 de el calle del Tigre .",sociedad hijo ANCASH hoy tarde celebrar sociedad sesión junta general casa número calle Tigre
4,1906-10-28,"HIJOS DE ANCASH - Mañana a las dos de la tarde, celebrará sesión de junta general la sociedad de este nombre, en el local de la calle del Tigre número 173.","hijo DE ANCASH - mañana a el dos de el tarde , celebrar sesión de junta general el sociedad de este nombre , en el local de el calle del Tigre número 173 .",hijo ANCASH mañana tarde celebrar sesión junta general sociedad nombre local calle Tigre número


In [44]:
# TF-IDF matrix built from the lemmatized texts (values are converted to str first).
tfidf = TfidfVectorizer(stop_words=list(stopwords))
dt = tfidf.fit_transform(anuncios["lemmas"].map(str))
dt

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 151243 stored elements and shape (6575, 10646)>

In [45]:
# TF-IDF matrix built from the POS-filtered lemmas in the "nav" column.
tfidf = TfidfVectorizer(stop_words=list(stopwords))
dt = tfidf.fit_transform(anuncios["nav"].map(str))
dt

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 143675 stored elements and shape (6575, 10083)>

## Finding the document most similar to a made-up document

In [46]:
# Vectorize the lemmatized texts, dropping terms that occur in fewer than 2 documents.
# The fitted tfidf and the matrix dt are reused by the following cells.
tfidf = TfidfVectorizer(stop_words=list(stopwords), min_df=2)
dt = tfidf.fit_transform(anuncios["lemmas"].map(str))
dt

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 146602 stored elements and shape (6575, 6005)>

In [47]:
# Vectorize a made-up query with the already fitted vectorizer (transform only, no refit).
made_up = tfidf.transform(["la reunión de los artesanos terminó con baile y borrachera"])

In [48]:
# Similarity of the query to every document; the result has a single row.
sim = cosine_similarity(made_up, dt)

In [49]:
# The query's similarity scores, one per document.
sim[0]

array([0.0999328, 0.       , 0.       , ..., 0.       , 0.       ,
       0.       ], shape=(6575,))

In [50]:
# Rank the documents by descending similarity to the query and show the five best matches.
ranking = np.argsort(sim[0])[::-1]
top_five = ranking[:5]
anuncios.iloc[top_five][["Fecha", "lemmas"]]

Unnamed: 0,Fecha,lemmas
224,1915-06-26,hijo DE HUARI - él citar a todo el hijo de el provincia de Huari residente en este capital a el reunión preliminar con el objeto de tomar acuerdo para el fundación de su centro . lugar de reunión ...
5378,1930-12-08,"CENTRO CHANCAY - él citar a el socio y comprovinciano a el reunión de junta general que tener lugar el día de hoy , a el 9 de el noche , en el local de el Confederación de Artesanos , calle Tigre ..."
5398,1930-12-12,"CENTRO CHANCAY - cita a el socio y comprovinciano a el reunión de junta general que él realizar hoy , a el 9 de el noche , en el local de el Confederación de Artesanos , calle Tigre 173 ."
326,1917-06-10,"asociacion hijo DE ORCOTUNA - De orden del presidente , él citar a el socio que componer este sociedad a el reunión que celebrar hoy a el 8 p.m . en el local de el Confederación de Artesanos , cal..."
5949,1931-05-28,"CENTRO CULTURAL CHINCHA - De el secretaría de este centro él yo comunicar que hoy haber uno reunión en el Confederación de Artesanos , con el fin de tratar asunto de el provincia ."


# Finding the most similar documents

### Timing Cosine Similarity

In [51]:
%%time
# Same pairwise similarity, but dense_output=False requests a sparse result.
cosine_similarity(dt[0:10000], dt[0:10000], dense_output=False)

CPU times: user 683 ms, sys: 43 ms, total: 726 ms
Wall time: 725 ms


<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 35205994 stored elements and shape (6575, 6575)>

In [52]:
%%time
r = cosine_similarity(dt[0:10000], dt[0:10000])
# Zero out (near-)identical pairs, which includes every document's similarity with itself.
r[r > 0.9999] = 0
# np.argmax returns a flat index into the similarity matrix.
print(np.argmax(r))

26784138
CPU times: user 877 ms, sys: 392 ms, total: 1.27 s
Wall time: 1.43 s


In [53]:
%%time
# Same search as the previous cell, this time on a sparse similarity matrix.
r = cosine_similarity(dt[0:10000], dt[0:10000], dense_output=False)
r[r > 0.9999] = 0
print(np.argmax(r))

26784138
CPU times: user 7.09 s, sys: 4.49 s, total: 11.6 s
Wall time: 11.6 s


### Timing Dot-Product

In [54]:
%%time
r = np.dot(dt[0:10000], np.transpose(dt[0:10000]))
r[r > 0.9999] = 0
print(np.argmax(r))

26784138
CPU times: user 6.55 s, sys: 3.61 s, total: 10.2 s
Wall time: 10.2 s


## Batch

In [55]:
%%time
# Find the most similar pair of non-identical documents by comparing
# blocks of `batch` rows at a time instead of the whole matrix at once.
batch = 10000
max_sim = 0.0
max_a = None
max_b = None
for a in range(0, dt.shape[0], batch):
    # only blocks with b <= a are visited
    for b in range(0, a+batch, batch):
        print(a, b)
        # alternative using the plain dot product (disabled):
        #r = np.dot(dt[a:a+batch], np.transpose(dt[b:b+batch]))
        r = cosine_similarity(dt[a:a+batch], dt[b:b+batch], dense_output=False)
        # eliminate identical vectors
        # by setting their similarity to 0, which can never exceed max_sim
        r[r > 0.9999] = 0
        sim = r.max()
        if sim > max_sim:
            # argmax returns a single value which we have to
            # map to the two dimensions
            (max_a, max_b) = np.unravel_index(np.argmax(r), r.shape)
            # adjust offsets in corpus (this is a submatrix)
            max_a += a
            max_b += b
            max_sim = sim

0 0
CPU times: user 7.07 s, sys: 3.84 s, total: 10.9 s
Wall time: 10.9 s


In [56]:
# Row positions of the most similar document pair found by the batch search.
print(max_a, max_b)

4073 4163


In [57]:
# Similarity score of that pair.
print(max_sim)

0.9935822512545226


In [58]:
# Show the complete text of the two most similar documents.
# Use the fully qualified option name and None (no width limit) rather than the
# abbreviated key with the value 1, so that the text is explicitly not truncated.
pd.set_option('display.max_colwidth', None)
anuncios.iloc[[max_a, max_b]][["Fecha", "Texto"]]

Unnamed: 0,Fecha,Texto
4073,1927-12-17,SOCIEDAD CIRCULO MACATINO - Sesionará hoy a las 8 p.m. en el local de la Quinta Saux número 42.
4163,1928-03-10,SOCIEDAD CIRCULO MACATINO - Sesionará hoy a las 8 p.m. en la Quinta Saux número 42.


# Finding most related words

In [59]:
# Keep only words that occur in at least 1000 documents, so the vocabulary stays small.
tfidf_word = TfidfVectorizer(stop_words=list(stopwords), min_df=1000)
dt_word = tfidf_word.fit_transform(anuncios["Texto"])

In [60]:
# Transposing the document-term matrix makes each row a word, so this is a word-by-word similarity.
r = cosine_similarity(dt_word.T, dt_word.T)
# Zero the diagonal so a word is never reported as related to itself.
np.fill_diagonal(r, 0)

In [61]:
voc = tfidf_word.get_feature_names_out()
size = r.shape[0] # quadratic
# Walk through the 40 largest entries of the flattened similarity matrix, best first.
for index in np.argsort(r.flatten())[::-1][0:40]:
    # Map the flat index back to (row, column) with exact integer arithmetic
    # instead of float division followed by int().
    a, b = divmod(index, size)
    if a > b:  # avoid repetitions
        print('"%s" related to "%s"' % (voc[a], voc[b]))

"junta" related to "general"
"sesión" related to "junta"
"sesión" related to "general"
"sesión" related to "institución"
"local" related to "costumbre"
"local" related to "general"
"sesión" related to "local"
"institución" related to "general"
"junta" related to "institución"
"local" related to "junta"
"número" related to "calle"
"local" related to "centro"
"local" related to "calle"
"local" related to "institución"
"local" related to "hijos"
"sesión" related to "centro"
"general" related to "cita"
"sociedad" related to "hijos"
"general" related to "centro"
"número" related to "local"
