# Instalaciones de paquetes requeridos

- LIWC: se usa este paquete para poder cargar el diccionario provisto en el directorio ```data```

In [2]:
#%pip install liwc

In [3]:
#!python -m spacy download es_core_news_sm
#nltk.download('punkt')
#nltk.download('stopwords')

In [4]:
import nltk
import string
import re
from collections import Counter
import pandas as pd
import spacy
from itertools import compress
import warnings
warnings.filterwarnings("ignore")
import liwc
# Load the LIWC dictionary shipped in data/. `parse(token)` is iterated further
# down to obtain the dictionary categories a token belongs to.
parse, category_names = liwc.load_token_parser('data/dict_r.dic')

In [5]:
# spaCy pipeline used below to tokenise each text; the 'es_core_news_sm'
# model must be downloaded beforehand.
nlp = spacy.load('es_core_news_sm')

In [6]:
# Raw data; the code below reads its 'categoria' and 'fundamento' columns.
institutions = pd.read_excel('data/ELA - Instituciones.xlsx')

# Preprocesamiento

In [7]:
def preprocess(df, concept = None, all_less_concept = None):
  """Filter rows by category and normalise the ``fundamento`` text.

  Args:
    df: DataFrame with at least the columns ``categoria`` and ``fundamento``.
      It is left untouched; a cleaned copy is returned.
    concept: if given, keep only the rows whose ``categoria`` equals it.
    all_less_concept: if given, drop the rows whose ``categoria`` equals it.

  Returns:
    A new DataFrame with a fresh 0..n-1 index where ``fundamento`` is
    lower-cased, stripped of ``string.punctuation`` characters, has every
    run of whitespace collapsed to a single space, and has more than two
    whitespace-separated words.
  """
  institutions = df
  if concept is not None:
    institutions = institutions[institutions["categoria"] == concept]

  if all_less_concept is not None:
    institutions = institutions[institutions["categoria"] != all_less_concept]

  # Explicit copy: without it the assignment below would either write into a
  # filtered slice (SettingWithCopy) or, when no filter is given, overwrite
  # the caller's raw DataFrame.
  institutions = institutions.copy()

  fundamento = institutions['fundamento'].str.lower()
  #fundamento = fundamento.apply(lambda x: re.sub("[0-9]", '', x))
  fundamento = fundamento.str.translate(str.maketrans('', '', string.punctuation))
  # A single regex pass collapses newlines and whitespace runs of any length;
  # chained fixed-width replaces leave some runs behind.
  fundamento = fundamento.str.replace(r'\s+', ' ', regex=True).str.strip()
  institutions['fundamento'] = fundamento

  # Missing texts yield NaN word counts, and NaN > 2 is False, so they are
  # dropped together with texts of two words or fewer.
  has_enough_words = fundamento.str.split().str.len() > 2
  return institutions[has_enough_words].reset_index(drop=True)

# Elección del concepto target

In [8]:
# Target concept only: rows whose 'categoria' is "Fuerzas Armadas".
institutions_ffaa = preprocess(institutions, concept="Fuerzas Armadas")

# Carga y preprocesamiento de todos los conceptos, menos el concepto target

In [9]:
# Every concept except the target one.
institutions_less_ffaa = preprocess(
  institutions,
  all_less_concept="Fuerzas Armadas",
)

# Carga y preprocesamiento de todos los conceptos

In [10]:
# No category filter: cleaned texts for every concept.
institutions_all = preprocess(df=institutions)

### 1 - Construcción del diccionario

In [11]:
%%time

# For every text of the target concept, count the dictionary categories of its
# tokens and collect the tokens that fall in the 'derechos' category.
# Rows are accumulated as plain dicts and the DataFrame is built once at the
# end; concatenating inside the loop re-copies the whole frame per iteration.
records = []
l_derechos = []

for ix in range(institutions_ffaa.shape[0]):
  texto = str(institutions_ffaa.fundamento[ix]).lower()
  tokens = [word.orth_ for word in nlp(texto)]

  # Look each token up once and materialise the result so it can be reused
  # for both the category counts and the 'derechos' filter.
  token_categories = [list(parse(token)) for token in tokens]
  tcounts1 = Counter(
    category for categories in token_categories for category in categories
  )
  word_derechos = [
    token
    for token, categories in zip(tokens, token_categories)
    if 'derechos' in categories
  ]

  # Key order fixes the column order: 'index', the categories, 'texto',
  # 'word_derechos'.
  record = {'index': ix}
  record.update(tcounts1)
  record['texto'] = texto
  # Texts without 'derechos' tokens store '' instead of an empty list.
  record['word_derechos'] = word_derechos if word_derechos else ''
  records.append(record)

  l_derechos.extend(word_derechos)

# Categories absent from a text become NaN in that row.
df = pd.DataFrame(records)

CPU times: user 37 s, sys: 287 ms, total: 37.3 s
Wall time: 37.3 s


In [12]:
# Frequency table of the tokens tagged 'derechos': one row per distinct term,
# in order of first appearance. Naming both columns up front keeps 'count'
# present when l_derechos is empty, so the sort below cannot raise KeyError.
df_concept_ffaa = pd.DataFrame(
  list(Counter(l_derechos).items()),
  columns=['termino', 'count'],
)
# Show the 20 most frequent terms.
df_concept_ffaa.sort_values('count', ascending=False).head(20)

Unnamed: 0,termino,count
0,proteger,73
2,defender,58
9,respetar,25
8,cumplir,21
13,decir,15
7,ejercer,13
5,contribuir,12
3,responder,9
11,trabajar,5
12,procurar,5


### 2 - Verificación/Validación del diccionario

### Diccionario original

Se dejan los conceptos cargados en el diccionario ```dict_r.dic```

In [21]:
#Original
df_concept_ffaa

Unnamed: 0,termino,count
0,proteger,73
1,apoyar,4
2,defender,58
3,responder,9
4,usar,4
5,contribuir,12
6,vivir,1
7,ejercer,13
8,cumplir,21
9,respetar,25


### Dataframe luego del procesamiento del punto 3 (Desempeño diccionario)
Luego del proceso del punto 3, se sacan los términos:
- guardar
- responder
- tomar
- usar

In [22]:
# Remove the discarded terms in a single pass; rows keep their index labels.
df_concept_ffaa = df_concept_ffaa[
  ~df_concept_ffaa["termino"].isin(["guardar", "responder", "tomar", "usar"])
]

In [23]:
df_concept_ffaa

Unnamed: 0,termino,count
0,proteger,73
1,apoyar,4
2,defender,58
5,contribuir,12
6,vivir,1
7,ejercer,13
8,cumplir,21
9,respetar,25
10,educar,1
11,trabajar,5


### 3 - Desempeño diccionario

In [14]:
# One indicator column per distinct term of the dictionary.
df_dummy_categories_ffaa = pd.get_dummies(
  df_concept_ffaa
  .groupby(by=['termino'])
  .count()
  .reset_index()['termino']
)

In [15]:
# Keep the term columns but discard every row.
df_dummy_categories_ffaa = df_dummy_categories_ffaa.iloc[:0]

In [16]:
# Distinct values of 'categoria' (groupby keys) as a Series.
df_institutions_all_group = (
  institutions_all
  .groupby(by=['categoria'])
  .count()
  .reset_index()['categoria']
)
# NOTE(review): this appears to rely on pandas growing the row-less frame to
# the Series' index, giving one row per category.
df_dummy_categories_ffaa['categoria'] = df_institutions_all_group

In [17]:
df_dummy_categories_ffaa = df_dummy_categories_ffaa.set_index("categoria")

# Start every (category, term) counter at zero. A fresh float frame with the
# same labels replaces writing through `df[col].values[:]`, which only works
# while `.values` is a writable view and fails under pandas Copy-on-Write.
df_dummy_categories_ffaa = pd.DataFrame(
  0.0,
  index=df_dummy_categories_ffaa.index,
  columns=df_dummy_categories_ffaa.columns,
)

In [18]:
# Add, per category, how many times each term occurs in each text.
# str.count matches substrings, so a term is also counted inside longer words.
for _, row in institutions_all.iterrows():
  categoria = row['categoria']
  texto = row['fundamento']
  for termino in df_dummy_categories_ffaa.columns:
    # Single-step .at access writes into the frame itself; the chained
    # `.loc[row][col] = ...` form may write to a temporary copy instead.
    df_dummy_categories_ffaa.at[categoria, termino] += texto.count(termino)


In [19]:
# Rank the categories by term counts, using every term column in its existing
# order as successive tie-breakers. Taking the keys from the frame avoids a
# KeyError when a term has been removed from the dictionary (e.g. 'guardar',
# 'responder', 'tomar', 'usar').
df_dummy_categories_ffaa.sort_values(
  by=list(df_dummy_categories_ffaa.columns),
  ascending=False,
)

Unnamed: 0_level_0,apoyar,conocer,conservar,contribuir,cumplir,decir,defender,educar,ejercer,guardar,procurar,proteger,respetar,responder,servir,tomar,trabajar,usar,vivir
categoria,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Gobierno local / Municipal,12.0,17.0,1.0,5.0,5.0,3.0,1.0,0.0,10.0,5.0,2.0,4.0,3.0,8.0,0.0,29.0,19.0,0.0,5.0
Defensor del Pueblo / Ciudadano,10.0,10.0,1.0,2.0,16.0,4.0,307.0,1.0,26.0,42.0,3.0,112.0,10.0,5.0,3.0,13.0,4.0,2.0,2.0
Gobierno regional,9.0,13.0,4.0,6.0,10.0,4.0,2.0,0.0,8.0,6.0,1.0,1.0,1.0,3.0,0.0,49.0,9.0,3.0,5.0
"Plebiscitos, referendos y consultas",6.0,58.0,1.0,3.0,9.0,14.0,5.0,7.0,76.0,14.0,0.0,3.0,13.0,5.0,6.0,155.0,1.0,15.0,12.0
Fuerzas Armadas,6.0,6.0,1.0,12.0,22.0,15.0,64.0,1.0,13.0,90.0,5.0,77.0,27.0,10.0,4.0,4.0,5.0,10.0,2.0
Presidencia de la República,3.0,10.0,3.0,0.0,29.0,3.0,6.0,1.0,10.0,12.0,1.0,4.0,19.0,1.0,0.0,4.0,4.0,1.0,1.0
Congreso o parlamento,2.0,10.0,5.0,0.0,33.0,11.0,2.0,0.0,21.0,33.0,1.0,5.0,7.0,5.0,10.0,15.0,19.0,3.0,5.0
Gobierno nacional (estructura y funciones),2.0,10.0,0.0,2.0,18.0,3.0,10.0,3.0,11.0,10.0,1.0,20.0,12.0,2.0,2.0,6.0,9.0,1.0,10.0
Gobierno provincial,2.0,4.0,0.0,2.0,2.0,0.0,1.0,0.0,2.0,0.0,0.0,0.0,1.0,1.0,0.0,6.0,3.0,0.0,0.0
Ministerio Público / Defensoría Pública,2.0,4.0,0.0,0.0,7.0,2.0,33.0,0.0,11.0,11.0,1.0,8.0,3.0,3.0,2.0,5.0,2.0,3.0,1.0
