In [None]:
import sqlite3
import nltk
import re
from matplotlib import pyplot as plt

# Open the SQLite database file and create the cursor used by the queries
# further down in this script.
conn = sqlite3.connect('DBTic.db')
c = conn.cursor()

# Fetch the NLTK data packages, in the same order as before.
for _resource in (
    'punkt',
    'averaged_perceptron_tagger',
    'maxent_ne_chunker',
    'words',
):
    nltk.download(_resource)

def recognize_names(text):
    """Extract named entities from *text* with NLTK's NE chunker.

    The text is split into sentences, each sentence is word-tokenized and
    POS-tagged, and the tagged sentences are chunked with ``binary=True``.
    The words of every subtree labelled ``'NE'`` are joined with single
    spaces. An entity is discarded when it contains a standalone word made
    only of ASCII capital letters (for example an acronym).

    Args:
        text: Raw text to analyse.

    Returns:
        A list of entity strings in order of appearance; repeated entities
        are kept so the caller can count frequencies.
    """
    tagged = [
        nltk.pos_tag(nltk.word_tokenize(sent))
        for sent in nltk.sent_tokenize(text)
    ]

    found = []
    for chunked in nltk.ne_chunk_sents(tagged, binary=True):
        for node in chunked:
            # Only nodes exposing ``label`` can be entity subtrees; anything
            # else is skipped, as is any subtree not labelled 'NE'.
            if not hasattr(node, 'label') or node.label() != 'NE':
                continue
            candidate = ' '.join([leaf[0] for leaf in node])
            # Drop entities containing an all-uppercase word.
            if re.search(r'\b[A-Z]+\b', candidate):
                continue
            found.append(candidate)

    return found

# Collect the headline titles from every source table.
tables = ["Ambito_Financiero", "Ole", "Perfil", "Telam"]
titles = []
try:
    for table in tables:
        # Table names come from the fixed list above; SQL identifiers cannot
        # be bound as query parameters, hence the f-string.
        c.execute(f"SELECT Titulo FROM {table}")
        result = c.fetchall()
        # Skip NULL titles: str.join() below raises TypeError on None.
        titles.extend(row[0] for row in result if row[0] is not None)
finally:
    # Only SELECT statements were issued, so there is nothing to commit.
    # Always release the connection, even when a query fails.
    conn.close()

text = ' '.join(titles)

names = recognize_names(text)
print(names)

# Build a bar chart with the frequency of the recognised names.
name_freq = nltk.FreqDist(names)
top_names = name_freq.most_common(10)  # the ten most frequent names

if top_names:
    names, frequencies = zip(*top_names)

    plt.rcParams['font.size'] = 20
    plt.figure(figsize=(20, 12))
    plt.bar(names, frequencies)
    plt.xlabel('Nombres')
    plt.ylabel('Frecuencia')
    plt.title('Nombres más frecuentes')
    plt.xticks(rotation=90)
    plt.show()
else:
    # With no names, zip(*top_names) yields nothing and the two-name
    # unpacking would raise ValueError; report it instead of crashing.
    print('No se encontraron nombres para graficar.')
