In [None]:
import os
import sqlite3

import nltk
import numpy as np
import pandas as pd
import praw
import tensorflow as tf
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [3]:
# Load the raw dataset. Column names are supplied explicitly through `names=`.
# The path uses forward slashes so the notebook runs on Windows, Linux and macOS
# alike (the previous backslash form only resolved on Windows).
column_names = ["target", "id", "date", "flag", "user", "text"]
df = pd.read_csv(
    "../data/training.1600000.processed.noemoticon.csv",
    encoding="ISO-8859-1",
    names=column_names,
)

In [4]:
# Data pre-processing: keep only the label and the text, and remap the label
# values 0 -> 0 and 4 -> 1.
# `.assign` builds a new, independent frame, so the column write no longer
# happens on a slice of the original frame (which triggers pandas'
# SettingWithCopyWarning).
df = df[["target", "text"]].assign(
    target=lambda frame: frame["target"].replace({0: 0, 4: 1})
)

In [5]:
# Tokenization and vectorization: fit the tokenizer (num_words=10000) on the
# text column, convert each text to a sequence of integer ids, then pad the
# sequences to maxlen=100.
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(texts=df["text"])
sequences = tokenizer.texts_to_sequences(texts=df["text"])
padded_sequences = pad_sequences(sequences=sequences, maxlen=100)

In [6]:
# Split the data into training and test sets (20% held out, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    df["target"],
    random_state=42,
    test_size=0.2,
)


In [8]:
# Build the RNN model layer by layer: embedding -> LSTM -> sigmoid output.
model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=16))
model.add(LSTM(64))
model.add(Dense(1, activation='sigmoid'))

In [9]:
# Compile the model: binary cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [10]:
# Train the model. Note that the held-out test split is also what is passed
# as validation data here.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    validation_data=(X_test, y_test),
)


Epoch 1/5
[1m20000/20000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m445s[0m 22ms/step - accuracy: 0.7877 - loss: 0.4491 - val_accuracy: 0.8172 - val_loss: 0.3989
Epoch 2/5
[1m14258/20000[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m1:47[0m 19ms/step - accuracy: 0.8245 - loss: 0.3871

KeyboardInterrupt: 

In [None]:
# Evaluate the model on the test split and report its accuracy.
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)
print("Acurácia do modelo nos dados de teste:", test_acc)

In [None]:
# Save the whole model twice: once as .h5 and once as .keras.
# Forward slashes keep the relative paths valid on Windows, Linux and macOS
# (the previous backslash form only resolved on Windows).
model.save("../modelos/modelo_rnn.h5")
model.save("../modelos/modelo_rnn.keras")


In [None]:
# Configure the Reddit API client.
# Credentials are read from the environment instead of being hardcoded in the
# notebook; set REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET before running this
# cell (a missing variable raises KeyError naming it).
# NOTE(review): the id/secret pair that used to be embedded here has been
# exposed in the notebook and should be revoked and regenerated.
reddit = praw.Reddit(
    client_id=os.environ["REDDIT_CLIENT_ID"],
    client_secret=os.environ["REDDIT_CLIENT_SECRET"],
    user_agent="aps",
)

In [None]:
# Load the trained model from disk. Forward slashes keep the relative path
# valid on Windows, Linux and macOS.
# NOTE(review): this loads modelo_aps.keras, while the save cell in this
# notebook writes modelo_rnn.keras — confirm which file is intended.
model = tf.keras.models.load_model("../modelos/modelo_aps.keras")

In [None]:
# Function that classifies the sentiment of a text
def classify_sentiment(text):
    """Classify ``text`` as positive or negative using the loaded model.

    Uses the notebook-level ``tokenizer`` and ``model`` objects, so both must
    already exist in the kernel when this function is called.

    Args:
        text: The string to classify.

    Returns:
        ``"Positivo"`` when the model's output is >= 0.5, otherwise
        ``"Negativo"``.
    """
    # Tokenize and vectorize with the same maxlen used for the training data.
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=100)
    # verbose=0 silences the per-call progress bar, which otherwise floods the
    # notebook output when this function is called once per post.
    prediction = model.predict(padded_sequence, verbose=0)[0][0]
    return "Positivo" if prediction >= 0.5 else "Negativo"

In [None]:
# Connect to the SQLite database. Forward slashes keep the relative path valid
# on Windows, Linux and macOS.
conn = sqlite3.connect("../data/reddit_posts.db")
cursor = conn.cursor()

try:
    # Topics of interest, each used as a Reddit search query.
    topics = ["deforestation", "forestfires", "floods", "rain", "riverpollution", "dams"]
    # Iterate over the matching Reddit posts for every topic.
    # NOTE(review): the same post can match more than one topic; whether the
    # `posts` table rejects or accepts duplicate ids is not visible here — confirm.
    for topic in topics:
        for submission in reddit.subreddit("all").search(topic, sort="hot", time_filter="week"):
            if submission.selftext.strip() != "" or submission.url.strip() != "":
                # Concatenate the post title and body for classification.
                text = submission.title + " " + submission.selftext
                sentiment = classify_sentiment(text)
                # PRAW reports the author of a deleted account as None; fall
                # back to a placeholder instead of raising AttributeError and
                # losing every insert that has not been committed yet.
                author = submission.author
                author_name = author.name if author is not None else "[deleted]"
                # sqlite3 cannot bind a PRAW Subreddit object, so store its
                # display name (a plain string) instead.
                subreddit_name = submission.subreddit.display_name
                cursor.execute('''INSERT INTO posts (id, title, content, sentiment, subreddit, author, url, score, num_comments, created_utc)
                                  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
                               (submission.id, submission.title, submission.selftext, sentiment, subreddit_name, author_name,
                                submission.url, submission.score, submission.num_comments, submission.created_utc))
    # Commit all inserts in a single transaction once every topic is processed.
    conn.commit()

finally:
    # Close the database connection even if an exception was raised.
    conn.close()