In [1]:
import praw

In [2]:
from dotenv import load_dotenv
import os

# Call python-dotenv's loader before the os.getenv lookups below; presumably this
# makes the R_* credentials from a local .env file visible in the environment.
load_dotenv()

# Reddit API credentials, read from the environment in one pass.
# Any variable that is not set comes back as None (os.getenv's default).
CLIENT_ID, CLIENT_SECRET, USER_AGENT, USERNAME, PASSWORD = map(
    os.getenv,
    (
        "R_CLIENT_ID",
        "R_CLIENT_SECRET",
        "R_USER_AGENT",
        "R_USERNAME",
        "R_PASSWORD",
    ),
)

In [3]:
# Create the Reddit API client from the credentials read from the environment.
# The mapping is unpacked into the same keyword arguments praw.Reddit receives.
reddit_instance = praw.Reddit(**{
    "client_id": CLIENT_ID,
    "client_secret": CLIENT_SECRET,
    "user_agent": USER_AGENT,
    "username": USERNAME,
    "password": PASSWORD,
})

In [4]:
# Get a handle on r/TrueFilm; the bare name on the last line makes the
# notebook display the object's repr as the cell output.
subreddit = reddit_instance.subreddit('TrueFilm')
subreddit

Subreddit(display_name='TrueFilm')

In [5]:
# Show the subreddit's title as plain text.
print(subreddit.title)

TrueFilm: An in-depth discussion of film


In [6]:
# Print the subreddit description; print() is used so the multi-line Markdown
# text is shown with real line breaks rather than as an escaped repr.
print(subreddit.description)

###About Us
-

/r/TrueFilm is a subreddit for in-depth discussions about film.

We want to encourage and support in-depth, intellectual discussion. Clear, polite and well-written responses should be upvoted; opinions should not be downvoted.

---

###Rules [(Expanded)](https://www.reddit.com/r/TrueFilm/about/rules/)

**General:**

1. All discussion must be related to film.

2. No racism, sexism, or other forms of bigotry.

3. Moderators have final discretion.

**Posts:**

4. Threads must promote in-depth discussion.

5. Threads must point discussion in a specific direction.

6. Links to outside articles must be submitted in a self-post and are subject to the above posting rules. [(Click for video essays)](https://redd.it/64fj4m)

**Comments:**

7. Be civil and don’t downvote opinions.

8. There is a 180 character minimum for top-level comments.

---

###Follow us on:

- [**LETTERBOXD**](http://letterboxd.com/truefilmreddit/) 

- [**TWITTER**](https://twitter.com/truefilmreddit)

---

#

In [7]:
# Show the subreddit's subscriber count.
print(subreddit.subscribers)

484077


In [8]:
import pandas as pd
from datetime import datetime, timezone

# Export the top posts of the past year from `subreddit` to an Excel workbook.
# Each row holds the post title, its self-text and its creation time (UTC).

# Request up to 250 of the subreddit's top posts from the last year.
top_posts = subreddit.top(limit=250, time_filter="year")

posts_data = [
    {
        'Title': post.title,
        'Content': post.selftext,
        # created_utc is a Unix timestamp; convert it to a readable UTC string.
        # datetime.utcfromtimestamp() is deprecated since Python 3.12, so the
        # timezone-aware fromtimestamp(..., tz=timezone.utc) is used instead;
        # the formatted output is the same.
        'Created': datetime.fromtimestamp(
            post.created_utc, tz=timezone.utc
        ).strftime('%Y-%m-%d %H:%M:%S'),
    }
    for post in top_posts
]

# Pin the column names so the workbook keeps its header row even when the
# listing returns no posts (an empty list alone yields a column-less frame).
df = pd.DataFrame(posts_data, columns=['Title', 'Content', 'Created'])
df.to_excel('reddit_top_posts.xlsx', index=False)

In [9]:
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import torch

# Read back the posts that were exported to Excel earlier
df = pd.read_excel('reddit_top_posts.xlsx')

# The tokenizer and the classification model come from the same RoBERTa
# sentiment checkpoint, so name it once and load both from it
SENTIMENT_CHECKPOINT = 'cardiffnlp/twitter-roberta-base-sentiment'
tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_CHECKPOINT)

# Sentiment classification of a single text with manual tokenization
def analyze_sentiment(text):
    """Classify the sentiment of ``text`` using the module-level model.

    Relies on the ``tokenizer`` and ``model`` objects defined at module level.

    Args:
        text: The text to classify. Anything that is not a string (for
            example ``None`` or a NaN marking a missing value) and any string
            that is empty or only whitespace is treated as "nothing to score".

    Returns:
        ``'Negative'``, ``'Neutral'`` or ``'Positive'`` for scorable text,
        otherwise the string ``'No analysis'``.
    """
    # Guard clause: without real content the model would only see special
    # tokens and return an arbitrary label, so skip such inputs entirely.
    if not isinstance(text, str) or not text.strip():
        return "No analysis"

    # Tokenize, truncating long posts to at most 512 tokens
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)

    # Inference only: no gradients are needed
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax is monotonic, so the arg-max of the raw logits already
    # identifies the most probable class.
    sentiment_index = logits.argmax(dim=-1).item()

    # Class index -> label: 0=Negative, 1=Neutral, 2=Positive
    labels = ('Negative', 'Neutral', 'Positive')
    return labels[sentiment_index]

# Score every post body and store the label in a new 'sentiment' column
df['sentiment'] = df['Content'].map(analyze_sentiment)

# Write the frame, now including the sentiment column, to a separate Excel file
df.to_excel('reddit_top_posts_with_sentiment.xlsx', index=False)
