In [None]:
import praw
import pandas as pd
import time
from datetime import datetime
import os


# Runtime settings are taken from the environment. Indexing os.environ
# directly means a missing variable raises KeyError right here.
USER_AGENT = os.environ['USER_AGENT']
CLIENT_ID = os.environ['CLIENT_ID']
CLIENT_SECRET = os.environ['CLIENT_SECRET']
WORKSPACE = os.environ['WORKSPACE']

# Build the Reddit API client from the credentials read above
user_agent = USER_AGENT
reddit_settings = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'user_agent': user_agent,
}
reddit = praw.Reddit(**reddit_settings)

# Look-back window for the scrape, in hours. The window is 24 hours (one
# day); a full year would be 8760 hours.
LOOKBACK_HOURS = 24

# Current Unix time (whole seconds) and the cut-off that opens the window
current_time = int(time.time())
cutoff_timestamp = current_time - LOOKBACK_HOURS * 3600
# Naive datetime expressed in UTC
cutoff_datetime = datetime.utcfromtimestamp(cutoff_timestamp)

# Legacy names, kept because the rest of the notebook refers to them. Despite
# the "one_year" wording they hold the LOOKBACK_HOURS cut-off defined above.
one_year_ago = cutoff_timestamp
one_year_ago_datetime = cutoff_datetime

# Accumulator for the post rows gathered from every subreddit
all_posts = list()

# Subreddits to scan, visited in exactly this order
subreddits = [
    'ManchesterUnited',
    'crystalpalace',
    'nffc',
    'coys',
    'WWFC',
    'chelseafc',
    'ArsenalFC',
    'COYH',
    'Everton',
    'BrightonHoveAlbion',
    'NUFC',
    'Hammers',
    'Brentford',
    'MCFC',
    'fulhamfc',
    'LiverpoolFC',
    'Burnley',
    'avfc',
    'SheffieldUnited',
    'AFCBournemouth',
]

# Collect, for every subreddit, the submissions created at or after the
# cut-off held in `one_year_ago_datetime`. Each row is a 7-tuple:
# (subreddit, title, author, UTC date, score, upvote ratio, comment count).
for subreddit_name in subreddits:
    # Keyed by submission id: each submission is kept exactly once and rows
    # stay in listing order. (A set of field tuples has arbitrary iteration
    # order and would merge two different submissions that happen to share
    # every recorded field.)
    posts_by_id = {}

    subreddit = reddit.subreddit(subreddit_name)
    for submission in subreddit.new(limit=None):
        created_time_utc = datetime.utcfromtimestamp(submission.created_utc)

        # The "new" listing is served newest-first, so the first submission
        # older than the cut-off means every remaining one is older too;
        # stopping here avoids paging through the rest of the listing.
        if created_time_utc < one_year_ago_datetime:
            break

        posts_by_id[submission.id] = (
            subreddit_name,
            submission.title,
            str(submission.author),
            created_time_utc.date(),
            submission.score,
            submission.upvote_ratio,
            submission.num_comments,
        )

    subreddit_rows = list(posts_by_id.values())

    # A subreddit without any post in the window still gets one placeholder
    # row (empty strings, zero counts) so it shows up in the output.
    if not subreddit_rows:
        subreddit_rows.append((
            subreddit_name,
            "",
            "",
            # UTC date, consistent with the dates recorded for real posts
            datetime.utcfromtimestamp(time.time()).date(),
            0,  # score
            0,  # upvote ratio
            0,  # number of comments
        ))

    # Add this subreddit's rows to the overall list
    all_posts.extend(subreddit_rows)

# Report how many rows were gathered (placeholder rows included)
print("Number of unique posts:", len(all_posts))


In [None]:
import pandas as pd
# One row per collected post tuple. The columns are labelled at construction,
# so the frame never carries positional 0..6 labels and an empty `all_posts`
# still produces the expected seven-column schema.
df = pd.DataFrame(
    all_posts,
    columns=['name', 'title', 'author', 'date', 'upvotes_count', 'upvote_ratio', 'num_comments'],
)

In [None]:
# Column labels, in the same order as the fields of each post tuple
column_labels = ['name', 'title', 'author', 'date', 'upvotes_count', 'upvote_ratio', 'num_comments']
df.columns = column_labels

# Show the labelled frame
df

In [None]:
# Mean upvote ratio per subreddit; every row of `df` counts toward the mean
# of its 'name' group.
avg = df.groupby('name')['upvote_ratio'].mean()
avg

In [None]:
# Append the rows of `df` to the cumulative CSV. Neither a header row nor the
# index is written, and append mode adds after whatever the file already
# contains -- so running this cell twice writes the same rows twice.
df.to_csv(
    './DATA/reddit_fc_data_old.csv',
    index=False,
    header=False,
    mode='a',
)

In [None]:
# Number of rows in the frame
df.shape[0]