In [8]:
import praw
import pandas as pd
import time
from datetime import datetime
import os


# Reddit API credentials are read from the environment so that no secret is
# stored in the notebook. All three variables are checked up front so that
# every missing one is reported by name in a single error, instead of a bare
# KeyError for whichever lookup happens to run first.
REQUIRED_ENV_VARS = ('USER_AGENT', 'CLIENT_ID', 'CLIENT_SECRET')
missing_env_vars = [name for name in REQUIRED_ENV_VARS if name not in os.environ]
if missing_env_vars:
    # KeyError is kept as the exception type: it is what a direct
    # os.environ[...] lookup raises for an unset variable.
    raise KeyError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Set them before starting the Jupyter kernel."
    )

USER_AGENT = os.environ['USER_AGENT']
CLIENT_ID = os.environ['CLIENT_ID']
CLIENT_SECRET = os.environ['CLIENT_SECRET']

# Initialize the Reddit instance
user_agent = USER_AGENT
reddit = praw.Reddit(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    user_agent=user_agent,
)

# Size of the look-back window, in hours. With 24 the cutoff is one day before
# "now"; use 8760 (365 * 24) for a full year.
# NOTE: the one_year_ago* names are kept because the fetch loop reads
# one_year_ago_datetime, but with the current setting they mark one DAY ago.
# NOTE(review): Reddit listings are generally capped at roughly 1000 items, so
# a much longer window may be truncated for busy subreddits - confirm against
# the PRAW documentation before relying on a full-year window.
LOOKBACK_HOURS = 24
SECONDS_PER_HOUR = 3600

# Current time and the start of the look-back window, as Unix timestamps
current_time = int(time.time())
one_year_ago = current_time - (LOOKBACK_HOURS * SECONDS_PER_HOUR)
# Naive datetime expressed in UTC, so it compares directly with
# datetime.utcfromtimestamp(submission.created_utc)
one_year_ago_datetime = datetime.utcfromtimestamp(one_year_ago)

# Initialize a list to store posts (one 7-field tuple per post)
all_posts = []

# Define the list of subreddits to traverse
subreddits = [
    'ManchesterUnited',
    'crystalpalace',
    'nffc',
    'coys',
    'WWFC',
    'chelseafc',
    'ArsenalFC',
    'COYH',
    'Everton',
    'BrightonHoveAlbion',
    'NUFC',
    'Hammers',
    'Brentford',
    'MCFC',
    'fulhamfc',
    'LiverpoolFC',
    'Burnley',
    'avfc',
    'SheffieldUnited',
    'AFCBournemouth',
]

# Collect, for every subreddit, the posts created inside the look-back window.
# Each post becomes a 7-field tuple:
#   (subreddit name, title, author, UTC creation date, score, upvote ratio,
#    number of comments)
for subreddit_name in subreddits:
    # Posts for the current subreddit, keyed by submission id. A dict (rather
    # than a set of detail tuples) de-duplicates on the post's identity and
    # keeps the listing order, so the resulting rows are reproducible.
    subreddit_posts = {}

    # Walk the subreddit's "new" listing
    subreddit = reddit.subreddit(subreddit_name)
    for submission in subreddit.new(limit=None):
        created_time_utc = datetime.utcfromtimestamp(submission.created_utc)

        # The "new" listing is ordered newest first, so the first post older
        # than the cutoff means every remaining one is older too: stop here
        # instead of paging through the rest of the listing.
        if created_time_utc < one_year_ago_datetime:
            break

        subreddit_posts[submission.id] = (
            subreddit_name,
            submission.title,
            str(submission.author),
            created_time_utc.date(),
            submission.score,
            submission.upvote_ratio,
            submission.num_comments,
        )

    # If no posts are found for the current subreddit, add a placeholder row so
    # the subreddit still appears in the results.
    # NOTE(review): the zero values are indistinguishable from real data in
    # downstream aggregates (e.g. a mean of upvote_ratio) - confirm this is
    # the intended treatment.
    if not subreddit_posts:
        subreddit_posts[None] = (
            subreddit_name,
            "",
            "",
            datetime.utcnow().date(),  # UTC date, consistent with real posts
            0,  # Zero values for score, upvote ratio, and number of comments
            0,
            0,
        )

    # Add the posts for the current subreddit to the list of all posts
    all_posts.extend(subreddit_posts.values())

# Print the number of rows collected (placeholder rows included)
print("Number of unique posts:", len(all_posts))


KeyError: 'USER_AGENT'

In [9]:
import pandas as pd
# Build the frame with its column names at construction, so it is never left
# with positional 0..6 labels and an empty all_posts still yields a
# well-formed (zero-row, seven-column) frame.
df = pd.DataFrame(
    all_posts,
    columns=[
        'name',
        'title',
        'author',
        'date',
        'upvotes_count',
        'upvote_ratio',
        'num_comments',
    ],
)

In [10]:
# Label the seven per-post fields, then show the frame as the cell's output
post_columns = [
    'name',
    'title',
    'author',
    'date',
    'upvotes_count',
    'upvote_ratio',
    'num_comments',
]
df.columns = post_columns
df

Unnamed: 0,name,title,author,date,upvotes_count,upvote_ratio,num_comments
0,ManchesterUnited,Euro 2024 SF: Netherlands Vs England,scoreboard-app,2024-07-10,10,0.68,22
1,crystalpalace,Jobe Bellingham’s plan is to stay at Sunderlan...,Punjavepoonpoon,2024-07-10,40,0.96,4
2,nffc,Secret Garden FanZone,akbfc67,2024-07-10,7,1.00,1
3,nffc,Our new club mascot (warning: cursed),theivoryserf,2024-07-10,29,0.94,4
4,nffc,Match Thread England 🏴󠁧󠁢󠁥󠁮󠁧󠁿 v Netherlands 🇧🇪,Killoah,2024-07-10,9,1.00,18
...,...,...,...,...,...,...,...
129,avfc,Jhon Duran to the Hammers for £35 million. Vil...,MrBlueSky57,2024-07-10,68,0.92,85
130,SheffieldUnited,"So, we are half way through the working week, ...",Bigtallanddopey,2024-07-10,9,0.80,7
131,SheffieldUnited,Kit reveal tomorrow morning (Thursday 9AM),Shadota,2024-07-10,7,0.82,11
132,SheffieldUnited,Last attempt at a sub night out before I call ...,BladesmansAlt,2024-07-10,6,0.88,3


In [11]:
# Mean upvote ratio per subreddit (one value per distinct 'name')
posts_by_subreddit = df.groupby('name')
avg = posts_by_subreddit['upvote_ratio'].mean()
avg

name
AFCBournemouth        0.910000
ArsenalFC             0.909000
Brentford             1.000000
BrightonHoveAlbion    0.990000
Burnley               0.960000
COYH                  1.000000
Everton               0.893333
Hammers               0.905000
LiverpoolFC           0.927333
MCFC                  0.870000
ManchesterUnited      0.680000
NUFC                  0.738571
SheffieldUnited       0.833333
WWFC                  0.970000
avfc                  0.869333
chelseafc             0.924375
coys                  0.956364
crystalpalace         0.960000
fulhamfc              1.000000
nffc                  0.933750
Name: upvote_ratio, dtype: float64

In [12]:
# Write the collected posts to CSV, leaving out the positional row index
output_csv = 'test1.csv'
df.to_csv(output_csv, index=False)

In [13]:
# Number of rows in the frame
df.shape[0]

134

In [14]:
# Report the versions of the packages used by this notebook, for
# reproducibility. print_versions is a third-party helper that is handed this
# notebook's globals() - presumably to discover which modules were imported.
from print_versions import print_versions
print_versions(globals())

pandas==1.5.3
praw==7.7.1
