In [1]:
import requests
import pandas as pd
from datetime import datetime
import numpy as np
from functools import lru_cache

url = "https://reddit.com/r/programming/.json?limit=100"

headers = {'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:66.0) Gecko/20100101 Firefox/66.0"}
REDDIT_ROOT_URL = "https://reddit.com"

def get_with_headers(url):
    return requests.get(url, headers=headers)

@lru_cache(maxsize=32)
def get_subreddit_posts(subreddit_url):
    print(f"Getting posts from {url}...")
    response = get_with_headers(url)
    raw_posts = response.json()['data']['children']

    posts = []
    for raw_post in raw_posts:
        post = {}
        raw_post = raw_post['data']
        post['name'] = raw_post['name']
        post['title'] = raw_post['title']
        post['score'] = raw_post['score']
        post['url'] = REDDIT_ROOT_URL + raw_post['permalink']
        post['created_utc'] = raw_post['created_utc']
        post['num_comments'] = raw_post['num_comments']
        
        posts.append(post)

    return posts

# Getting all posts.
posts = get_subreddit_posts(url)
all_posts= []
    
for post in posts:
    try:
        posts = get_subreddit_posts(url)
        last_value = [ sub['name'] for sub in posts][-1]
        url = "{}&after={}".format(url,last_value)
        all_posts += get_subreddit_posts(url)
        all_posts
        
    except (IndexError):
        pass

# Creating data frame

all_posts = pd.DataFrame(all_posts)

all_posts["created_utc"] = all_posts["created_utc"].apply(datetime.fromtimestamp)
all_posts.rename(columns={'created_utc':'date_hour'},inplace=True)

# Splitting the column into date and time.

all_posts['date'] = [d.date() for d in all_posts['date_hour']]
all_posts['time'] = [d.time() for d in all_posts['date_hour']]

# Adding weekday.

all_posts['weekday'] = [d.weekday() for d in all_posts['date']]


# Creating a copy with only the important columns.

all_posts_score = all_posts[['date', 'score','weekday']].copy()

# Some weekdays are repeated several times.

unique = all_posts_score.date.unique().tolist()
unique_df = pd.DataFrame(unique,columns =['date'])
unique_df['weekday'] = [d.weekday() for d in unique_df['date']]
repeated = unique_df.weekday.value_counts()
repeated = repeated.to_dict()

# Creating a data frame with the number of times a weekday is repeated.

repeated_df = pd.DataFrame.from_dict(repeated, orient='index')

# Creating a data frame with the sum of total score per weekday.

score_per_day = all_posts_score.groupby(by='weekday').agg({'score':['sum']})
posts_per_day = all_posts_score.weekday.value_counts().to_frame()

# Concatenate both data frames into one.

result = pd.concat([posts_per_day, repeated_df,score_per_day], axis=1, sort=False)
result.rename(columns ={0:'repeated_weekday'},inplace=True)


# Divide the columns "weekday" and "score" by "repeated_weekday" to have the number of posts and the score per day.

result['posts_per_day']= result.weekday/result.repeated_weekday
result['score_per_day']= result['score', 'sum']/result.repeated_weekday
result = result.sort_values(['posts_per_day'], ascending = False)
result.reset_index(inplace=True)

# Replacing numbers per days.

result['index'] = result['index'].replace(0,'Monday').replace(1,'Tuesday').replace(2,'Wednesday').replace(3,'Thursday').replace(4,'Friday').replace(5,'Saturday').replace(6,'Sunday')
result.rename = result.rename(columns = {'index':'day'}, inplace = True)


# Dropping useless columns.

result.drop(['weekday', 'repeated_weekday', ('score', 'sum')], axis=1,inplace=True)


result_score = result.sort_values(['score_per_day'], ascending = False)
result_score.reset_index(inplace=True)
print("===========================================================")
print(result)
print("===========================================================")
print(result.day[0] + " is the day that most posts are published.")
highest_score = result_score.day[0]
print("Posts publised on {} are those with highest scores.".format(highest_score))

Getting posts from https://reddit.com/r/programming/.json?limit=100...
Getting posts from https://reddit.com/r/programming/.json?limit=100&after=t3_g22jaq...
Getting posts from https://reddit.com/r/programming/.json?limit=100&after=t3_g22jaq&after=t3_g18cr0...
Getting posts from https://reddit.com/r/programming/.json?limit=100&after=t3_g22jaq&after=t3_g18cr0&after=t3_fzcqn4...
Getting posts from https://reddit.com/r/programming/.json?limit=100&after=t3_g22jaq&after=t3_g18cr0&after=t3_fzcqn4&after=t3_fyogju...
Getting posts from https://reddit.com/r/programming/.json?limit=100&after=t3_g22jaq&after=t3_g18cr0&after=t3_fzcqn4&after=t3_fyogju&after=t3_fwspq0...
Getting posts from https://reddit.com/r/programming/.json?limit=100&after=t3_g22jaq&after=t3_g18cr0&after=t3_fzcqn4&after=t3_fyogju&after=t3_fwspq0&after=t3_fwr8lr...
Getting posts from https://reddit.com/r/programming/.json?limit=100&after=t3_g22jaq&after=t3_g18cr0&after=t3_fzcqn4&after=t3_fyogju&after=t3_fwspq0&after=t3_fwr8lr&aft

In [2]:
all_posts

Unnamed: 0,name,title,score,url,date_hour,num_comments,date,time,weekday
0,t3_g1xxaz,Watch tech conference videos in one place,0,https://reddit.com/r/programming/comments/g1xx...,2020-04-15 20:39:56,0,2020-04-15,20:39:56,2
1,t3_g184i6,Comparing HTTP/3 vs. HTTP/2 Performance,139,https://reddit.com/r/programming/comments/g184...,2020-04-14 17:57:12,23,2020-04-14,17:57:12,1
2,t3_g1qv5d,Fixing timing issues in tests,2,https://reddit.com/r/programming/comments/g1qv...,2020-04-15 14:20:30,5,2020-04-15,14:20:30,2
3,t3_g1xd2n,8 Ruby on Rails Podcasts That Will Expand Your...,0,https://reddit.com/r/programming/comments/g1xd...,2020-04-15 20:11:30,0,2020-04-15,20:11:30,2
4,t3_g1svjd,The Unfriendly Robot: Automatically flagging u...,0,https://reddit.com/r/programming/comments/g1sv...,2020-04-15 16:24:07,1,2020-04-15,16:24:07,2
...,...,...,...,...,...,...,...,...,...
758,t3_fuds4q,Introducing the Internet Computer (Bronze) | W...,0,https://reddit.com/r/programming/comments/fuds...,2020-04-03 20:35:07,2,2020-04-03,20:35:07,4
759,t3_fueedx,Server Nope Beta Launch!,0,https://reddit.com/r/programming/comments/fuee...,2020-04-03 21:09:09,0,2020-04-03,21:09:09,4
760,t3_ftu3lw,"When debugging, your attitude matters",6,https://reddit.com/r/programming/comments/ftu3...,2020-04-02 22:40:53,0,2020-04-02,22:40:53,3
761,t3_fu6d5k,Decentralised Authentication Project - Looking...,0,https://reddit.com/r/programming/comments/fu6d...,2020-04-03 12:58:09,2,2020-04-03,12:58:09,4


In [3]:
all_posts['hour'] = all_posts['date_hour'].dt.hour
all_posts['minute'] = all_posts['date_hour'].dt.minute
all_posts['second'] = all_posts['date_hour'].dt.second

all_posts.hour.value_counts()

15    58
18    52
16    48
19    47
21    44
17    41
14    40
20    38
13    38
11    36
22    36
8     33
12    32
23    28
10    28
2     27
9     23
1     23
6     20
0     19
7     17
3     13
5     12
4     10
Name: hour, dtype: int64

In [4]:
hours_weekday = all_posts.groupby(by=['weekday','hour']).agg({'score':['sum']}).reset_index()#.sort_values(['weekday'],ascending = False)

hours_weekday

MultiIndex([('weekday',    ''),
            (   'hour',    ''),
            (  'score', 'sum')],
           )

In [6]:
hours_weekday['total_score'] = hours_weekday['score', 'sum']*1
hours_weekday.drop([('score', 'sum')], axis=1,inplace=True)
hours_weekday

Unnamed: 0,weekday,hour,total_score
,,,
0,0,0,17
1,0,1,6
2,0,2,2664
3,0,3,0
4,0,4,0
...,...,...,...
158,6,19,17
159,6,20,2
160,6,21,123


In [7]:
hours_weekday.rename = hours_weekday.rename(columns = {'valuesum':'total_score'}, inplace = True)

In [9]:
hours_weekday.sort_values(['total_score'],ascending= False)
hours_weekday['weekday'] = hours_weekday['weekday'].replace(0,'Monday').replace(1,'Tuesday').replace(2,'Wednesday').replace(3,'Thursday').replace(4,'Friday').replace(5,'Saturday').replace(6,'Sunday')

In [12]:
hours_weekday.sort_values(['total_score'],ascending= False)

Unnamed: 0,weekday,hour,total_score
,,,
131,Saturday,15,4301
65,Wednesday,18,3962
16,Monday,17,3729
144,Sunday,4,3358
88,Thursday,18,3108
...,...,...,...
89,Thursday,19,0
29,Tuesday,6,0
26,Tuesday,3,0


In [40]:
filter_df = hours_weekday[(hours_weekday['weekday']== highest_score)]
filter_df.sort_values(['total_score'],ascending= False,inplace=True)
filter_df
time = filter_df.hour.iloc[0]
time

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


15