We'll compare these two types of posts to determine the following:

Do Ask HN or Show HN posts receive more comments on average?\
Do posts created at a certain time receive more comments on average?


In [9]:

from csv import reader


# Read the whole CSV into memory. The context manager guarantees the file
# handle is closed, and newline="" is what the csv module asks for so that
# quoted fields containing line breaks are parsed correctly.
with open("hacker_news.csv", newline="") as csv_file:
    hacker_news_dataset = list(reader(csv_file))

# The first row holds the column names; every following row is one post.
headers = hacker_news_dataset[0]
hacker_news_posts = hacker_news_dataset[1:]

# Posts are split into three buckets by the prefix of their title.
ask_posts = []
show_posts = []
other_posts = []

for post in hacker_news_posts:
    # post[1] is the 'title' column (see the printed header row);
    # lower-casing makes the prefix match case-insensitive.
    post_title = post[1].lower()

    if post_title.startswith("ask hn"):
        ask_posts.append(post)
    elif post_title.startswith("show hn"):
        show_posts.append(post)
    else:
        other_posts.append(post)

print(headers)

['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at']


Which type of posts receives more comments on average?

In [8]:
def count_average_number_of_comments_for(dataset, comments_column=4):
    """Return the average number of comments per post, rounded to an integer.

    Args:
        dataset: Iterable of post rows. Each row must be indexable and hold
            the comment count (any value ``int()`` accepts) at
            ``comments_column``.
        comments_column: Index of the comment-count field within a row.
            Defaults to 4.

    Returns:
        The mean comment count passed through the built-in ``round`` (so
        exact halves go to the nearest even integer), or 0 when ``dataset``
        contains no rows.

    Raises:
        ValueError: If a row's comment field is not a valid integer string.
    """
    sum_of_comments = 0
    count_of_posts = 0

    # Single pass with manual counting so one-shot iterables work too.
    for post in dataset:
        sum_of_comments += int(post[comments_column])
        count_of_posts += 1

    # No posts means no meaningful mean; report 0 rather than divide by zero.
    if count_of_posts == 0:
        return 0

    return round(sum_of_comments / count_of_posts)

# Average comments per post for each category, rounded to whole numbers.
avg_ask_posts_comments = count_average_number_of_comments_for(ask_posts)
avg_show_posts_comments = count_average_number_of_comments_for(show_posts)

# One value per line: Ask HN first, then Show HN.
print(avg_ask_posts_comments, avg_show_posts_comments, sep="\n")

14
10


On average, "Ask HN" posts receive more comments than "Show HN" posts. It might mean that people prefer to show how smart they are instead of looking at other people's work 😁

Since "Ask HN" posts receive more comments on average, I'll explore them further. Now I'd like to see if posts created at a certain hour of the day have a higher chance of receiving comments.

In [35]:
import datetime

# hour of creation -> {"comments": total comments, "posts": number of posts}
posts_comments_per_hour = {}

for post in ask_posts:
    comment_count = int(post[4])
    hour_created = datetime.datetime.strptime(post[6], "%m/%d/%Y %H:%M").hour

    # Start a zeroed bucket the first time an hour is seen, then accumulate.
    hour_totals = posts_comments_per_hour.setdefault(
        hour_created, {"comments": 0, "posts": 0}
    )
    hour_totals["comments"] += comment_count
    hour_totals["posts"] += 1

# hour of creation -> rounded mean number of comments per post
average_comments_per_hour = {
    hour: round(totals["comments"] / totals["posts"])
    for hour, totals in posts_comments_per_hour.items()
}

# Rank hours by their average, highest first, and keep the top five.
ranked_hours = list(average_comments_per_hour.items())
ranked_hours.sort(key=lambda pair: pair[1], reverse=True)
hours_with_most_comments = ranked_hours[:5]

for hour, average in hours_with_most_comments:
    # Show the raw (hour, average) pair, then a readable line.
    print((hour, average))
    print(f"{hour}:00: {average} average comments per post")

(15, 39)
15:00: 39 average comments per post
(2, 24)
2:00: 24 average comments per post
(20, 22)
20:00: 22 average comments per post
(16, 17)
16:00: 17 average comments per post
(21, 16)
21:00: 16 average comments per post
