In [3]:
import json
import pandas as pd

def extract_tweets(rows):
    """Collect the timestamp and sentiment score of every usable tweet row.

    A row is used only when it has a ``doc`` mapping holding a ``data``
    mapping with both ``created_at`` and ``sentiment``. A ``sentiment`` given
    as a dict is reduced to its ``score`` entry. Rows missing any of these
    pieces are skipped rather than aborting the whole run.

    Args:
        rows: Iterable of row dicts (the value under the input's ``rows`` key).

    Returns:
        A list of ``{'created_at': ..., 'sentiment_score': ...}`` dicts, in
        input order.
    """
    tweets = []
    for tweet in rows:
        # 'doc' may be absent or null; 'data' may be absent as well.
        doc = tweet.get('doc') if isinstance(tweet, dict) else None
        payload = doc.get('data') if isinstance(doc, dict) else None
        if not isinstance(payload, dict):
            continue
        if 'sentiment' not in payload or 'created_at' not in payload:
            continue

        sentiment_score = payload['sentiment']
        if isinstance(sentiment_score, dict):
            # A sentiment dict without a 'score' carries nothing to aggregate.
            if 'score' not in sentiment_score:
                continue
            sentiment_score = sentiment_score['score']

        tweets.append({
            'created_at': payload['created_at'],
            'sentiment_score': sentiment_score,
        })
    return tweets


def build_tweets_frame(tweets):
    """Build the analysis DataFrame from the extracted tweet records.

    Args:
        tweets: Non-empty list of dicts as returned by ``extract_tweets``.

    Returns:
        A DataFrame with ``created_at`` parsed to datetimes, plus ``hour``
        and ``day`` columns derived from it. Both are taken in whatever
        timezone the parsed timestamps carry; no conversion is applied.

    Raises:
        ValueError: If ``tweets`` is empty, since no hour or day can be
            ranked without data.
    """
    if not tweets:
        raise ValueError('no tweets with both created_at and sentiment found')

    tweets_df = pd.DataFrame(tweets)
    tweets_df['created_at'] = pd.to_datetime(tweets_df['created_at'])
    tweets_df['hour'] = tweets_df['created_at'].dt.hour
    tweets_df['day'] = tweets_df['created_at'].dt.date
    return tweets_df


# Runs when executed as a script or notebook cell (where __name__ is
# '__main__'); skipped on import so the helpers above can be reused.
# Statements stay at module level so the result variables remain globals.
if __name__ == '__main__':
    # Load the JSON data. The encoding is pinned so that decoding the tweet
    # text does not depend on the platform's locale default.
    with open('data/twitter-50mb.json', encoding='utf-8') as f:
        data = json.load(f)

    # Extract relevant fields and calculate sentiment scores
    tweets = extract_tweets(data['rows'])
    tweets_df = build_tweets_frame(tweets)

    # "Happiest" ranks by the total (summed) sentiment score per bucket, so
    # busier periods weigh more; "most active" ranks by tweet count.
    happiest_hour = tweets_df.groupby('hour')['sentiment_score'].sum().idxmax()
    happiest_day = tweets_df.groupby('day')['sentiment_score'].sum().idxmax()
    most_active_hour = tweets_df['hour'].value_counts().idxmax()
    most_active_day = tweets_df['day'].value_counts().idxmax()

    print(f"Happiest hour: {happiest_hour}")
    print(f"Happiest day: {happiest_day}")
    print(f"Most active hour: {most_active_hour}")
    print(f"Most active day: {most_active_day}")



Happiest hour: 2
Happiest day: 2021-06-21
Most active hour: 1
Most active day: 2021-06-21
