In [None]:
import pandas as pd

# Build a weekly average-rating series for Snapchat reviews and save it as CSV.

# Load only the columns this analysis needs.
file_path = '../data/snapchat_reviews.csv'
data = pd.read_csv(file_path, usecols=['score', 'at', 'reviewCreatedVersion'])

# Keep only reviews that are tied to an app version.
data = data.dropna(subset=['reviewCreatedVersion'])

# Parse timestamps; unparseable values become NaT and are dropped.
data['at'] = pd.to_datetime(data['at'], errors='coerce')
data = data.dropna(subset=['at'])

# Filter to the date range 1/1/2020 through 10/31/2024, both ends inclusive.
start_date = '2020-01-01'
end_date = '2024-10-31'
# A bare date string compares as midnight at the START of that day, so
# `at < end_date` (or even `at <= end_date`) would discard reviews written
# during Oct 31 itself. Compare the calendar day instead so the whole final
# day is kept.
review_day = data['at'].dt.normalize()
data = data[(data['at'] >= start_date) & (review_day <= end_date)]

# Group into weekly bins and average the score within each bin.
data = data.set_index('at')  # resample() needs a DatetimeIndex
# 'W' bins are labelled by their week-ending Sunday (not by individual review
# dates); weeks with no reviews produce NaN. The result is already in
# chronological order, so no extra sort is needed.
weekly_avg = data['score'].resample('W').mean().reset_index()
weekly_avg = weekly_avg.rename(columns={'score': 'averageRating'})

# Save the result.
weekly_avg_path = '../data/average_weekly_rating_snapchat.csv'
weekly_avg.to_csv(weekly_avg_path, index=False)