In [None]:
import pandas as pd

def clean_reviews(reviews, start, end):
    """Return the reviews usable for the monthly average, as a new DataFrame.

    Rows are dropped when 'reviewCreatedVersion' is missing, when 'at' cannot
    be parsed as a datetime, or when 'at' falls outside the window.
    The input frame is not modified.

    Args:
        reviews: DataFrame with 'score', 'at' and 'reviewCreatedVersion' columns.
        start: First date of the window (inclusive), e.g. '2020-01-01'.
        end: Last date of the window (inclusive, whole day), e.g. '2024-10-31'.

    Returns:
        A filtered copy of ``reviews`` whose 'at' column is datetime typed.
    """
    cleaned = reviews.dropna(subset=['reviewCreatedVersion'])
    # assign() builds a new frame, so the caller's data is never written to.
    cleaned = cleaned.assign(at=pd.to_datetime(cleaned['at'], errors='coerce'))
    cleaned = cleaned.dropna(subset=['at'])  # Unparseable timestamps became NaT

    # Compare the calendar day against `end` so that reviews made at any time
    # on the final day are kept (a plain `at < end` would drop that whole day).
    in_range = (cleaned['at'] >= start) & (cleaned['at'].dt.normalize() <= end)
    return cleaned.loc[in_range].copy()


def monthly_average_rating(reviews):
    """Average 'score' per calendar month, in chronological order.

    Args:
        reviews: DataFrame with a datetime 'at' column and a numeric 'score'.

    Returns:
        DataFrame with columns 'Month-Year' (e.g. 'January-2020') and
        'averageRating', one row per month that has at least one review.
    """
    # Group on the monthly period rather than on the formatted label: periods
    # sort chronologically, whereas 'April-2020' < 'January-2020' as strings.
    months = reviews['at'].dt.to_period('M')
    means = reviews.groupby(months)['score'].mean()
    return pd.DataFrame({
        'Month-Year': means.index.strftime('%B-%Y'),
        'averageRating': means.to_numpy(),
    })


# The guard is true in notebooks and when run as a script, so the variables
# below are still defined as globals there; it only skips the file I/O when
# this code is imported.
if __name__ == "__main__":
    # Load the dataset
    file_path = '../data/snapchat_reviews.csv'
    data = pd.read_csv(file_path, usecols=['score', 'at', 'reviewCreatedVersion'])

    # Keep reviews from 1/1/2020 through 10/31/2024 (both days included)
    start_date = '2020-01-01'
    end_date = '2024-10-31'
    data = clean_reviews(data, start_date, end_date)

    # Label each review with its month in "Month-Year" format
    data['Month-Year'] = data['at'].dt.to_period('M').dt.strftime('%B-%Y')

    # Average rating per month, ordered chronologically
    monthly_avg = monthly_average_rating(data)

    # Save the result
    monthly_avg_path = '../data/cleaned_data/average_monthly_rating_snapchat.csv'
    monthly_avg.to_csv(monthly_avg_path, index=False)