In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Hourly web-traffic plot.
#
# Reads the November and December event logs, counts events per calendar
# hour (keyed on the `event_time` column) and saves/shows a line chart.

# Load the web traffic datasets from the provided file paths
december_path = '/Users/ishaans/Downloads/2019-Dec-Cleaned.csv'
november_path = '/Users/ishaans/Downloads/2019-Nov-Cleaned.csv'

try:
    december_data = pd.read_csv(december_path)
    november_data = pd.read_csv(november_path)
except FileNotFoundError:
    print("One or more dataset files not found. Please ensure the paths are correct and the files exist.")
    # `exit()` is a helper injected by `site`/IPython rather than part of the
    # language, and called without arguments it reports success (status 0).
    # Raising SystemExit works everywhere and signals the failure.
    raise SystemExit(1)

# Combine the datasets. ignore_index=True gives the result a fresh 0..n-1
# index instead of repeating each file's own row labels.
combined_data = pd.concat([november_data, december_data], ignore_index=True)

# Convert event_time column to datetime and ensure proper sorting of dates
combined_data['event_time'] = pd.to_datetime(combined_data['event_time'])
combined_data.sort_values(by='event_time', inplace=True)

# Calculate web traffic as the count of events per hour: each timestamp is
# truncated to the start of its hour (the date is kept), then rows are counted
# per bucket. Lowercase 'h' is used because the uppercase 'H' alias is
# deprecated in recent pandas releases.
combined_data['hour'] = combined_data['event_time'].dt.floor('h')
traffic_data_hourly = combined_data.groupby('hour').size().reset_index(name='traffic_count')

# Plot the web traffic by hours
plt.figure(figsize=(12, 6))
plt.plot(traffic_data_hourly['hour'], traffic_data_hourly['traffic_count'], marker='o', label='Web Traffic by Hour')

# Formatting the visualization
plt.title('Web Traffic Over Time by Hour (November - December 2)', fontsize=14)
plt.xlabel('Hour', fontsize=12)
plt.ylabel('Web Traffic (Event Count)', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.6)

# Formatting the x-axis. The hourly buckets keep their date, so a tick on
# every single hour labelled only '%H:%M' would produce a very large number
# of indistinguishable labels once the data covers more than a day
# (presumably the case for two monthly files). Let matplotlib choose a
# readable tick spacing and include the date in each label.
x_axis = plt.gca().xaxis
x_axis.set_major_locator(mdates.AutoDateLocator())
x_axis.set_major_formatter(mdates.DateFormatter('%b %d %H:%M'))

plt.xticks(rotation=45)  # Rotate x-axis labels for better readability
plt.legend()
plt.tight_layout()

# Save the plot before showing it, so the figure is written while it is
# still the current figure.
output_plot_path = "web_traffic_visualization_by_hour.png"
plt.savefig(output_plot_path)
plt.show()