# In [None]:
# data_visualization.ipynb

# Step 1: Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import pymongo

# Step 2: Pull the stored messages out of MongoDB
# Open a client against the MongoDB server on localhost, port 27017.
mongo_client = pymongo.MongoClient('mongodb://localhost:27017/')
# get_database / get_collection resolve the same objects as subscripting
# the client / database by name.
db = mongo_client.get_database('ethio_mart')
collection = db.get_collection('messages')

# An empty filter matches every document; the cursor is materialised into a
# list so each document becomes one DataFrame row.
data = collection.find({})
df = pd.DataFrame(list(data))

# Step 3: Data preprocessing for visualization
# A DataFrame built from zero documents has no columns at all, so check for
# the field up front and fail with an actionable message instead of the bare
# "KeyError: 'timestamp'" that the column lookup would raise.
if 'timestamp' not in df.columns:
    raise KeyError(
        "no 'timestamp' column in the fetched data; "
        "is the 'messages' collection empty?"
    )
# Convert timestamps to datetime (the date extraction in Step 4 relies on the
# .dt accessor, which needs a datetime-typed column)
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Step 4: Plot how many messages were recorded on each calendar day
# Reduce every timestamp to its calendar date so messages bucket per day
df['date'] = df['timestamp'].dt.date
# One entry per distinct date, valued by the number of rows on that date
message_counts = df.groupby('date').size()

# Draw on an explicit figure/axes pair rather than the implicit pyplot state
freq_fig, freq_ax = plt.subplots(figsize=(12, 6))
freq_ax.plot(message_counts.index, message_counts.values, marker='o')
freq_ax.set_title('Message Frequency Over Time')
freq_ax.set_xlabel('Date')
freq_ax.set_ylabel('Number of Messages')
# plt.xticks acts on the current axes, which is the one just created
plt.xticks(rotation=45)
freq_ax.grid()
freq_fig.tight_layout()
plt.show()

# Step 5: Bar chart of the ten most frequent senders
# value_counts() orders senders by descending message count, so the first
# ten entries are the ten most active senders.
sender_counts = df['sender'].value_counts()
top_senders = sender_counts.iloc[:10]

# Draw on an explicit figure/axes pair and hand the axes to pandas
senders_fig, senders_ax = plt.subplots(figsize=(12, 6))
top_senders.plot.bar(color='skyblue', ax=senders_ax)
senders_ax.set_title('Top 10 Message Senders')
senders_ax.set_xlabel('Sender')
senders_ax.set_ylabel('Number of Messages')
# plt.xticks acts on the current axes, which is the one just created
plt.xticks(rotation=45)
# Horizontal grid lines only, to make bar heights easier to read
senders_ax.grid(axis='y')
senders_fig.tight_layout()
plt.show()
