# **Import Libraries**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# **Read dataset**

In [None]:
# Read the social media dataset into a DataFrame.
# NOTE(review): '/content/...' looks like a Google Colab upload path — adjust when running elsewhere.
DATASET_PATH = '/content/social_media_data.csv'
data = pd.read_csv(DATASET_PATH)

In [None]:
# Preview the first rows (pandas default: 5) to sanity-check the load.
data.head()

# **Pre-processing**

In [None]:
# Print column names, non-null counts, and dtypes to spot missing values
# and columns that were read with an unexpected type.
data.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()

In [None]:
# Count the missing values in each column.
data.isnull().sum()

In [None]:
# Drop, in place, every row that has at least one missing value in any column
# (dropna defaults: axis=0, how='any').
# NOTE(review): if `views` is only populated for video posts, this would discard
# all non-video rows — confirm against the missing-value counts above.
data.dropna(inplace=True)

In [None]:
# Re-count missing values per column; every count should be 0 once rows with NaNs are dropped.
data.isnull().sum()

In [None]:
# (rows, columns) of the DataFrame at this point in the notebook.
data.shape

In [None]:
# Count rows that are exact duplicates of an earlier row.
# NOTE(review): duplicates are only counted here; nothing in the visible cells removes them.
data.duplicated().sum()

In [None]:
# Parse the raw `date` column into datetime64 values so the `.dt` accessor
# (used later for quarters) is available.
parsed_dates = pd.to_datetime(data['date'])
data['date'] = parsed_dates


# **What was the total number of likes, shares, comments, and views (for videos) over the year?**

In [None]:
# Yearly totals for each engagement metric.
# Views are summed over video posts only, selected with a boolean mask.
is_video = data['post_type'] == 'video'

total_likes = data['likes'].sum()
total_shares = data['shares'].sum()
total_comments = data['comments'].sum()
total_views = data.loc[is_video, 'views'].sum()


In [None]:
# Report each yearly total on its own line.
for label, total in (
    ("likes", total_likes),
    ("shares", total_shares),
    ("comments", total_comments),
    ("views (for videos)", total_views),
):
    print(f"Total number of {label}: {total}")

In [None]:
# Bar chart comparing the yearly totals of the four engagement metrics.
metrics = ['Likes', 'Shares', 'Comments', 'Views']
values = [total_likes, total_shares, total_comments, total_views]

# Same figure as the pyplot state-machine calls, built through explicit Figure/Axes objects.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(metrics, values)
ax.set_title('Total Engagement Metrics Over the Year')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Per-post engagement score: likes + shares + comments.
likes, shares, comments = data['likes'], data['shares'], data['comments']
data['engagement'] = likes + shares + comments

In [None]:
# 2. Platform with the highest total engagement (engagement = likes + shares + comments).
# The engagement column is recomputed here so this cell can run on its own.
data['engagement'] = data['likes'] + data['shares'] + data['comments']
platform_engagement = data.groupby('platform')['engagement'].sum()
highest_engagement_platform = platform_engagement.idxmax()
print(f"The platform with the highest engagement is: {highest_engagement_platform}")


In [None]:
# 3. Average likes, shares, comments, and views per post, broken down by platform.
metric_columns = ['likes', 'shares', 'comments', 'views']
posts_by_platform = data.groupby('platform')
platform_stats = posts_by_platform[metric_columns].mean()
print(platform_stats)

In [None]:
# 4. Number of posts of each type on each platform.
# Rows are platforms, columns are post types; combinations with no posts show 0.
posts_per_pair = data.groupby(['platform', 'post_type']).size()
post_type_counts = posts_per_pair.unstack(fill_value=0)
print(post_type_counts)

In [None]:
# 5. Post type whose posts have the highest mean engagement.
# Relies on the `engagement` column created in an earlier cell.
mean_engagement_by_type = data.groupby('post_type')['engagement'].mean()
highest_engagement_post_type = mean_engagement_by_type.idxmax()
print(f"The post type with the highest average engagement is: {highest_engagement_post_type}")


In [None]:
# 6. Total engagement per calendar quarter.
# NOTE(review): `.dt.quarter` yields 1-4 with no year component, so if the data
# spans more than one year, same-numbered quarters are summed together — confirm
# the dataset covers a single year.
data['quarter'] = data['date'].dt.quarter
engagement_by_quarter = data.groupby('quarter')['engagement']
quarterly_engagement = engagement_by_quarter.sum()
print(quarterly_engagement)