# Facebook Metrics Data Analysis

In [2]:
import pandas as pd

# Load the dataset
file_path = "../datasets/dataset_Facebook.csv"
df = pd.read_csv(file_path, sep=';')
df.head()

Unnamed: 0,Page total likes,Type,Category,Post Month,Post Weekday,Post Hour,Paid,Lifetime Post Total Reach,Lifetime Post Total Impressions,Lifetime Engaged Users,Lifetime Post Consumers,Lifetime Post Consumptions,Lifetime Post Impressions by people who have liked your Page,Lifetime Post reach by people who like your Page,Lifetime People who have liked your Page and engaged with your post,comment,like,share,Total Interactions
0,139441,Photo,2,12,4,3,0.0,2752,5091,178,109,159,3078,1640,119,4,79.0,17.0,100
1,139441,Status,2,12,3,10,0.0,10460,19057,1457,1361,1674,11710,6112,1108,5,130.0,29.0,164
2,139441,Photo,3,12,3,3,0.0,2413,4373,177,113,154,2812,1503,132,0,66.0,14.0,80
3,139441,Photo,2,12,2,10,1.0,50128,87991,2211,790,1119,61027,32048,1386,58,1572.0,147.0,1777
4,139441,Photo,2,12,2,3,0.0,7244,13594,671,410,580,6228,3200,396,19,325.0,49.0,393


In [3]:
df.shape

(500, 19)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 19 columns):
 #   Column                                                               Non-Null Count  Dtype  
---  ------                                                               --------------  -----  
 0   Page total likes                                                     500 non-null    int64  
 1   Type                                                                 500 non-null    object 
 2   Category                                                             500 non-null    int64  
 3   Post Month                                                           500 non-null    int64  
 4   Post Weekday                                                         500 non-null    int64  
 5   Post Hour                                                            500 non-null    int64  
 6   Paid                                                                 499 non-null    float64
 7   Lifetime

In [5]:
df['Type'].unique()

array(['Photo', 'Status', 'Link', 'Video'], dtype=object)

In [7]:
# a. Create data subsets for type of post
photo_df = df[df['Type'] == 'Photo']
status_df = df[df['Type'] == 'Status']
link_df = df[df['Type'] == 'Link']
video_df = df[df['Type'] == 'Video']

print("Photo posts count:", len(photo_df))
print("Status posts count:", len(status_df))
print("Link posts count:", len(link_df))
print("Video posts count:", len(video_df))

status_df.head()

Photo posts count: 426
Status posts count: 45
Link posts count: 22
Video posts count: 7


Unnamed: 0,Page total likes,Type,Category,Post Month,Post Weekday,Post Hour,Paid,Lifetime Post Total Reach,Lifetime Post Total Impressions,Lifetime Engaged Users,Lifetime Post Consumers,Lifetime Post Consumptions,Lifetime Post Impressions by people who have liked your Page,Lifetime Post reach by people who like your Page,Lifetime People who have liked your Page and engaged with your post,comment,like,share,Total Interactions
1,139441,Status,2,12,3,10,0.0,10460,19057,1457,1361,1674,11710,6112,1108,5,130.0,29.0,164
5,139441,Status,2,12,1,9,0.0,10472,20849,1191,1073,1389,16034,7852,1016,1,152.0,33.0,186
8,139441,Status,2,12,7,3,0.0,11844,22538,1530,1407,1692,15220,7912,1250,0,161.0,31.0,192
10,139441,Status,2,12,5,10,0.0,21744,42334,4258,4100,4540,37849,18952,3798,0,233.0,19.0,252
15,138414,Status,2,12,3,10,0.0,10060,19680,1264,1209,1425,17272,8548,1162,4,86.0,18.0,108


In [8]:
# b. Merge two subsets
merged_df = pd.concat([photo_df, status_df], ignore_index=True)
print("Total merged posts count:", len(merged_df))

merged_df.head()


Total merged posts count: 471


Unnamed: 0,Page total likes,Type,Category,Post Month,Post Weekday,Post Hour,Paid,Lifetime Post Total Reach,Lifetime Post Total Impressions,Lifetime Engaged Users,Lifetime Post Consumers,Lifetime Post Consumptions,Lifetime Post Impressions by people who have liked your Page,Lifetime Post reach by people who like your Page,Lifetime People who have liked your Page and engaged with your post,comment,like,share,Total Interactions
0,139441,Photo,2,12,4,3,0.0,2752,5091,178,109,159,3078,1640,119,4,79.0,17.0,100
1,139441,Photo,3,12,3,3,0.0,2413,4373,177,113,154,2812,1503,132,0,66.0,14.0,80
2,139441,Photo,2,12,2,10,1.0,50128,87991,2211,790,1119,61027,32048,1386,58,1572.0,147.0,1777
3,139441,Photo,2,12,2,3,0.0,7244,13594,671,410,580,6228,3200,396,19,325.0,49.0,393
4,139441,Photo,3,12,1,3,1.0,11692,19479,481,265,364,15432,9328,379,3,249.0,27.0,279


In [9]:
# c. Sort Data on Page total likes
sorted_df = df.sort_values(by='Page total likes', ascending=False)
sorted_df.head()


Unnamed: 0,Page total likes,Type,Category,Post Month,Post Weekday,Post Hour,Paid,Lifetime Post Total Reach,Lifetime Post Total Impressions,Lifetime Engaged Users,Lifetime Post Consumers,Lifetime Post Consumptions,Lifetime Post Impressions by people who have liked your Page,Lifetime Post reach by people who like your Page,Lifetime People who have liked your Page and engaged with your post,comment,like,share,Total Interactions
0,139441,Photo,2,12,4,3,0.0,2752,5091,178,109,159,3078,1640,119,4,79.0,17.0,100
8,139441,Status,2,12,7,3,0.0,11844,22538,1530,1407,1692,15220,7912,1250,0,161.0,31.0,192
1,139441,Status,2,12,3,10,0.0,10460,19057,1457,1361,1674,11710,6112,1108,5,130.0,29.0,164
12,139441,Photo,2,12,5,10,0.0,2847,5133,193,115,133,3779,2072,152,0,90.0,14.0,104
11,139441,Photo,2,12,5,10,0.0,3112,5590,208,127,145,3887,2174,165,0,88.0,18.0,106


In [14]:
# d. Transposing Data (just top 3 rows for brevity)
transposed_df = df.transpose()
print(transposed_df.shape)
transposed_df


(19, 500)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,490,491,492,493,494,495,496,497,498,499
Page total likes,139441,139441,139441,139441,139441,139441,139441,139441,139441,139441,...,85979,85979,85979,85093,85093,85093,81370,81370,81370,81370
Type,Photo,Status,Photo,Photo,Photo,Status,Photo,Photo,Status,Photo,...,Photo,Photo,Link,Photo,Photo,Photo,Photo,Photo,Photo,Photo
Category,2,2,3,2,2,2,3,3,2,3,...,3,3,1,3,3,3,2,1,3,2
Post Month,12,12,12,12,12,12,12,12,12,12,...,1,1,1,1,1,1,1,1,1,1
Post Weekday,4,3,3,2,2,1,1,7,7,6,...,6,6,5,1,7,7,5,5,4,4
Post Hour,3,10,3,10,3,9,3,9,3,10,...,11,3,11,2,10,2,8,2,11,4
Paid,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
Lifetime Post Total Reach,2752,10460,2413,50128,7244,10472,11692,13720,11844,4694,...,5280,6184,45920,8412,5400,4684,3480,3778,4156,4188
Lifetime Post Total Impressions,5091,19057,4373,87991,13594,20849,19479,24137,22538,8668,...,8703,10228,5808,13960,9218,7536,6229,7216,7564,7292
Lifetime Engaged Users,178,1457,177,2211,671,1191,481,537,1530,280,...,951,956,753,1179,810,733,537,625,626,564


In [15]:
# e. Melting Data to long format(wide -> long)
melted_df = pd.melt(df, id_vars=['Type', 'Category'], 
                    value_vars=['like', 'share', 'comment'],
                    var_name='Engagement_Type', value_name='Count')
melted_df


Unnamed: 0,Type,Category,Engagement_Type,Count
0,Photo,2,like,79.0
1,Status,2,like,130.0
2,Photo,3,like,66.0
3,Photo,2,like,1572.0
4,Photo,2,like,325.0
...,...,...,...,...
1495,Photo,3,comment,5.0
1496,Photo,2,comment,0.0
1497,Photo,1,comment,4.0
1498,Photo,3,comment,7.0


In [12]:
# f. Casting data to wide format (long -> wide)
cast_df = melted_df.pivot_table(index=['Type', 'Category'], 
                                columns='Engagement_Type', 
                                values='Count', aggfunc='sum').reset_index()
cast_df.head()


Engagement_Type,Type,Category,comment,like,share
0,Link,1,58.0,1513.0,254.0
1,Link,2,2.0,32.0,15.0
2,Link,3,2.0,68.0,15.0
3,Photo,1,1091.0,23184.0,3251.0
4,Photo,2,1064.0,21463.0,3231.0
