In [1]:
import pandas as pd
import numpy as np
import altair as alt

# Read one CSV per platform; paths are relative to the notebook's working directory.
facebook_df = pd.read_csv('aggr_fb_df_v2.csv')
youtube_df = pd.read_csv('yt_v4_after_midterm.csv')
twitter_df = pd.read_csv('twitter_v2.csv')
instagram_df = pd.read_csv('aggr_ig_df_v2.csv')

# Platform name -> DataFrame. The insertion order of this mapping is the order
# in which every later cell iterates over the platforms.
dataset = {
    "Facebook": facebook_df,
    "Youtube": youtube_df,
    "Twitter": twitter_df,
    "Instagram": instagram_df,
}

# Platform name -> hex colour, stored alongside each summary row and used as
# the colour-scale range in the charts.
color_picker = {
    "Facebook": "#3258a8",
    "Youtube": "#f23c0a",
    "Twitter": "#11ddf7",
    "Instagram": "#983bf5",
}

In [2]:
# Average number of posts per day for each platform.
#
# For every platform the `total_num_post` column is averaged over all rows of
# that platform's frame and divided by DAYS_PER_YEAR.
# NOTE(review): this presumes `total_num_post` is a one-year total per
# account -- confirm against the data source.
DAYS_PER_YEAR = 365

# One record per platform: [platform, mean daily posts, platform colour].
# Building the records first and constructing the frame once avoids a manual
# row counter.
total_posts = pd.DataFrame(
    [
        [
            platform_name,
            platform_frame["total_num_post"].mean() / DAYS_PER_YEAR,
            color_picker[platform_name],
        ]
        for platform_name, platform_frame in dataset.items()
    ],
    columns=["platform", "value", "color"],
)

In [3]:
# Average posts per day for each platform and quarter, plus the
# quarter-over-quarter percentage change of that average.
#
# Per row the value is q<N> * total_num_post / (PERCENT_SCALE * DAYS_PER_QUARTER),
# then averaged over the platform's rows.
# NOTE(review): presumably q1..q4 hold each quarter's share of the yearly
# posts as a percentage (hence the division by 100) -- confirm.
PERCENT_SCALE = 100
DAYS_PER_QUARTER = 90  # approximate quarter length used to get a daily rate

post_activity_rows = []
for platform, platform_frame in dataset.items():
    # Tracked explicitly per platform so Q1 never compares against the last
    # quarter of the previous platform.
    previous_value = None
    for quarter in range(1, 5):
        mean_post_per_user = (
            platform_frame["q" + str(quarter)]
            * platform_frame["total_num_post"]
            / (PERCENT_SCALE * DAYS_PER_QUARTER)
        ).mean()

        if previous_value is None:
            # First quarter has no baseline; reported as 0.
            pct_change = 0
        elif previous_value == 0:
            # A relative change from a zero baseline is undefined; use NaN
            # rather than dividing by zero.
            pct_change = float("nan")
        else:
            pct_change = (mean_post_per_user - previous_value) * 100 / previous_value

        post_activity_rows.append(
            [
                platform,
                "Q" + str(quarter) + ":2020",
                mean_post_per_user,
                pct_change,
                color_picker[platform],
            ]
        )
        previous_value = mean_post_per_user

post_activities = pd.DataFrame(
    post_activity_rows,
    columns=["platform", "quarter", "value", "percentage_change", "color"],
)

In [4]:
# Per platform, the columns that feed each engagement kind. The three inner
# lists are positional: index 0 -> reactions, 1 -> comments, 2 -> shares
# (see `aggs`).
col_dict = {
    "Facebook": [
        ["likes_per_post", "loves_per_post", "wows_per_post", "hahas_per_post", "angrys_per_post", "sads_per_post"],
        ["comments_per_post"],
        ["shares_per_post"],
    ],
    "Youtube": [
        ["likes_per_post", "dislikes_per_post", "views_per_post"],
        ["comments_per_post"],
        ["shares_per_post"],
    ],
    "Twitter": [
        ["favorite_ratio_per_post"],
        ["reply_per_post"],
        ["retweet_per_post"],
    ],
    "Instagram": [
        ["likes_per_post", "views_per_post"],
        ["comments_per_post"],
        ["shares_per_post"],
    ],
}

# Position in the inner lists of `col_dict` -> attribute label used in the charts.
aggs = {0: "reactions_per_post", 1: "comments_per_post", 2: "shares_per_post"}

# Row id -> [platform, attribute label, summed column means, colour, position].
total_engagement_dictionary = {}
for platform_name in dataset:
    platform_frame = dataset[platform_name]
    for group_index in range(3):
        group_columns = col_dict[platform_name][group_index]
        # Mean of every column in the group, then added together.
        column_means = np.mean(platform_frame[group_columns], axis = 0)
        next_row_id = len(total_engagement_dictionary)
        total_engagement_dictionary[next_row_id] = [
            platform_name,
            aggs[group_index],
            sum(column_means),
            color_picker[platform_name],
            group_index,
        ]

total_engagements = pd.DataFrame.from_dict(
    total_engagement_dictionary,
    orient = "index",
    columns = ["platform", "attribute", "value", "color", "order"],
)

In [5]:
# Non-interactive line chart of the three engagement attributes, one line per
# platform, using Altair's default colour scheme.
engagement_line_encodings = dict(
    x=alt.X(
        "attribute",
        # Fixed left-to-right order of the attribute labels.
        sort=["reactions_per_post", "comments_per_post", "shares_per_post"],
        axis=alt.Axis(title="", labelAngle=0),
    ),
    y=alt.Y("value", axis=alt.Axis(title="")),
    color="platform",
)

engagement_line = (
    alt.Chart(total_engagements)
    .mark_line(point=True, size=5)
    .encode(**engagement_line_encodings)
    .properties(width=400, height=300)
)

In [148]:
# Dropdown listing the platform names, bound to a single-value selection on
# the `platform` field. Charts below colour the selected platform and grey
# out the rest.
input_dropdown = alt.binding_select(options=list(dataset))
selector = alt.selection_single(
    name='Select',
    fields=['platform'],
    bind=input_dropdown,
)

# Nominal colour encoding that maps each platform name to its colour from
# `color_picker` (domain and range are in the same key order).
color_scale = alt.Color(
    "platform:N",
    scale=alt.Scale(
        domain=list(color_picker),
        range=list(color_picker.values()),
    ),
)

# Bar chart: average daily posts per platform. The selected platform keeps its
# brand colour; the others are drawn light gray.
num_posts_tooltip = [
    alt.Tooltip("platform"),
    alt.Tooltip("value", title='posts :day', format='.2f'),
]

num_posts_bar = (
    alt.Chart(total_posts)
    .mark_bar(size=50)
    .encode(
        x=alt.X("platform", axis=alt.Axis(title="", labelAngle=0)),
        y=alt.Y("value", axis=alt.Axis(title="posts : day")),
        color=alt.condition(selector, color_scale, alt.value("lightgray")),
        tooltip=num_posts_tooltip,
    )
    # Scale-bound pan/zoom on the x axis only.
    .interactive(bind_y=False)
    .properties(
        width=400,
        height=300,
        title="Average Number of Posts Daily per User",
    )
)
# Line chart: average daily posts per quarter, one line per platform, with the
# same selection-driven colouring as the bar chart.
# The y domain is pinned to (0, 5.5); values outside it are not accommodated
# automatically.
daily_posts_y = alt.Y(
    'value',
    axis=alt.Axis(title='posts: day'),
    scale=alt.Scale(domain=(0, 5.5)),
)

post_activities_line = (
    alt.Chart(post_activities)
    .mark_line()
    .encode(
        x=alt.X('quarter', axis=alt.Axis(title='', labelAngle=0)),
        y=daily_posts_y,
        color=alt.condition(selector, color_scale, alt.value("lightgray")),
    )
    .properties(
        width=400,
        height=300,
        title="Average Number of Posts Daily per User over Time",
    )
)
# Value labels for the quarterly line chart: the two-decimal value is shown
# only for the selected platform, an empty string otherwise. Labels sit 5px
# above each point.
quarter_label = alt.condition(
    selector,
    alt.Text('value:Q', format='.2f'),
    alt.value(""),
)
text = (
    alt.Chart(post_activities)
    .mark_text(dx=0, dy=-5)
    .encode(x=alt.X("quarter"), y=alt.Y('value'), text=quarter_label)
)

# Top row of the dashboard: bar chart on the left, labelled line chart on the right.
panel1 = num_posts_bar | alt.layer(post_activities_line, text)

# Line-with-points chart of the three engagement attributes per platform.
# This is the chart that registers `selector` (and therefore its dropdown).
engagement_scatter_encodings = dict(
    x=alt.X(
        "attribute",
        sort=["reactions_per_post", "comments_per_post", "shares_per_post"],
        axis=alt.Axis(title="", labelAngle=0),
    ),
    y=alt.Y("value", axis=alt.Axis(title="")),
    color=alt.condition(selector, color_scale, alt.value("lightgray")),
)

engagement_scatter = (
    alt.Chart(total_engagements)
    .mark_line(point=True, size=5)
    .encode(**engagement_scatter_encodings)
    .properties(width=400, height=300)
    .add_selection(selector)
)

# Value labels for the engagement chart, shown only for the selected platform.
# The x sort order repeats the one used by the engagement chart so the labels
# line up with its points.
engagement_label = alt.condition(
    selector,
    alt.Text('value:Q', format='.2f'),
    alt.value(""),
)
text2 = (
    alt.Chart(total_engagements)
    .mark_text(dx=0, dy=-5)
    .encode(
        x=alt.X("attribute", sort=["reactions_per_post", "comments_per_post", "shares_per_post"]),
        y=alt.Y('value'),
        text=engagement_label,
    )
)

# Cell output: the top row stacked above the labelled engagement chart.
alt.vconcat(panel1, alt.layer(engagement_scatter, text2))

In [139]:
# For each platform, count the rows whose `engagement_ratio_per_post` is at or
# above the platform's mean, and the rows strictly below it.
engagement_rows = []
for platform, platform_frame in dataset.items():
    ratio = platform_frame["engagement_ratio_per_post"]
    engagemean = ratio.mean()  # one mean per platform, shared by both buckets
    platform_color = color_picker[platform]

    # Vectorised counts of the boolean masks. A NaN ratio compares False on
    # both sides, so such a row is counted in neither bucket.
    greatermean = int((ratio >= engagemean).sum())
    lessermean = int((ratio < engagemean).sum())

    engagement_rows.append([platform, "greater than mean", greatermean, platform_color])
    engagement_rows.append([platform, "less than mean", lessermean, platform_color])

engagement = pd.DataFrame(
    engagement_rows,
    columns=["platform", "category", "value", "color"],
)

In [140]:
# For each platform, count the rows whose `percent_change_fan` is at or above
# the platform's mean, strictly below the mean, and strictly below zero.
#
# NOTE(review): the "percent less zero" bucket is not disjoint from the two
# mean-based buckets -- every negative row is also counted in one of them --
# so stacking all three categories with sum(value) over-counts rows. Confirm
# whether that is intended.
percentage_rows = []
for platform, platform_frame in dataset.items():
    fan_change = platform_frame["percent_change_fan"]
    percentengagemean = fan_change.mean()  # one mean per platform, shared by both mean buckets
    platform_color = color_picker[platform]

    # Vectorised counts of the boolean masks; NaN values compare False in
    # every test and are therefore counted nowhere.
    percentgreatermean = int((fan_change >= percentengagemean).sum())
    percentlessermean = int((fan_change < percentengagemean).sum())
    percentlesserzero = int((fan_change < 0).sum())

    # The category labels are matched verbatim by the colour-scale domain in
    # the chart cell, so they must not be reworded here alone.
    percentage_rows.append([platform, "percent great than mean", percentgreatermean, platform_color])
    percentage_rows.append([platform, "percent less than mean", percentlessermean, platform_color])
    percentage_rows.append([platform, "percent less zero", percentlesserzero, platform_color])

percentage = pd.DataFrame(
    percentage_rows,
    columns=["platform", "category", "value", "color"],
)

In [141]:
# Platform dropdown and the single-value selection it drives (same name,
# field and options as the selection built for the first panel).
input_dropdown = alt.binding_select(options=list(dataset))
selector = alt.selection_single(
    name='Select',
    fields=['platform'],
    bind=input_dropdown,
)

# Category label -> bar colour. The two "less than mean" labels share one
# colour and the two "greater/great than mean" labels share another.
category_colors = {
    "less than mean": "#8bfa1b",
    "greater than mean": "#097a2b",
    "percent less zero": "#6205ad",
    "percent less than mean": "#8bfa1b",
    "percent great than mean": "#097a2b",
}
color_scale2 = alt.Color(
    "category:N",
    scale=alt.Scale(
        domain=list(category_colors),
        range=list(category_colors.values()),
    ),
)

# Horizontal stacked bar per platform: rows at/above vs. below the mean
# engagement ratio. Segments are coloured by category for the selected
# platform and light gray for the others.
engagement_bar_encodings = dict(
    x=alt.X("sum(value)", axis=alt.Axis(title="Number of User", labelAngle=0)),
    y=alt.Y("platform", axis=alt.Axis(title="")),
    color=alt.condition(selector, color_scale2, alt.value("lightgray")),
    # Stack order of the segments: category label, descending.
    order=alt.Order("category", sort="descending"),
    tooltip=["platform", "value"],
)

engagement_bar = (
    alt.Chart(engagement)
    .mark_bar(size=20)
    .encode(**engagement_bar_encodings)
    .interactive()
    .properties(
        width=400,
        height=150,
        title="Engagement Ratio Per Post",
    )
    .add_selection(selector)
)



In [142]:
# Horizontal stacked bar per platform for the fan percent-change categories.
# It references `selector` for colouring but does not register it.
percentage_bar_encodings = dict(
    x=alt.X("sum(value)", axis=alt.Axis(title="Number of User", labelAngle=0)),
    y=alt.Y("platform", axis=alt.Axis(title="")),
    color=alt.condition(selector, color_scale2, alt.value("lightgray")),
    # Stack order of the segments: category label, descending.
    order=alt.Order("category", sort="descending"),
    tooltip=["platform", "value"],
)

percentage_bar = (
    alt.Chart(percentage)
    .mark_bar(size=20)
    .encode(**percentage_bar_encodings)
    .interactive()
    .properties(
        width=400,
        height=150,
        title="Percent Change of Fan",
    )
)

In [143]:
# Cell output: the two stacked-bar charts, one above the other
# (`&` is Altair's vertical-concatenation operator).
engagement_bar & percentage_bar

In [151]:
# Full dashboard, top to bottom: posts panel, labelled engagement chart,
# engagement-ratio bars, fan percent-change bars.
dashboard = alt.vconcat(
    panel1,
    engagement_scatter + text2,
    engagement_bar,
    percentage_bar,
    title=' SOCIAL MEDIA ANALYTICS',
)

# Cell output: apply the shared axis and title typography to the whole figure.
dashboard.configure_axis(
    labelFontSize=20,
    titleFontSize=20,
).configure_title(
    fontSize=35,
    offset=5,
    orient='top',
    anchor='middle',
)