### Functions for later...

In [15]:
import datetime
import os
import sys
sys.path.insert(0,"../code")

import altair as alt
alt.data_transformers.disable_max_rows()
import pandas as pd

from collections import Counter, defaultdict
from copy import deepcopy

from nltk.corpus import stopwords
s_words = set(stopwords.words())

from ct_helpers import ct_get_lists, download_posts
from fb_model import FbIgPost


def clean_text(text):
    """
    Normalize ``text`` with one fixed set of ``clean`` options.

    Thin wrapper around ``clean`` (cleantext.clean) so callers do not have to
    repeat its long keyword list. Returns whatever ``clean`` returns.

    NOTE(review): ``clean`` is not imported in the visible cell; presumably
    ``from cleantext import clean`` is expected somewhere -- confirm.
    """
    options = dict(
        fix_unicode=True,  # fix various unicode errors
        to_ascii=True,  # transliterate to closest ASCII representation
        lower=True,  # lowercase text
        no_line_breaks=True,  # fully strip line breaks as opposed to only normalizing them
        no_urls=True,  # replace all URLs with a special token
        no_emoji=True,  # remove emojis
        no_emails=True,  # replace all email addresses with a special token
        no_phone_numbers=True,  # replace all phone numbers with a special token
        no_numbers=False,  # numbers are kept as-is
        no_digits=False,  # digits are kept as-is
        no_currency_symbols=False,  # currency symbols are kept as-is
        no_punct=True,  # remove punctuations
        # Replacement tokens; URLs, emails and punctuation become empty strings
        replace_with_punct="",
        replace_with_url="",
        replace_with_email="",
        replace_with_phone_number="<PHONE>",
        replace_with_number="<NUMBER>",
        replace_with_digit="0",
        replace_with_currency_symbol="<CUR>",
        lang="en",  # set to 'de' for German special handling
    )
    return clean(text, **options)

---

#  🎤 🎸 🥁 🎹 🎺 Exploring musical artists' Facebook data 🎤 🎸 🥁 🎹 🎺

In this notebook, we'll walk through how to download and explore posts from a specified list of Facebook accounts. This list will have to be created first with the Facebook-owned insights tool, [CrowdTangle](https://crowdtangle.com/).

Note: There is no cost for using CrowdTangle, but it is only available to select Facebook publishing partners and academics. Check out their [FAQs](https://help.crowdtangle.com/en/collections/41331-faqs-and-troubleshooting) for more information.

---

### [Take me to the CrowdTangle Dashboard](https://apps.crowdtangle.com/socialmediamanipulationclass)
### Part 1: Exploring the dashboard.
1. Lists/searches
2. Intelligence
3. Live displays

---

### Part 2: Two ways to get the data from the dashboard

1. Using the dashboard
2. Using a little code (API) <--- preferred 🤘

## Part 3: Exploring the posts

1. **Types of posts sent**
2. **Words used most**
3. **Most basic interactions**:
    1. likes
    2. shares
    3. comments

4. **Most emojis**:
    1. wows
    2. hahas
    3. sad
    4. angry
    5. thankful
    6. care
5. **Trajectory of most popular post (likes)**

## Get the data for all musical artists
---

### 1. Get the list ID

In [None]:
# Load the CT API token from the environment
api_token = os.environ.get("CLASS_CT_TOKEN")

# Returns all lists in the dashboard
list_records = ct_get_lists(api_token)

my_list_name = "Musical Artists for Social Media Manipulation"

# Extract the ID number of the list whose title matches my list name.
# `id_number` is bound only on an exact match; if no list matches we stop
# here instead of silently carrying on with the ID of whichever list
# happened to be last in the response.
for record in list_records["result"]['lists']:
    if record["title"] == my_list_name:
        id_number = record["id"]
        print("Found the list ID number.")
        break
else:
    raise ValueError(f"No CrowdTangle list titled {my_list_name!r} was found.")

print(f"List ID #: {id_number}")

### Set start and end date

In [None]:
# Define the retrieval window: from `num_days_back` days ago through today,
# with both ends formatted as YYYY-MM-DD strings.

num_days_back = 30*6

date_format = "%Y-%m-%d"
end_dt = datetime.date.today()
start_dt = end_dt - datetime.timedelta(days=num_days_back)

end_time = end_dt.strftime(date_format)
start_time = start_dt.strftime(date_format)

for message in (
    f"We're going to search {num_days_back} days in the past",
    f"Start time: {start_time}",
    f"End time  : {end_time}",
):
    print(message)

In [None]:
# Download the posts published by the accounts in our list within the
# start/end window defined above.
posts = download_posts(
    crowdtangle_list_id=id_number, #<---- The number associated with our list
    start=start_time,
    end=end_time,
    max_queries=100,               #<---- Make no more than this number of queries
    api_token=api_token            #<---- API token for access
)

# Questions for candy!
# 🍫🍬🍫🍬🍫🍬🍫🍬🍫🍬🍫🍬🍫🍬🍫🍫🍬🍫🍬🍫🍬🍫🍬🍫🍬🍫🍬🍫🍬🍫

![display image](../gifs/despicable-me-candy.gif)
![display image](../gifs/homer-sweet-sweet-candy.gif)
![display image](../gifs/psycho-oprah-candy.gif)

# 🍫🍬🍫🍬🍫🍬🍫🍬🍫🍬🍫🍬🍫🍬🍫🍫🍬🍫🍬🍫🍬🍫🍬🍫🍬🍫🍬🍫🍬🍫

- ## What artist earned the **most interactions overall**?
- ## What type of interaction typically contributes the most to an artist's interactions?
- ## Artist with the most emojis total for ➡️ ❤️😡😆
    - Which artist earned the most **love** emojis (total)?
    - Which artist earned the most **angry** emojis (total)?
    - Which artist earned the most **haha** emojis (total)?
- ## Artist with the most emojis from a single post ➡️ ❤️😡😆
    - Which artist earned the most **love** emojis in a (single post)?
    - Which artist earned the most **angry** emojis in a (single post)?
    - Which artist earned the most **haha** emojis in a (single post)?

---
---
---
---

### What does the data look like?

In [None]:
type(posts)

In [None]:
len(posts)

In [None]:
posts[0]

In [None]:
# Wrap every raw post in an FbIgPost so its fields can be read through
# the helper methods used in the cells below.
post_objs = list(map(FbIgPost, posts))

### Organize the data a little bit...

In [None]:
# Split every post into two flat records that share the same identifying
# fields: one with its text and post type, one with its interaction counts.
text_data = []
performance_data = []

for post in post_objs:

    # Identifying fields common to both records
    base_record = dict(
        id=post.get_post_ID(),
        username=post.get_value(['account','name']),
        date=post.post_object["date"],
        link=post.get_link_to_post(),
    )

    # Text record: base fields plus the post's text and type
    text_record = deepcopy(base_record)
    text_record["text"] = post.get_text()
    text_record["post_type"] = post.get_post_type()
    text_data.append(text_record)

    # Statistics record: base fields plus everything under statistics -> actual
    stat_record = deepcopy(base_record)
    stat_record.update(post.get_value(["statistics", "actual"]))
    performance_data.append(stat_record)

In [None]:
# One row per post: the identifying fields plus the interaction counts
# collected in `performance_data`.
stats_df = pd.DataFrame.from_records(performance_data)
stats_df

# Q1: What artist earned the **most interactions overall**?

In [None]:
# These are the interaction columns we want to count.
# loveCount and angryCount are part of the list so that "total interactions"
# covers the same reactions that the love/angry questions further down
# read from stats_df.
# NOTE(review): a "thankfulCount" column may also exist -- check
# stats_df.columns and add it here if so.
responses = [
    "likeCount",
    "shareCount",
    "commentCount",
    "loveCount",
    "wowCount",
    "hahaCount",
    "sadCount",
    "angryCount",
    "careCount"
]

# Calculate each artist's total for every interaction type
total_response_counts = stats_df.groupby('username')[responses].sum().reset_index()
total_response_counts.head(2)

In [None]:
# Add up every interaction type across each artist's row to get one
# overall count per artist, then attach the artist name.
row_sums = total_response_counts[responses].sum(axis=1)
total_interactions = row_sums.to_frame('counts')
total_interactions["username"] = total_response_counts['username']

In [None]:
# Horizontal bar per artist, sorted by total interactions (largest first)
bars = alt.Chart(
    total_interactions,
    title="Most interactions total 🤑"
).mark_bar(color='firebrick').encode(
    x = alt.X("counts:Q", title="Total interactions"),
    y = alt.Y("username:N", sort="-x", title=None)
).properties(
    width=700,
    height=400
)


# Text layer with the same x/y encoding so each label sits at the end of
# its bar; the label is the count with thousands separators
text = alt.Chart(
    total_interactions
).mark_text(align='left', fontWeight="bold", fontSize=15).encode(
    x = alt.X("counts:Q", title="Total interactions"),
    y = alt.Y("username:N", sort="-x", title=None),
    text = alt.Text("counts:N", format=',')
).properties(
    width=700,
    height=400
)

# Layer bars and labels, then set font sizes for the combined chart
alt.layer(bars,text).configure_axis(
    titleFontSize=15,
    labelFontSize=15
).configure_legend(
    titleFontSize=15,
    labelFontSize=15
).configure_title(
    fontSize=20,
)

# Q2: What type of interaction typically contributes the most to an artist's interactions?

In [None]:
# Convert each artist's per-type totals into proportions of that artist's
# overall interactions, so every row sums to 1. An artist with zero
# interactions in total ends up with NaN proportions (0 / 0).
row_totals = total_response_counts[responses].sum(axis=1)
props = total_response_counts[responses].divide(row_totals, axis=0)
props['username'] = total_response_counts['username'].copy()

In [None]:
# Stacked horizontal bars: one bar per artist, split by interaction type.
# `melt` reshapes props to long form (username, variable, value) so the
# interaction type can drive the color channel.
alt.Chart(
    props.melt(id_vars='username'),
    title = "Interaction percentage breakdown"
).mark_bar().encode(
    x = alt.X("value:Q", title="Percent of all interactions", axis=alt.Axis(format="%"), scale=alt.Scale(domain=[0,1])),
    y = alt.Y("username", title=None),
    color = alt.Color("variable:N", title="Interaction type", sort="-x", legend=alt.Legend(orient='bottom')),
    tooltip = [
        alt.Tooltip('value:Q', title="Percent", format=".2%"),
        alt.Tooltip('variable:N', title="Interaction")
    ]
).properties(
    width=800,
    height=400
).configure_axis(
    titleFontSize=15,
    labelFontSize=15
).configure_legend(
    titleFontSize=15,
    labelFontSize=16
).configure_title(
    fontSize=20,
)

# Q3: Artist with the most emojis total for ➡️ ❤️😡😆
- Which artist earned the most **love** emojis (total)?
- Which artist earned the most **angry** emojis (total)?
- Which artist earned the most **haha** emojis (total)?


In [None]:
# Sum the three reaction columns per artist in one pass, then give each
# reaction its own two-column frame (username, count) for charting.
reaction_sums = stats_df.groupby("username")[['loveCount', 'angryCount', 'hahaCount']].sum()

love_total = reaction_sums['loveCount'].to_frame("count").reset_index()
angry_total = reaction_sums['angryCount'].to_frame("count").reset_index()
haha_total = reaction_sums['hahaCount'].to_frame("count").reset_index()


## Q3.1 Total loves

In [None]:
# Create bar chart: one bar per artist, sorted by total loves (largest first)
bars = alt.Chart(
    love_total,
    title="Total ❤️ for each artist"
).mark_bar(color='forestgreen').encode(
    x = alt.X("count:Q", title = "Total loves"),
    y = alt.Y("username:N", sort="-x", title=None),
).properties(
    width=700,
    height=400
)

# Add the total number of loves for each artist as a label at the bar's end
text = alt.Chart(love_total).mark_text(align='left', fontSize=15).encode(
    x = alt.X("count:Q", title = "Total loves"),
    y = alt.Y("username:N", sort="-x", title=None),
    text = alt.Text("count:Q", format=",")
)

# Put them together and configure some aesthetics
alt.layer(bars, text).configure_axis(
    titleFontSize=15,
    labelFontSize=15
).configure_title(fontSize=20)

## Q3.2 Total Angry

In [None]:
# Create bar chart: one bar per artist, sorted by total angry reactions
bars = alt.Chart(
    angry_total,
    title="Total 😡 for each artist"
).mark_bar(color='red').encode(
    x = alt.X("count:Q", title = "Total angry"),
    y = alt.Y("username:N", sort="-x", title=None),
).properties(
    width=700,
    height=400
)

# Add the total number of angry reactions for each artist as a label
text = alt.Chart(angry_total).mark_text(align='left', fontSize=15).encode(
    x = alt.X("count:Q", title = "Total angry"),
    y = alt.Y("username:N", sort="-x", title=None),
    text = alt.Text("count:Q", format=",")
)

# Put them together and configure some aesthetics
alt.layer(bars, text).configure_axis(
    titleFontSize=15,
    labelFontSize=15
).configure_title(fontSize=20)

## Q3.3 Total haha

In [None]:
# Create bar chart: one bar per artist, sorted by total haha reactions
bars = alt.Chart(
    haha_total,
    title="Total 😆 for each artist"
).mark_bar(color='blue').encode(
    x = alt.X("count:Q", title = "Total haha"),
    y = alt.Y("username:N", sort="-x", title=None),
).properties(
    width=700,
    height=400
)

# Add the total number of haha reactions for each artist as a label
text = alt.Chart(haha_total).mark_text(align='left', fontSize=15).encode(
    x = alt.X("count:Q", title = "Total haha"),
    y = alt.Y("username:N", sort="-x", title=None),
    text = alt.Text("count:Q", format=",")
)

# Put them together and configure some aesthetics
alt.layer(bars, text).configure_axis(
    titleFontSize=15,
    labelFontSize=15
).configure_title(fontSize=20)

# Q4 Artist with the most emojis from a single post ➡️ ❤️😡😆
- Which artist earned the most **love** emojis in a (single post)?
- Which artist earned the most **angry** emojis in a (single post)?
- Which artist earned the most **haha** emojis in a (single post)?

In [None]:
top_count = 10


def _top_posts(count_column, how_many):
    """Return the `how_many` posts with the largest `count_column`, best first."""
    ranked = stats_df.sort_values(count_column, ascending=False)
    return ranked.head(how_many).reset_index(drop=True)


toploved = _top_posts("loveCount", top_count)
topangry = _top_posts("angryCount", top_count)
tophaha = _top_posts("hahaCount", top_count)

In [None]:
# Print the link of each of the most-loved posts, best first.
# Iterating over the column (rather than positions 0..top_count-1) avoids an
# IndexError when fewer than `top_count` posts are available.
print(f"Top {top_count} Loved Posts:")
for link in toploved["link"]:
    print(link)

In [None]:
# Print the link of each of the posts with the most angry reactions, best
# first. Iterating over the column (rather than positions 0..top_count-1)
# avoids an IndexError when fewer than `top_count` posts are available.
print(f"Top {top_count} Angry Posts:")
for link in topangry["link"]:
    print(link)

In [None]:
# Print the link of each of the posts with the most haha reactions, best
# first. Reads from `tophaha` (not `topangry`) so the links match the
# heading; iterating over the column also avoids an IndexError when fewer
# than `top_count` posts are available.
print(f"Top {top_count} Haha Posts:")
for link in tophaha["link"]:
    print(link)

## Q4.1 Which artist earned the most ❤️ in a (single post)?

In [None]:
# One dot per top-loved post, placed on its artist's row at the post's love
# count. The href channel carries the post's link.
alt.Chart(
    toploved,
    title = "Top 🔟 ❤️ posts"
).mark_circle(size=100, color='black').encode(
    x = alt.X("loveCount:Q"),
    y = alt.Y("username:N", sort='-x', title=None),
    href = alt.Href("link:N"),
    tooltip = [
        alt.Tooltip("loveCount:Q", format=",")
    ]
).properties(
    width=700,
#     height=300
).configure_axis(
    titleFontSize=15,
    labelFontSize=15
).configure_title(fontSize=20)

## Q4.2 Which artist earned the most 😡 in a (single post)?

In [None]:
# One dot per top-angry post, placed on its artist's row at the post's angry
# count. The href channel carries the post's link.
alt.Chart(
    topangry,
    title = "Top 🔟 😡 posts"
).mark_circle(size=100, color='black').encode(
    x = alt.X("angryCount:Q"),
    y = alt.Y("username:N", sort='-x', title=None),
    href = alt.Href("link:N"),
    tooltip = [
        alt.Tooltip("angryCount:Q", format=",")
    ]
).properties(
    width=700,
#     height=300
).configure_axis(
    titleFontSize=15,
    labelFontSize=15
).configure_title(fontSize=20)

## Q4.3 Which artist earned the most 😆 in a (single post)?

In [None]:
# One dot per top-haha post, placed on its artist's row at the post's haha
# count. The href channel carries the post's link.
alt.Chart(
    tophaha,
    title = "Top 🔟 😆 posts"
).mark_circle(size=100, color='black').encode(
    x = alt.X("hahaCount:Q"),
    y = alt.Y("username:N", sort='-x', title=None),
    href = alt.Href("link:N"),
    tooltip = [
        alt.Tooltip("hahaCount:Q", format=",")
    ]
).properties(
    width=700,
#     height=300
).configure_axis(
    titleFontSize=15,
    labelFontSize=15
).configure_title(fontSize=20)

In [None]:
# Look up how DataFrame.divide is called. Calling it with no arguments
# raises TypeError (the `other` operand is required) and stops a full
# re-run of the notebook, so show its documentation instead.
help(pd.DataFrame.divide)

In [None]:
# Bar chart of the three posts with the most angry reactions.
# `top3angry` is derived here from `topangry` (already sorted, most angry
# first) so the cell does not depend on an undefined name, and `mark_bar`
# is actually called and given an encoding so a chart is produced.
# NOTE(review): the encoding is a best guess at the intended chart -- confirm.
top3angry = topangry.head(3)
alt.Chart(top3angry).mark_bar().encode(
    x = alt.X("angryCount:Q", title="Angry reactions"),
    y = alt.Y("link:N", sort="-x", title=None)
)

In [None]:
# Print the link of the post at row position 406 of stats_df.
# NOTE(review): 406 is a hard-coded position; this raises IndexError when
# stats_df has fewer than 407 rows -- confirm which post is intended.
print(stats_df.iloc[406].link)

In [None]:
post_objs[0].post_object.keys()

In [None]:
post_objs[0].post_object.keys()

In [None]:
post_objs[0].post_object['history']

In [None]:
stats_df.columns

In [None]:
# Per-artist totals of love and angry reactions, each as a two-column
# frame (username, count).
love_angry_sums = stats_df.groupby("username")[['loveCount', 'angryCount']].sum()

most_loves = love_angry_sums['loveCount'].to_frame("count").reset_index()
most_angry = love_angry_sums['angryCount'].to_frame("count").reset_index()

In [None]:
# Quick bar chart of total angry reactions per artist, largest first
alt.Chart(most_angry).mark_bar().encode(
    x = alt.X("count:Q"),
    y = alt.Y("username", sort = "-x")
)

In [None]:
# Quick bar chart of total love reactions per artist, largest first
alt.Chart(most_loves).mark_bar().encode(
    x = alt.X("count:Q"),
    y = alt.Y("username", sort = "-x")
)

In [None]:
# Print the link to every downloaded post.
# Reuses the FbIgPost wrappers already built in `post_objs` instead of
# wrapping each raw post a second time, and uses its own loop variable so
# the name `post` is not rebound to a raw post record (later cells call
# FbIgPost methods on `post`).
for post_obj in post_objs:
    print(post_obj.get_link_to_post())

In [None]:
# Parse the date column into pandas datetimes (stored in a new column,
# pd_date) so it can be used on a temporal axis and with the .dt accessor.
stats_df['pd_date'] = pd.to_datetime(stats_df.date)

In [None]:
stats_df

In [None]:
def _per_artist_stat(count_column, stat, interaction_type):
    """
    Summarize one interaction column per artist.

    Returns a frame with columns username, count, type and stat, where
    `count` is `stat` ("max" or "mean") of `count_column` over the artist's
    posts. The `stat` column keeps max and mean rows distinguishable once
    several of these frames are concatenated; without it a "likes" row could
    be either one.
    """
    summary = stats_df.groupby(['username'])[count_column].agg(stat).to_frame("count").reset_index()
    summary["type"] = interaction_type
    summary["stat"] = stat
    return summary


max_likes = _per_artist_stat('likeCount', "max", "likes")
mean_likes = _per_artist_stat('likeCount', "mean", "likes")
max_comments = _per_artist_stat('commentCount', "max", "comments")
mean_comments = _per_artist_stat('commentCount', "mean", "comments")

In [None]:
# Link of the last wrapped post. Indexing `post_objs` directly avoids
# relying on whatever an earlier loop happened to leave bound to `post`
# (the loop over `posts` rebinds it to a raw record, not an FbIgPost).
post_objs[-1].get_link_to_post()

In [None]:
# Stack the four per-artist summaries into one long frame.
# The original row labels are kept, so index values repeat across the
# four stacked pieces.
max_mean_df = pd.concat([
    max_likes,
    mean_likes,
    max_comments,
    mean_comments
])

In [None]:
max_mean_df

In [None]:
stats_df

In [None]:
# Column totals across all posts. Restricted to numeric columns: summing
# the text columns (id, username, date, link) would concatenate every value
# into one giant string, and a datetime column (pd_date) cannot be summed.
stats_df.sum(numeric_only=True)

In [None]:
# Bars showing max(likeCount) per artist, sorted largest first.
# NOTE(review): `href="link:N"` is a non-aggregated field next to an
# aggregated x; presumably this makes the aggregation group by link as well
# as username (one bar segment per post) -- confirm the rendered result.
bars = alt.Chart(stats_df, title="Single Most Liked Post per Artist").mark_bar().encode(
    x = alt.X("max(likeCount):Q"),
    y = alt.Y(
        "username:N",
        axis = alt.Axis(
            title = None),
        sort="-x"
    ),
#     color = alt.Color(
#         "max(likeCount):N",
#         legend=None
#     ),
    href="link:N"
).properties(width=600, height=500)

# Label layer: the max like count, nudged 30px to the right of the bar end
text = alt.Chart(stats_df).mark_text(dx=30, fontSize=16).encode(
    x = alt.X("max(likeCount):Q"),
    y = alt.Y(
        "username:N",
        axis = alt.Axis(
            title = None),
        sort="-x"
    ),
    text = alt.Text(
        "max(likeCount):N",
        format = ","
    )
).properties(width=600, height=500)


# Layer bars and labels, then set font sizes for the combined chart
alt.layer(bars,text).configure_axis(
    labelFontSize=16,
    titleFontSize=16
).configure_title(fontSize=18)

In [None]:

# Max likes on a single post per artist; one color per artist, no legend
alt.Chart(stats_df).mark_bar().encode(
    y = alt.Y("username:N", sort="-x"),
    x = alt.X("max(likeCount):Q"),
    color = alt.Color("username:N", legend=None)
)

In [None]:

# Mean likes per post for each artist; one color per artist, no legend
alt.Chart(stats_df).mark_bar().encode(
    y = alt.Y("username:N", sort="-x"),
    x = alt.X("mean(likeCount):Q"),
    color = alt.Color("username:N", legend=None)
)

In [None]:
# Largest like count on any single post, per artist
likes_by_artist = stats_df.groupby(["username"])["likeCount"]
max_likes_per_user = likes_by_artist.max().rename("max_likes").reset_index()
max_likes_per_user

In [None]:
# Count how many rows (posts) each username has in stats_df
num_posts = Counter(stats_df["username"])

In [None]:
# Turn the username -> post-count mapping into a two-column frame
num_posts_df = pd.DataFrame(
    list(num_posts.items()),
    columns=["username", "num_posts"],
)

In [None]:
num_posts_df

In [None]:
# Join each artist's max like count with their number of posts
merged = max_likes_per_user.merge(num_posts_df, on= "username")#.melt(id_vars="username")

In [None]:
merged

In [None]:
# One point per artist: max likes on a single post vs. number of posts
alt.Chart(merged).mark_point().encode(
    x = alt.X("max_likes:Q"),
    y = alt.Y("num_posts:Q")
)

In [None]:
# Look up how DataFrame.query is called. Calling it with no arguments
# raises TypeError (the query expression is required) and stops a full
# re-run of the notebook, so show its documentation instead.
help(pd.DataFrame.query)

In [None]:
# Raw date value of the last wrapped post. Indexing `post_objs` directly
# avoids relying on whatever an earlier loop happened to leave bound to
# `post` (the loop over `posts` rebinds it to a raw record, not an FbIgPost).
post_objs[-1].post_object["date"]

In [None]:
# Build a "<year>_<month>" label for each post from its parsed date
year_part = stats_df.pd_date.dt.year.astype(str)
month_part = stats_df.pd_date.dt.month.astype(str)
stats_df['date_str'] = year_part + "_" + month_part

In [None]:
stats_df

In [None]:
# Average likes per post for every (artist, year_month) pair
likes_by_artist_month = stats_df.groupby(["username","date_str"])["likeCount"]
mean_stats_df = likes_by_artist_month.mean().reset_index()

In [None]:
# Every post as a point: date on x, like count on a log-scaled y.
# Artist is encoded twice (color and shape) to help tell the series apart.
alt.Chart(stats_df).mark_point().encode(
    x = alt.X("pd_date:T"),
    y = alt.Y("likeCount:Q", scale=alt.Scale(type='log')),
    color = alt.Color("username:N"),
    shape = alt.Shape("username:N"),
    tooltip = [
        alt.Tooltip('username:N', title="User"),
        alt.Tooltip('pd_date:T', title="Date", format="%b %d"),
        alt.Tooltip('likeCount:N', title="# Likes", format=","),
    ]
).properties(
    width=700,
    height=500
).configure_axis(
    titleFontSize=15,
    labelFontSize=15
)

In [None]:
mean_stats_df

In [None]:
# One row per post: the identifying fields plus its text and post type
text_df = pd.DataFrame.from_records(text_data)
text_df

In [None]:
# Number of posts of each type, as a two-column frame (post_type, count)
type_tally = text_df["post_type"].value_counts()
post_type_counts = type_tally.rename_axis("post_type").reset_index(name="count")

In [None]:
# Share of all downloaded posts that each post type represents,
# stored both as a percentage (0-100) and as a proportion (0-1).
total_posts = len(posts)
share = post_type_counts['count'] / total_posts
post_type_counts['percentage'] = share * 100
post_type_counts['proportion'] = share
post_type_counts

In [None]:
# One bar per post type, sorted by count; the tooltip shows the type, the
# number of posts and the percentage of all posts
bars = alt.Chart(post_type_counts, title="Post Type Breakdown").mark_bar().encode(
    x = alt.X("count:Q"),
    y = alt.Y(
        "post_type:N",
        axis = alt.Axis(
            title = None),
        sort="-x"
    ),
    color = alt.Color(
        "post_type:N",
        legend=None
    ),
    tooltip = [
        alt.Tooltip("post_type:N", title = "Post Type"),
        alt.Tooltip("count:Q", title = "# of Posts"),
        alt.Tooltip("percentage:Q", title = "% of Posts", format=".2f"),
    ]
).properties(width=600,height=200)

# Label layer: each type's proportion formatted as a percentage, nudged
# 30px to the right of the bar end
text = alt.Chart(post_type_counts).mark_text(dx=30, fontSize=16).encode(
    x = alt.X("count:Q"),
    y = alt.Y(
        "post_type:N",
        axis = alt.Axis(
            title = None),
        sort="-x"
    ),
    text = alt.Text(
        "proportion:N",
        format = ".2%"
    )
).properties(width=600)


# Layer bars and labels, then set font sizes for the combined chart
alt.layer(bars,text).configure_axis(
    labelFontSize=16,
    titleFontSize=16
).configure_title(fontSize=18)

### Clean text to get word counts...

In [None]:
# Print the raw text of every post, each followed by a divider line
divider = "-"*50
for post in text_df.text:
    print(post, "\n")
    print(divider)
    

In [None]:
# Run every post's text through the clean_text helper defined at the top
# of the notebook and store the result in a new column.
text_df['clean_text'] = text_df['text'].map(clean_text)

In [None]:
# Print the cleaned text of every post, each followed by a divider line
divider = "-"*50
for post in text_df.clean_text:
    print(post, "\n")
    print(divider)

In [None]:
# Count, per artist, how often each non-stopword appears in their cleaned
# post text. Result: {username: Counter({word: occurrences})}.
user_word_counter = defaultdict(Counter)

for idx, row in text_df.iterrows():
    user = row['username']
    # Named `tokens` (not `clean_text`) so the clean_text() function defined
    # at the top of the notebook is not overwritten; otherwise re-running
    # the cleaning cell after this one would fail.
    tokens = row['clean_text'].split()

    for word in tokens:
        if word not in s_words:
            user_word_counter[user][word] += 1

In [None]:
# Flatten the nested {user: {word: count}} mapping into one record per
# (user, word) pair, ready to load into a DataFrame.
user_word_count_data = [
    {"user" : artist, "word" : term, "count" : occurrences}
    for artist, term_counts in user_word_counter.items()
    for term, occurrences in term_counts.items()
]

In [None]:
# One row per (user, word) pair with its count
user_word_count_df = pd.DataFrame.from_records(user_word_count_data)
user_word_count_df

In [None]:
# Interactive chart: one translucent dot per (artist, word) at that word's
# count. Hovering or clicking a dot highlights it and shows the word.
# NOTE(review): selection_single / selection_multi / add_selection are,
# as far as I know, deprecated in Altair 5 in favor of selection_point /
# add_params -- confirm against the installed Altair version.
hover = alt.selection_single(
    on='mouseover',  # select on mouseover
    nearest=True,    # select nearest point to mouse cursor
    empty='none'     # empty selection should match nothing
)

click = alt.selection_multi(
    empty='none' # empty selection matches no points
)


# Base scatter; the data is supplied once, on the layered chart below
plot = alt.Chart(
    title="Words Most-commonly Used by an Artist"
).mark_circle(size=50, opacity=.5).encode(
    x = alt.X(
        "user:N", title = None,
        axis = alt.Axis(labelAngle=45)
    ),
    y = alt.Y("count:Q"),
)

base = plot.transform_filter(
    hover | click # filter to points in either selection
)


# Layers, bottom to top: all dots (carrying both selections), a red ring on
# the selected dots, a white-stroked copy of the word as a halo, then the
# word itself
alt.layer(
    plot.add_selection(hover).add_selection(click),
    base.mark_point(size=100, stroke='firebrick', strokeWidth=2),
    base.mark_text(dx=4, dy=-8, align='left', stroke='white', strokeWidth=3, fontSize=15).encode(text='word:N'),
    base.mark_text(dx=4, dy=-8, align='left', fontSize=15).encode(text='word:N'),
    data=user_word_count_df
).properties(
    width=800,
    height=450
).configure_axis(
    labelFontSize=15,
    titleFontSize=15
).configure_title(fontSize=18)
