# Process "Elon Musk" search results during Twitter announcement

#### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import altair_stiles as altstiles
import json

In [3]:
# Register the altair_stiles theme under the name "stiles".
alt.themes.register("stiles", altstiles.theme)
# NOTE(review): the theme enabled here is "grid", not the "stiles" name registered
# above — confirm this is intentional.
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
# Let pandas display up to 1,000 columns and 1,000 rows before truncating output.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

In [5]:
# Current local date formatted as a "YYYY-MM-DD" string.
today = pd.Timestamp.today().strftime("%Y-%m-%d")

---

## Get data

In [6]:
# NOTE(review): `user` is not referenced by any other cell visible in this notebook.
user = "elonmentions"

#### Read search results pulled with the [Twarc library](https://twarc-project.readthedocs.io/en/latest/twarc2_en_us/)

In [7]:
# Parse the file as JSON Lines: every non-blank line is one standalone JSON
# document. The encoding is passed explicitly so emoji and non-Latin tweet text
# decode the same way on every platform, and blank lines are skipped instead of
# crashing json.loads.
with open(
    "/Users/stiles/twarc2/elon_musk_search_announcement_hours.json",
    encoding="utf-8",
) as f:
    jsons = [json.loads(line) for line in f if line.strip()]

#### Loop over the list of Twitter JSON pages and collect each tweet object into a list

In [8]:
# Flatten the parsed pages into one list holding every item found under each
# page's "data" key. A page without a "data" key contributes nothing instead of
# raising KeyError.
data_list = [tweet for page in jsons for tweet in page.get("data", [])]

#### Convert list of dictionaries to dataframe

In [36]:
# Build one row per collected dict; the dict keys become the columns.
elon_src = pd.DataFrame(data_list)

In [51]:
# Work on a copy so the frame held in `elon_src` is not modified by the steps below.
src = elon_src.copy()

#### Unpack nested columns

In [52]:
# Pull only the "mentions" value out of each row's entities dict.
# json_normalize(src["entities"]) can return several columns (one per top-level
# entities key), which cannot be assigned to a single column reliably. Rows whose
# entities value is not a dict (e.g. missing) get None.
src["mentions"] = src["entities"].map(
    lambda entities: entities.get("mentions") if isinstance(entities, dict) else None
)

In [53]:
# Expand the public_metrics dicts and copy the counters over by column name.
# Assigning the json_normalize result directly pairs columns by position, so a
# different key order or an extra metric in the payload would end up under the
# wrong label (or fail on a length mismatch). Selecting by name raises a clear
# KeyError if an expected metric is absent.
metric_cols = ["retweet_count", "reply_count", "like_count", "quote_count"]
public_metrics = pd.json_normalize(src["public_metrics"].tolist())
src[metric_cols] = public_metrics[metric_cols]

In [54]:
# Expand the attachments dicts and pick the two fields by name.
# Non-dict values (rows with no attachments) are treated as empty dicts, and
# reindex() guarantees both columns exist, filled with NaN where a field is
# absent. Assigning the raw json_normalize result pairs columns by position, so
# the labels would depend on which key happens to appear first in the data.
attachment_cols = ["media_keys", "poll_ids"]
attachments = pd.json_normalize(
    [a if isinstance(a, dict) else {} for a in src["attachments"]]
)
src[attachment_cols] = attachments.reindex(columns=attachment_cols)

#### Dates

In [55]:
# src["date"] = pd.to_datetime(src.created_at).dt.strftime("%Y-%m-%d")

#### Process dates

In [56]:
# Parse created_at, shift it to the US/Pacific zone, and store it as a
# "YYYY-MM-DD HH:MM:SS" string. The zone follows daylight saving time when it is
# in effect, despite the pst_ prefix in the column name.
created_pacific = pd.to_datetime(src["created_at"]).dt.tz_convert("US/Pacific")
src["pst_datetime"] = created_pacific.dt.strftime("%Y-%m-%d %H:%M:%S")

In [57]:
# Parse and convert the timestamps once, then derive both string columns from
# the same US/Pacific series instead of re-parsing created_at for each column.
created_at_pacific = pd.to_datetime(src["created_at"]).dt.tz_convert("US/Pacific")
src["pst_time"] = created_at_pacific.dt.strftime("%H:%M:%S")
src["pst_date"] = created_at_pacific.dt.strftime("%Y-%m-%d")

In [58]:
# Show the column labels present after the unpacking and date steps.
src.columns

Index(['referenced_tweets', 'created_at', 'author_id', 'conversation_id', 'id',
       'public_metrics', 'text', 'entities', 'in_reply_to_user_id',
       'attachments', 'geo', 'withheld', 'mentions', 'retweet_count',
       'reply_count', 'like_count', 'quote_count', 'media_keys', 'poll_ids',
       'pst_datetime', 'pst_time', 'pst_date'],
      dtype='object')

#### Drop what we don't need

In [59]:
# Columns removed before export: the nested originals, the helper columns
# unpacked from them, and identifiers/metadata not kept in the output.
unneeded_cols = [
    "referenced_tweets",
    "public_metrics",
    "attachments",
    "in_reply_to_user_id",
    "mentions",
    "media_keys",
    "withheld",
    "geo",
    "poll_ids",
    "created_at",
    "author_id",
]
# Rebind `src` to the trimmed frame rather than mutating it with inplace=True.
src = src.drop(columns=unneeded_cols)

#### Flag which tweets are retweets

In [60]:
# Flag retweets by their text prefix. In the sampled rows, retweets begin with
# "RT @<username>:", so matching the full "RT @" prefix avoids flagging original
# tweets that merely start with the letters "RT". na=False keeps the column
# strictly boolean if a text value is missing.
src["retweet"] = src["text"].str.startswith("RT @", na=False)

---

In [61]:
# Copy the processed frame to `df`, which the preview and export cells below use.
df = src.copy()

In [62]:
# Preview the first rows of the processed frame.
df.head()

Unnamed: 0,conversation_id,id,text,entities,retweet_count,reply_count,like_count,quote_count,pst_datetime,pst_time,pst_date,retweet
0,1518711447311724545,1518711447311724545,RT @TitaniaMcGrath: Twitter is a private compa...,"{'mentions': [{'start': 3, 'end': 18, 'usernam...",3302,0,0,0,2022-04-25 14:59:59,14:59:59,2022-04-25,True
1,1518711447114510336,1518711447114510336,RT @DoliaEstevez: 👉🏽@Twitter confirma su venta...,"{'mentions': [{'start': 3, 'end': 16, 'usernam...",100,0,0,0,2022-04-25 14:59:59,14:59:59,2022-04-25,True
2,1518677066325053441,1518711446988763136,@EonAnglin @CrypticNoHoes @elonmusk And that's...,"{'mentions': [{'start': 0, 'end': 10, 'usernam...",0,1,8,0,2022-04-25 14:59:59,14:59:59,2022-04-25,False
3,1518711446942724097,1518711446942724097,RT @CaptAmazo: Friendly reminder that Elon Mus...,"{'mentions': [{'start': 3, 'end': 13, 'usernam...",49499,0,0,0,2022-04-25 14:59:59,14:59:59,2022-04-25,True
4,1518711446905016320,1518711446905016320,RT @SenWarren: This deal is dangerous for our ...,"{'mentions': [{'start': 3, 'end': 13, 'usernam...",20989,0,0,0,2022-04-25 14:59:59,14:59:59,2022-04-25,True


In [63]:
# Preview the last rows of the processed frame.
df.tail()

Unnamed: 0,conversation_id,id,text,entities,retweet_count,reply_count,like_count,quote_count,pst_datetime,pst_time,pst_date,retweet
1056486,1518651050076643329,1518651050076643329,"Elon Musk acorda, existem 2 notificações \n\n1...",,2,1,3,0,2022-04-25 11:00:00,11:00:00,2022-04-25,False
1056487,1518651049808207873,1518651049808207873,RT @willchamberlain: Every liberal who says th...,"{'mentions': [{'start': 3, 'end': 19, 'usernam...",5395,0,0,0,2022-04-25 11:00:00,11:00:00,2022-04-25,True
1056488,1518651049636024320,1518651049636024320,Elon Musk nearing deal to buy Twitter after ho...,,0,0,0,0,2022-04-25 11:00:00,11:00:00,2022-04-25,False
1056489,1518651049627815938,1518651049627815938,RT @AliCologne: Linke haben keine Angst das El...,"{'mentions': [{'start': 3, 'end': 14, 'usernam...",257,0,0,0,2022-04-25 11:00:00,11:00:00,2022-04-25,True
1056490,1518651049522999297,1518651049522999297,"RT @ZigaTurk: Da je Elon Musk kupil Twitter, j...","{'mentions': [{'start': 3, 'end': 12, 'usernam...",31,0,0,0,2022-04-25 11:00:00,11:00:00,2022-04-25,True


---

## Export

In [64]:
# Write the processed frame as a JSON array of row objects, indented 4 spaces.
export_path = "/Users/stiles/twarc2/elon_musk_search_announcement_hours_processed.json"
df.to_json(export_path, orient="records", indent=4)