# Congressional tweets mentioning Roe since opinion leak

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import altair_stiles as altstiles
import json
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob
import re

In [3]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

sid = SentimentIntensityAnalyzer()
import re
import nltk

words = set(nltk.corpus.words.words())

In [4]:
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("spacytextblob")

<spacytextblob.spacytextblob.SpacyTextBlob at 0x1660d3970>

In [5]:
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [6]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None

---

## Read data

#### Tweets by all members of Congress since mid-March

In [7]:
src = pd.read_csv(
    "data/processed/congress_tweets_all.csv",
    dtype={
        "tweet_id": str,
        "twitter_id": str,
        "conversation_id": str,
        "author_id": str,
    },
    low_memory=False,
)

#### Into a dataframe

In [8]:
df = src[src["created_date_uct"].notna()].copy()

---

## Roe analysis

#### Which tweets mention Roe? 

In [9]:
df_since_leak = df[df["created_date_eastern"] >= "2022-05-02"].copy()

In [10]:
df_since_leak["mentions_roe"] = df_since_leak["tweet_text"].str.contains("Roe")

In [11]:
roe_mentions = df_since_leak[df_since_leak["mentions_roe"] == True].copy()

#### How many? 

In [12]:
len(roe_mentions)

563

#### What share mention Roe?

In [13]:
df_since_leak.mentions_roe.value_counts(normalize=True).round(2) * 100

False    93.0
True      7.0
Name: mentions_roe, dtype: float64

#### Group by gender and Roe since the leak

In [14]:
gender_roe_since_monday = (
    df_since_leak.groupby(["gender", "party", "mentions_roe"])
    .agg({"tweet_id": "count"})
    .reset_index()
)

In [15]:
gender_roe_since_monday

Unnamed: 0,gender,party,mentions_roe,tweet_id
0,F,Democrat,False,1590
1,F,Democrat,True,225
2,F,Republican,False,568
3,F,Republican,True,12
4,M,Democrat,False,2364
5,M,Democrat,True,244
6,M,Independent,False,19
7,M,Independent,True,1
8,M,Republican,False,2544
9,M,Republican,True,81


#### Group by party and Roe since the leak

In [16]:
parties_roe_since_monday = (
    df_since_leak.groupby(["party", "mentions_roe"])
    .agg({"tweet_id": "count"})
    .reset_index()
)

In [17]:
parties_roe_since_monday

Unnamed: 0,party,mentions_roe,tweet_id
0,Democrat,False,3954
1,Democrat,True,469
2,Independent,False,19
3,Independent,True,1
4,Republican,False,3112
5,Republican,True,93


In [18]:
roe_pivot = parties_roe_since_monday.pivot(
    index="party", columns="mentions_roe", values="tweet_id"
).reset_index()

In [19]:
roe_pivot

mentions_roe,party,False,True
0,Democrat,3954,469
1,Independent,19,1
2,Republican,3112,93


In [20]:
roe_pivot["total"] = roe_pivot[False] + roe_pivot[True]

In [21]:
roe_pivot["share_mentioning_roe"] = round((roe_pivot[True] / roe_pivot["total"]) * 100)

In [22]:
roe_pivot

mentions_roe,party,False,True,total,share_mentioning_roe
0,Democrat,3954,469,4423,11.0
1,Independent,19,1,20,5.0
2,Republican,3112,93,3205,3.0


---

## Sentiment

In [23]:
df_since_leak.head(5)

Unnamed: 0,author_id,conversation_id,tweet_id,tweet_text,source,created_date_uct,retweets,likes,quotes,twitter,twitter_id,type,party,gender,created_date_eastern,created_time_eastern,month,weekday,year,hour,mentions_roe
10,3026622545,1522268103803125760,1522268103803125760,"The National Day of Prayer is a wonderful way to recognize the blessings of God, remember to be thankful, and look for God’s care in our nation and world.\nhttps://t.co/pVwIyQzUeW",Twitter Web App,2022-05-05 17:32:52+00:00,0,1,0,RepAmata,3026622545,rep,Republican,F,2022-05-05,13:32:52,May,Friday,2022,13,False
11,3026622545,1521873428436987907,1521873428436987907,Keeping our local Veterans informed. VA assures Congress of efforts on unavoidably missed appts and other access issues \nhttps://t.co/fE00g7HH2V,Twitter Web App,2022-05-04 15:24:35+00:00,1,1,0,RepAmata,3026622545,rep,Republican,F,2022-05-04,11:24:35,May,Friday,2022,11,False
12,3026622545,1521560638727901185,1521560638727901185,"An article on our historic First Marine Battalion that defended American Samoa in World War II, we’ll always be proud of them!\n\nhttps://t.co/cVk0NXBozL",Twitter Web App,2022-05-03 18:41:40+00:00,0,1,1,RepAmata,3026622545,rep,Republican,F,2022-05-03,14:41:40,May,Friday,2022,14,False
13,3026622545,1521559842007265280,1521559842007265280,"Happy Teacher Appreciation Day! Someday, a former student will be naming you among their inspirations and reasons for success.\nhttps://t.co/XYJ6XOJThO https://t.co/pHp7peF4rX",Twitter Web App,2022-05-03 18:38:30+00:00,0,1,0,RepAmata,3026622545,rep,Republican,F,2022-05-03,14:38:30,May,Friday,2022,14,False
146,234128524,1522567468123885569,1522567468123885569,94 percent of Americans say they are upset or concerned about inflation.\n\nRising prices are the direct result of policy choices made by Democrats. We need to go a different direction.\n \nhttps://t.co/njHsFURVDg,Twitter Web App,2022-05-06 13:22:26+00:00,1,1,0,SenToddYoung,234128524,sen,Republican,M,2022-05-06,09:22:26,May,Friday,2022,9,False


#### Clean tweet text a bit

In [24]:
def cleaner(tweet):

    # Remove @ sign
    tweet = re.sub("@[A-Za-z0-9]+", "", tweet)

    # Remove http links
    tweet = re.sub(r"(?:\@|http?\://|https?\://|www)\S+", "", tweet)
    tweet = " ".join(tweet.split())
    tweet = tweet.replace("#", "").replace("_", " ")

    # Remove hashtag sign but keep the text
    tweet = " ".join(
        w
        for w in nltk.wordpunct_tokenize(tweet)
        if w.lower() in words or not w.isalpha()
    )
    return tweet


roe_mentions["tweet_text_clean"] = roe_mentions["tweet_text"].astype(str).apply(cleaner)

In [25]:
score_list = []
for index, row in roe_mentions.iterrows():
    score_dict = {
        "user": row["twitter"],
        "party": row["party"],
        "gender": row["gender"],
        "id": row["tweet_id"],
        "text": row["tweet_text"],
        "retweet_count": row["retweets"],
        "like_count": row["likes"],
        "quote_count": row["quotes"],
        "est_time": row["created_date_eastern"],
        "est_date": row["created_time_eastern"],
        "score": sid.polarity_scores(str(row["tweet_text_clean"]))["compound"],
    }

    score_list.append(score_dict)

In [26]:
nltk_df = pd.DataFrame(score_list)

In [27]:
def sentiment_category(sentiment):
    label = ""
    if sentiment > 0:
        label = "positive"
    elif sentiment == 0:
        label = "neutral"
    else:
        label = "negative"
    return label


nltk_df["sentiment_category"] = nltk_df["score"].apply(sentiment_category)

In [28]:
nltk_df.head()

Unnamed: 0,user,party,gender,id,text,retweet_count,like_count,quote_count,est_time,est_date,score,sentiment_category
0,SenToddYoung,Republican,M,1521552889222922241,"I do believe that Roe v Wade was wrongly decided, and that the American people should be able to decide, through their elected officials, what our laws are when it comes to protecting life and protecting women.",4,66,5,2022-05-03,14:10:52,0.0,neutral
1,RepWilson,Democrat,F,1521482942131888128,"This draft decision from the Supreme Court would limit our freedom to make deeply personal choices about our bodies by overturning #RoeVWade.\n\nAs I said back before, it’s a woman’s body, not yours. She alone bears the burden, pain, and joy that it brings. https://t.co/ekwbCmRBgq",8,24,0,2022-05-03,09:32:55,0.7987,positive
2,MarkWarner,Democrat,M,1521586575733362697,"If the Supreme Court overturns Roe, it won’t stop abortions. It will just cause unsafe abortions.",151,986,11,2022-05-03,16:24:44,0.7269,positive
3,MarkWarner,Democrat,M,1521467172656226304,"I’m outraged by the reported leaked SCOTUS decision overturning Roe. \n\nI believe abortion care is health care, and I’ll keep fighting for that in the Senate.",212,1616,24,2022-05-03,08:30:16,0.5994,positive
4,SenWhitehouse,Democrat,M,1522335075500580864,We need two things right now: \n1. Codify Roe\n2. End special-interest control of the Court,4148,20305,157,2022-05-05,17:59:00,0.6908,positive


In [29]:
nltk_df.groupby(["party", "sentiment_category"]).agg({"id": "count"}).reset_index()

Unnamed: 0,party,sentiment_category,id
0,Democrat,negative,151
1,Democrat,neutral,66
2,Democrat,positive,252
3,Independent,neutral,1
4,Republican,negative,27
5,Republican,neutral,15
6,Republican,positive,51


---

In [30]:
doc_list = []

for index, row in roe_mentions.iterrows():
    # doc = nlp(row["tweet_text"])
    sent_dict = {
        "user": row["twitter"],
        "party": row["party"],
        "gender": row["gender"],
        "id": row["tweet_id"],
        "text": row["tweet_text"],
        "retweet_count": row["retweets"],
        "like_count": row["likes"],
        "quote_count": row["quotes"],
        "est_time": row["created_date_eastern"],
        "est_date": row["created_time_eastern"],
        "polarity": nlp(row["tweet_text"])._.blob.polarity,
        "subjectivity": nlp(row["tweet_text"])._.blob.subjectivity,
    }
    doc_list.append(sent_dict)

In [31]:
sent_df = pd.DataFrame(doc_list)

In [32]:
def sentiment_category(sentiment):
    label = ""
    if sentiment > 0:
        label = "positive"
    elif sentiment == 0:
        label = "neutral"
    else:
        label = "negative"
    return label


sent_df["sentiment_category"] = sent_df["polarity"].apply(sentiment_category)

In [33]:
sent_df.sentiment_category.value_counts()

positive    288
neutral     152
negative    123
Name: sentiment_category, dtype: int64

In [34]:
sent_df.groupby(["party", "sentiment_category"]).agg({"id": "count"}).reset_index()

Unnamed: 0,party,sentiment_category,id
0,Democrat,negative,98
1,Democrat,neutral,121
2,Democrat,positive,250
3,Independent,neutral,1
4,Republican,negative,25
5,Republican,neutral,30
6,Republican,positive,38


---

## Export

In [35]:
roe_pivot.to_csv("data/processed/congress_tweets_roe_since_monday.csv", index=False)

In [36]:
roe_mentions.to_csv("data/processed/congress_tweets_roe.csv", index=False)