## Processing tweets

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import jenkspy
import matplotlib.pyplot as plt

%matplotlib inline
import json
import numpy as np
import altair as alt
import altair_latimes as lat

# Register and activate the "latimes" chart theme supplied by altair_latimes.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

# Let pandas display up to 50 columns and 1000 rows in cell output.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)

# Turn off Altair's row cap on chart data (kept last so the cell output is unchanged).
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Get the archive

In [3]:
# Load the tweet archive; lines=True reads the file as one JSON record per line.
src = pd.read_json(
    "../../twint/mattgaetz.json",
    lines=True,
    date_unit="s",
    dtype={"timezone": str},  # read the timezone column as str
    convert_dates=["created_at", "date", "time"],
)

In [4]:
# Preview the first five rows after sorting by date, newest first.
src.sort_values(by="date", ascending=False).head(5)

Unnamed: 0,id,conversation_id,created_at,date,time,timezone,user_id,username,name,place,tweet,language,mentions,urls,photos,replies_count,retweets_count,likes_count,hashtags,cashtags,link,retweet,quote_url,video,thumbnail,near,geo,source,user_rt_id,user_rt,retweet_id,reply_to,retweet_date,translate,trans_src,trans_dest
0,1377030479136882688,1377030477257773056,2021-03-30 15:50:24 PDT,2021-03-30,15:50:24,-700,58579942,mattgaetz,Matt Gaetz,,...of the ongoing extortion investigation. I...,en,[],[],[],6243,4201,19279,[],[],https://twitter.com/mattgaetz/status/137703047...,False,,0,,,,,,,,[],,,,
3,1376986940394848261,1376986940394848256,2021-03-30 12:57:24 PDT,2021-03-30,12:57:24,-700,58579942,mattgaetz,Matt Gaetz,,This article identifies me as a “South Florida...,en,[],[],[],2282,501,2258,[freemarccaputo],[],https://twitter.com/mattgaetz/status/137698694...,False,https://twitter.com/politico/status/1376972588...,0,,,,,,,,[],,,,
1,1377030478167937024,1377030477257773056,2021-03-30 15:50:24 PDT,2021-03-30,15:50:24,-700,58579942,mattgaetz,Matt Gaetz,,...and my father has even been wearing a wire ...,en,[],[],[],4451,2985,14148,[],[],https://twitter.com/mattgaetz/status/137703047...,False,,0,,,,,,,,[],,,,
2,1377030477257773065,1377030477257773056,2021-03-30 15:50:24 PDT,2021-03-30,15:50:24,-700,58579942,mattgaetz,Matt Gaetz,,Over the past several weeks my family and I ha...,en,[],[],[],21417,7332,26666,[],[],https://twitter.com/mattgaetz/status/137703047...,False,,0,,,,,,,,[],,,,
4,1376314658932527115,1376314658932527104,2021-03-28 16:26:00 PDT,2021-03-28,16:26:00,-700,58579942,mattgaetz,Matt Gaetz,,They are executing a wokeification of the mili...,en,[],[https://www.foxnews.com/politics/matt-gaetz-b...,[],498,959,3707,[],[],https://twitter.com/mattgaetz/status/137631465...,False,,0,,,,,,,,[],,,,


In [5]:
# Number of rows in the loaded archive.
src.shape[0]

7879

### Clean up dates ...

In [6]:
# Parse the date column, then derive one column per calendar part from it.
src["date"] = pd.to_datetime(src["date"])
for part in ("year", "month", "day"):
    src[part] = getattr(src["date"].dt, part)

### ... and times

In [7]:
# Parse from a string view of the column so this cell can be re-run safely:
# after the first run "time" holds datetime.time objects, which
# pd.to_datetime refuses to convert. na_action="ignore" leaves missing values
# untouched so they still parse to NaT instead of becoming the text "nan"/"None".
time_parsed = pd.to_datetime(src["time"].map(str, na_action="ignore"))

# Derive the hour/minute columns, then keep only the time-of-day in "time".
src["hour"] = time_parsed.dt.hour
src["minute"] = time_parsed.dt.minute
src["time"] = time_parsed.dt.time

In [8]:
# Cast the tweet id and the derived year/month/day/hour columns to strings
# ("minute" is left as-is).
for column in ["id", "year", "month", "day", "hour"]:
    src[column] = src[column].astype(str)

### Mentions of 'trafficking'

In [9]:
# Case-sensitive literal match on the tweet text.
# regex=False treats the term as plain text rather than a pattern;
# na=False makes rows with a missing tweet count as non-matches instead of
# producing NaN in the mask (which would make the boolean selection raise).
has_trafficking = src["tweet"].str.contains("trafficking", regex=False, na=False)
src.loc[has_trafficking, ["id", "date", "username", "tweet"]].head(20)

Unnamed: 0,id,date,username,tweet
2771,1084912598863228931,2019-01-14,mattgaetz,Dear @GovRonDeSantis - I know you are a stickl...
5584,342677915324006400,2013-06-06,mattgaetz,"Bondi, FDLE launch new human trafficking train..."
5935,287218514660585473,2013-01-04,mattgaetz,"Crim Justice 2 work on this next week ""@fladjj..."
6605,178135899886989312,2012-03-09,mattgaetz,Great article from @nwfdailynews on our effort...
6792,144783751086477312,2011-12-08,mattgaetz,Great to have @PamBondi in the Judiciary Commi...


### Mentions of 'Mueller'

In [10]:
# Case-sensitive literal match on the tweet text.
# regex=False treats the term as plain text rather than a pattern;
# na=False makes rows with a missing tweet count as non-matches instead of
# producing NaN in the mask (which would make the boolean selection raise).
has_mueller = src["tweet"].str.contains("Mueller", regex=False, na=False)
src.loc[has_mueller, ["id", "date", "username", "tweet"]].head(20)

Unnamed: 0,id,date,username,tweet
325,1340826367370403840,2020-12-20,mattgaetz,@MuellerSheWrote I haven’t been vaccined and h...
539,1322246816050675712,2020-10-30,mattgaetz,Veronica’s Closet had 35 million initial viewe...
646,1309527927801995264,2020-09-25,mattgaetz,When I said Mueller was out to get Trump in 20...
692,1305530080001753094,2020-09-14,mattgaetz,The Mueller team obstructed way more justice t...
974,1281624871718588417,2020-07-10,mattgaetz,I hope John Durham is contact tracing the Muel...
1867,1208987349725650946,2019-12-22,mattgaetz,Dems: “Trump is a Russian asset” - then Muelle...
1967,1194701059425476608,2019-11-13,mattgaetz,House Democrats are having such a bad day they...
2231,1156377055182884866,2019-07-30,mattgaetz,Tonight’s Democratic candidates are making Rob...
2239,1155965052861632515,2019-07-29,mattgaetz,The dossier itself cites multiple Russian gov’...
2242,1155246040246829063,2019-07-27,mattgaetz,#BringBackMueller


In [11]:
# Write the processed frame to CSV. reset_index() moves the current index into
# an ordinary column, and index=False keeps the fresh index out of the file.
src.reset_index().to_csv(path_or_buf="output/mattgaetz.csv", index=False)