# Processing 'doomscrolling' tweets

### Import Python tools and Jupyter configuration

In [1]:
# Load the lab_black IPython extension into this kernel session.
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import altair_latimes as lat
import matplotlib.pyplot as plt
import matplotlib as mpl
from datetime import datetime

In [3]:
# Register the LA Times chart theme under the name "latimes" and activate it.
# The name given to enable() has to be one that is registered; this notebook
# only registers "latimes", so that is the name to enable.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [4]:
# Loosen pandas' display limits so wide frames and long tweets render in full.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
# Lift Altair's row cap on chart data.
alt.data_transformers.disable_max_rows()
# None means "do not truncate cell text".
pd.set_option("display.max_colwidth", None)

### Get the archive

In [5]:
# Load the tweet archive from the twint directory. The file is read as
# line-delimited JSON (one record per line).
# NOTE(review): in the preview below, conversation_id differs from id only in
# its last digits for some rows — this may be precision loss on large integers;
# confirm, and consider reading that column as a string if so.
src = pd.read_json(
    path_or_buf="../../twint/doomscrolling.json",
    lines=True,
    date_unit="s",
    dtype={"timezone": str},
    convert_dates=["created_at", "date", "time"],
)

In [6]:
# Peek at five rows from the most recent date in the archive.
(
    src
    .sort_values(by="date", ascending=False)
    .head(n=5)
)

Unnamed: 0,id,conversation_id,created_at,date,time,timezone,user_id,username,name,place,tweet,language,mentions,urls,photos,replies_count,retweets_count,likes_count,hashtags,cashtags,link,retweet,quote_url,video,thumbnail,near,geo,source,user_rt_id,user_rt,retweet_id,reply_to,retweet_date,translate,trans_src,trans_dest
0,1393179723518316544,1393179723518316544,2021-05-14 05:21:44 PDT,2021-05-14,05:21:44,-700,1296517517544960001,stopdoomscroll,stopdoomscrolling,,Is now really the time dinosaur! Your session of doomscrolling has expired! Maybe you should delete your facebook account!,en,[],[],[],0,0,0,[],[],https://twitter.com/stopdoomscroll/status/1393179723518316544,False,,0,,,,,,,,[],,,,
25,1393112955345162241,1393112955345162240,2021-05-14 00:56:25 PDT,2021-05-14,00:56:25,-700,870603950,defiantsquid,defiantsquid | Ben Haley fan account,,"Overdid it a bit yesterday with all the zoom meetings, events, VR games and bedtime doomscrolling, so I'm taking things easy with screens today. Might even read a book 🤷‍♂️ Be nice to each other while I'm away! X",en,[],[],[],2,0,9,[],[],https://twitter.com/DefiantSquid/status/1393112955345162241,False,,0,,,,,,,,[],,,,
19,1393133948528517125,1393133948528517120,2021-05-14 02:19:51 PDT,2021-05-14,02:19:51,-700,1150726007910891520,gyukaakuu,doomscrolling lady,,Can be too open but also too tertutup at the same time. False communication..,en,[],[],[],0,0,0,[],[],https://twitter.com/gyukaakuu/status/1393133948528517125,False,https://twitter.com/sadiementary/status/1393065081689174018,0,,,,,,,,[],,,,
20,1393130842138972162,1393124244490395648,2021-05-14 02:07:30 PDT,2021-05-14,02:07:30,-700,14539144,witherthorne,Dils 🐣,,@AsrolWenger Baring penat lepas masak sambil doomscrolling,in,[],[],[],0,0,0,[],[],https://twitter.com/witherthorne/status/1393130842138972162,False,,0,,,,,,,,"[{'screen_name': 'AsrolWenger', 'name': 'Asrol', 'id': '242330827'}]",,,,
21,1393129313885696000,1393129313885696000,2021-05-14 02:01:26 PDT,2021-05-14,02:01:26,-700,5980812,thecultureofme,MF Doomscrolling,,https://t.co/QHo3niaHBA https://t.co/tXCNdqoLnB,und,[],[https://go.tcom.co/2QgnpRY],[https://pbs.twimg.com/media/E1VjupmXsAIPe6r.jpg],0,0,0,[],[],https://twitter.com/thecultureofme/status/1393129313885696000,False,,1,https://pbs.twimg.com/media/E1VjupmXsAIPe6r.jpg,,,,,,,[],,,,


In [7]:
# Number of rows loaded (before de-duplication).
src.shape[0]

108401

### Clean up dates ...

In [8]:
# Parse "date" into datetimes, then break out year / month / day columns.
# assign() evaluates its keyword arguments in order, so the later lambdas see
# the already-parsed "date" column.
src = src.assign(
    date=lambda frame: pd.to_datetime(frame["date"]),
    year=lambda frame: frame["date"].dt.year,
    month=lambda frame: frame["date"].dt.month,
    day=lambda frame: frame["date"].dt.day,
)

### ... and times

In [9]:
# Parse the clock-time column into a scratch Series of timestamps, derive the
# hour and minute from it, and only then overwrite "time" with plain
# datetime.time values.
#
# The values are passed through str() first (missing values are left alone) so
# that this cell can be re-run: after one execution "time" holds datetime.time
# objects, which pd.to_datetime cannot parse directly.
time_parsed = pd.to_datetime(src["time"].map(str, na_action="ignore"))
src["hour"] = time_parsed.dt.hour
src["minute"] = time_parsed.dt.minute
src["time"] = time_parsed.dt.time

In [10]:
# Treat the tweet id and the derived calendar/clock parts as labels, not
# numbers, by casting them to strings in one pass.
src = src.astype(dict.fromkeys(["id", "year", "month", "day", "hour"], str))

In [11]:
# Keep only the first row seen for each tweet id.
src = src.drop_duplicates(subset=["id"], keep="first")

In [12]:
# Ten most-retweeted tweets, trimmed to a handful of readable columns.
(
    src.loc[:, ["id", "date", "username", "retweets_count", "tweet"]]
    .sort_values(by="retweets_count", ascending=False)
    .head(n=10)
)

Unnamed: 0,id,date,username,retweets_count,tweet
82106,1312936187057893377,2020-10-04,matthewamiller,50284,Elect Joe Biden so you can read books at night again rather than endlessly doomscrolling twitter.
38630,1346904164824178690,2021-01-06,q_kingwv,40563,Sorry boss I can't work I am doomscrolling the coup attempt
34986,1347011666123366402,2021-01-06,faayza,29758,doomscrolling break: this person trying to do an audition tape while their cat just wants attention https://t.co/SC282aotCk
30610,1347434641540538368,2021-01-07,ethanjacobslaw,29111,OK doomscrolling is bad but have you SEEN the quality of the doom this week?
34035,1347059063864119301,2021-01-06,kurtkohlstedt,24230,We interrupt your regularly scheduled doomscrolling to bring you a joyful deer prancing across a beach at sunrise 🦌 🌅 https://t.co/Lnk96yx1f6
37461,1346939675429232640,2021-01-06,nathanallebach,22409,🛑 STOP 🛑 you've reached the doomscrolling checkpoint. this is your reminder to take a break from doomscrolling
98900,1286348333951389696,2020-07-23,catvalente,11890,Every millennial I know is locked in their house doomscrolling &amp; baking bread while their parents think masks are poisonous gay 5g tools of the new world order and getting haircuts at Applebees but sure
35537,1346991070186532865,2021-01-06,jpbrammer,11620,"I'm not ""doomscrolling"" I just live in hell"
98787,1286496613955862530,2020-07-23,marysuewriter,9495,"what’s dangerous about doomscrolling is that repeated emphasis on the importance of staying informed can easily trick you into thinking that endlessly consuming bad news on autopilot is a progressive moral duty, when in actuality it’s the digital equivalent of emotional self-harm"
83550,1312003511622815744,2020-10-02,ncroal,5809,TIL the opposite of doomscrolling is schadensurfing


### Which users have mentioned it the most?

In [13]:
# Tally rows per account: one row per username with the total in "tweets".
count = src.groupby("username").size().rename("tweets").reset_index()

In [14]:
# The twenty accounts with the most matching tweets.
count.sort_values(by="tweets", ascending=False).iloc[:20]

Unnamed: 0,username,tweets
54240,nodoomscroll,1157
69797,stopdoomscroll,912
38779,karenkho,608
72670,thecultureofme,317
10859,brian_pelts,121
8360,bee_k_bee,104
59789,rachaelp95,104
79646,writerly_dee,97
49822,mindfulness_for,81
41375,kristaldixon,54


### How many times has the question "are you doomscrolling?" been posed?

In [15]:
# Count tweets containing the literal text "are you doomscrolling?".
# regex=False matters: str.contains treats its pattern as a regular expression
# by default, in which case the trailing "?" would make the final "g" optional
# instead of matching a question mark. na=False counts missing tweets as
# non-matches rather than letting them break the tally.
# The match is case-sensitive and does not cover variants such as
# "are you still doomscrolling?".
int(
    src["tweet"]
    .str.contains("are you doomscrolling?", regex=False, na=False)
    .sum()
)

307

### Featuring @karenkho

In [16]:
# First twenty rows posted by the karenkho account, key columns only.
(
    src.loc[
        src["username"].eq("karenkho"),
        ["id", "date", "username", "tweet"],
    ].head(n=20)
)

Unnamed: 0,id,date,username,tweet
56,1393040397602017281,2021-05-13,karenkho,"what if instead of a doomscrolling reminder, I just told all of you it's important to write a to-list, moisturize regularly, go to sleep early, and charge your phone away from your bed"
307,1392705739618242563,2021-05-12,karenkho,"Hi, are you still doomscrolling?"
544,1392342134863368192,2021-05-11,karenkho,"Hi, are you still doomscrolling?"
800,1391974300077416448,2021-05-10,karenkho,"Hi, are you doomscrolling?"
1084,1391594903214477313,2021-05-09,karenkho,"Hi, are you doomscrolling?"
1099,1391575191067693058,2021-05-09,karenkho,"instead of a doomscrolling reminder, I'm just going to encourage you to go to sleep early tonight. You deserve extra time to rest."
1287,1391233213855584257,2021-05-08,karenkho,"Hi, are you doomscrolling?"
1499,1390881349049925633,2021-05-07,karenkho,"Hi, are you doomscrolling?"
1673,1390672887892353029,2021-05-07,karenkho,"I lost count of this phenomenon last year due to all the doomscrolling tweets but I also gained 28k followers (to which my sister asked, ""do you make any money from that?"")"
1745,1390528281208934405,2021-05-06,karenkho,"hi, are you still doomscrolling?"


---

### Export

In [17]:
# Write the de-duplicated archive to CSV. reset_index() moves the existing row
# labels into a regular column, which is written out; index=False keeps the
# new positional labels out of the file.
(
    src
    .reset_index(drop=False)
    .to_csv(path_or_buf="output/doomscrolling.csv", index=False)
)

In [19]:
# Write only the karenkho rows to their own CSV, with the same layout as the
# full export (original row labels kept as a column, no positional index).
(
    src.loc[src["username"].eq("karenkho")]
    .reset_index(drop=False)
    .to_csv(path_or_buf="output/karenkho.csv", index=False)
)