# Scrape and process @jongraz stats for Noodle videos

In [1]:
from TikTokApi import TikTokApi
import pandas as pd
import numpy as np
import altair as alt

In [2]:
# Raise pandas' display limits so wide frames, long frames and long text cells
# are shown in full instead of being truncated.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

### Read in our hand-collected list of Noodle videos

In [3]:
# CSV export URL of the published Google Sheet that holds the video list.
noodle_sheet_csv = (
    "https://docs.google.com/spreadsheets/d/e/"
    "2PACX-1vQz8eEafXnIeKsNvGL8M56Ia6vu89JDCjfJ22ORl65So4kRYtmTbwmqLjE223fe4mjcZqkrs-KT6j8z"
    "/pub?gid=0&single=true&output=csv"
)
src = pd.read_csv(noodle_sheet_csv)

### Create an ID field from the URL

In [4]:
# Pull the numeric video ID out of each URL. Matching the "/video/<digits>"
# segment (instead of stripping a fixed prefix) keeps the ID clean when a link
# carries a query string such as "?lang=en" or a trailing slash, so it still
# compares equal to the scraped `video_id` strings. Rows whose URL is missing
# or has no such segment end up as the string 'nan'.
src['id'] = src['url'].str.extract(r'/video/(\d+)', expand=False).astype(str)

### Make a list for subsetting the dataframe later

In [5]:
# Plain Python list of the hand-collected video IDs, used to subset the scraped frame
bones_videos = src['id'].tolist()

### Have to run this manually -- for now

In [6]:
# Get cookie and run this...

In [7]:
# !python '/Users/stiles/github/noodle-tracker/_workspace/grab_user_stats.py'

---

### Read scraped stats data

In [8]:
# Scraped stats file; video_id is read as text rather than being parsed as a number
scraped_csv = '../../tiktok/jongraz_videos.csv'
df = pd.read_csv(scraped_csv, dtype={'video_id': str})

### Clean up dates and times

In [9]:
# `video_time` holds epoch seconds: parse it once and derive every calendar
# field from that single datetime Series.
posted_at = pd.to_datetime(df['video_time'], unit='s')

# dt.weekday is 0 for Monday, so subtracting that many days lands on the Monday
# of the same week (the time of day is carried along unchanged).
days_since_monday = posted_at.dt.weekday * np.timedelta64(1, 'D')

df['timestamp'] = posted_at
df['date'] = posted_at.dt.date
df['month'] = posted_at.dt.month
df['weekday'] = posted_at.dt.day_name()
df['time'] = posted_at.dt.time
df['weekstart_timestamp'] = posted_at - days_since_monday
df['weekstart'] = df['weekstart_timestamp'].dt.date
# Stamp every row with the moment this cell was run
df['updated_date'] = pd.to_datetime("today")

### How many videos? 

In [10]:
# Row count of the full scraped frame
df.shape[0]

149

### Select only "bones" videos from @jongraz's feed

In [11]:
# Keep only the rows whose video_id appears in the hand-collected list
is_bones_video = df['video_id'].isin(bones_videos)
bones_df = df[is_bones_video]

### How many are Noodle videos? 

In [12]:
# Row count after subsetting to the listed videos
bones_df.shape[0]

61

In [13]:
# Preview the first five rows of the subset
bones_df.head(n=5)

Unnamed: 0,user_name,user_id,video_id,video_desc,video_time,video_length,video_link,n_likes,n_shares,n_comments,n_plays,timestamp,date,month,weekday,time,weekstart_timestamp,weekstart,updated_date
0,jongraz,6867588218972455941,7043061913114119429,may this set the tone for your weekend 🔮🦴🔮 also the auto captions on this one SLAYED me 😂 #bonesday #nobones #noodletok,1639840639,49,https://www.tiktok.com/@jongraz/video/7043061913114119429?lang=en,109100,4578,2108,373500,2021-12-18 15:17:19,2021-12-18,12,Saturday,15:17:19,2021-12-13 15:17:19,2021-12-13,2021-12-18 20:36:25.537821
1,jongraz,6867588218972455941,7042308571643514118,we are…all shocked!!! 🔮🦴🔮 #noodletok,1639665239,56,https://www.tiktok.com/@jongraz/video/7042308571643514118?lang=en,302300,19600,6894,1100000,2021-12-16 14:33:59,2021-12-16,12,Thursday,14:33:59,2021-12-13 14:33:59,2021-12-13,2021-12-18 20:36:25.537821
2,jongraz,6867588218972455941,7041944285268184325,"once again, the Bones tell us what to do 🔮🦴🔮 #bonesday #nobones #noodletok #pug",1639580422,43,https://www.tiktok.com/@jongraz/video/7041944285268184325?lang=en,62800,3501,1808,318800,2021-12-15 15:00:22,2021-12-15,12,Wednesday,15:00:22,2021-12-13 15:00:22,2021-12-13,2021-12-18 20:36:25.537821
3,jongraz,6867588218972455941,7041221365935934725,the Bones have spoken and to the Bones we listen 🔮🦴🔮 #bonesday #nobones #noodletok,1639412104,45,https://www.tiktok.com/@jongraz/video/7041221365935934725?lang=en,128700,5377,2892,613700,2021-12-13 16:15:04,2021-12-13,12,Monday,16:15:04,2021-12-13 16:15:04,2021-12-13,2021-12-18 20:36:25.537821
4,jongraz,6867588218972455941,7040833887395712261,your Sunday reading 🔮🦴🔮,1639321887,43,https://www.tiktok.com/@jongraz/video/7040833887395712261?lang=en,140200,6089,2467,611500,2021-12-12 15:11:27,2021-12-12,12,Sunday,15:11:27,2021-12-06 15:11:27,2021-12-06,2021-12-18 20:36:25.537821


---

### Weekly plays

In [14]:
# Total plays per week (keyed by the week's start date) for videos posted after
# 2021-07-31. The aggregation is named with the string 'sum' rather than the
# builtin `sum`: passing the builtin is deprecated in recent pandas and emits a
# FutureWarning, while the string selects the same groupby sum on all versions.
recent_bones = bones_df[bones_df['timestamp'] > '2021/07/31']
weekly_plays = recent_bones.groupby(['weekstart']).agg({'n_plays': 'sum'}).reset_index()
weekly_plays.dtypes

weekstart    object
n_plays       int64
dtype: object

In [15]:
# Turn the object-dtype week-start dates into a proper datetime column
plays_week_starts = pd.to_datetime(weekly_plays['weekstart'])
weekly_plays['weekstart'] = plays_week_starts

In [16]:
# Bar chart of weekly play totals: week start on a temporal x-axis, plays on y
plays_bars = alt.Chart(weekly_plays).mark_bar(width=30)
plays_bars.encode(
    x=alt.X('weekstart:T'),
    y=alt.Y('n_plays'),
).properties(width=630)

---

### Weekly likes

In [17]:
# Total likes per week (keyed by the week's start date) for videos posted after
# 2021-07-31. The aggregation is named with the string 'sum' rather than the
# builtin `sum`: passing the builtin is deprecated in recent pandas and emits a
# FutureWarning, while the string selects the same groupby sum on all versions.
recent_liked = bones_df[bones_df['timestamp'] > '2021/07/31']
weekly_likes = recent_liked.groupby(['weekstart']).agg({'n_likes': 'sum'}).reset_index()
weekly_likes.dtypes

weekstart    object
n_likes       int64
dtype: object

In [18]:
# Turn the object-dtype week-start dates into a proper datetime column
likes_week_starts = pd.to_datetime(weekly_likes['weekstart'])
weekly_likes['weekstart'] = likes_week_starts

In [19]:
# Bar chart of weekly like totals: week start on a temporal x-axis, likes on y
likes_bars = alt.Chart(weekly_likes).mark_bar(width=30)
likes_bars.encode(
    x=alt.X('weekstart:T'),
    y=alt.Y('n_likes'),
).properties(width=630)

---

### Weekly comments

In [20]:
# Total comments per week (keyed by the week's start date) for videos posted
# after 2021-07-31. The aggregation is named with the string 'sum' rather than
# the builtin `sum`: passing the builtin is deprecated in recent pandas and
# emits a FutureWarning, while the string selects the same groupby sum on all
# versions.
recent_commented = bones_df[bones_df['timestamp'] > '2021/07/31']
weekly_comments = recent_commented.groupby(['weekstart']).agg({'n_comments': 'sum'}).reset_index()
weekly_comments.dtypes

weekstart     object
n_comments     int64
dtype: object

In [21]:
# Turn the object-dtype week-start dates into a proper datetime column
comments_week_starts = pd.to_datetime(weekly_comments['weekstart'])
weekly_comments['weekstart'] = comments_week_starts

In [22]:
# Bar chart of weekly comment totals: week start on a temporal x-axis, comments on y
comments_bars = alt.Chart(weekly_comments).mark_bar(width=30)
comments_bars.encode(
    x=alt.X('weekstart:T'),
    y=alt.Y('n_comments'),
).properties(width=630)

---

### Exports

In [23]:
# Write the weekly plays table to both output locations, without the index column
for plays_csv in ("../_data/weekly_plays.csv", "../assets/data/weekly_plays.csv"):
    weekly_plays.to_csv(plays_csv, index=False)

In [24]:
# Write the weekly likes table to both output locations, without the index column
for likes_csv in ("../_data/weekly_likes.csv", "../assets/data/weekly_likes.csv"):
    weekly_likes.to_csv(likes_csv, index=False)

In [25]:
# Write the weekly comments table to both output locations, without the index column
for comments_csv in ("../_data/weekly_comments.csv", "../assets/data/weekly_comments.csv"):
    weekly_comments.to_csv(comments_csv, index=False)