# Week 5: Diving into Pandas - DEMO

#### Load our Python tools

In [1]:
import pandas as pd

## Let's explore TikTok data

#### Videos by [@jongraz](https://www.tiktok.com/@jongraz) involving Noodle the Pug - DON'T FORGET THE DATA PATH

In [2]:
# Read the Noodle video list CSV into a DataFrame. The path is relative, so it
# resolves against the kernel's current working directory.
videos = pd.read_csv('../data/raw/jon_graz_noodle_videos.csv')

#### TikTok stats for all [@jongraz](https://www.tiktok.com/@jongraz) videos

In [3]:
# Read the per-video TikTok stats CSV into a DataFrame. The path is relative, so
# it resolves against the kernel's current working directory.
stats = pd.read_csv('../data/raw/jon_graz_stats.csv')

---

## Videos

#### Glance at the first five rows

In [4]:
# Preview the first five rows.
videos.head(n=5)

Unnamed: 0,date,bones_nobones,url,id
0,1/22/2022,No bones,https://www.tiktok.com/@jongraz/video/70560513...,7056051376929066287
1,1/21/2022,Bones!,https://www.tiktok.com/@jongraz/video/70556789...,7055678935107030319
2,1/18/2022,No bones,https://www.tiktok.com/@jongraz/video/70545760...,705457600349412075
3,1/19/2022,Bones!,https://www.tiktok.com/@jongraz/video/70549392...,7054939278937165102
4,1/16/2022,No bones,https://www.tiktok.com/@jongraz/video/70538379...,7053837902991658287


In [5]:
# Preview the last five rows.
videos.tail(n=5)

Unnamed: 0,date,bones_nobones,url,id
73,9/14/2021,Bones!,https://www.tiktok.com/@jongraz/video/70077862...,7007786296059940101
74,9/5/2021,No bones,https://www.tiktok.com/@jongraz/video/70044533...,7004453353946189062
75,8/21/2021,No bones,https://www.tiktok.com/@jongraz/video/69988717...,6998871742793993478
76,8/13/2021,No bones,https://www.tiktok.com/@jongraz/video/69959251...,6995925135182056709
77,3/11/2021,No bones,https://www.tiktok.com/@jongraz/video/69385478...,6938547829673626885


#### How many total videos are there? 

In [6]:
# Row count: the first entry of the (rows, columns) shape tuple.
videos.shape[0]

78

#### What's the oldest video? 

In [7]:
# 'date' is still a plain string at this point, so this sort compares text
# character by character and the result is not chronological.
# NOTE(review): ascending=False puts the largest values first, which would be the
# newest rows once the column is a real date — confirm the direction for "oldest".
videos.sort_values("date",ascending=False).head()

Unnamed: 0,date,bones_nobones,url,id
74,9/5/2021,No bones,https://www.tiktok.com/@jongraz/video/70044533...,7004453353946189062
71,9/29/2021,No bones,https://www.tiktok.com/@jongraz/video/70133630...,7013363016121044229
72,9/22/2021,No bones,https://www.tiktok.com/@jongraz/video/70107548...,7010754857476885766
73,9/14/2021,Bones!,https://www.tiktok.com/@jongraz/video/70077862...,7007786296059940101
75,8/21/2021,No bones,https://www.tiktok.com/@jongraz/video/69988717...,6998871742793993478


#### Newest? 

In [8]:
# 'date' is still a plain string at this point, so this sort compares text
# character by character and the result is not chronological.
# NOTE(review): ascending=True puts the smallest values first, which would be the
# oldest rows once the column is a real date — confirm the direction for "newest".
videos.sort_values("date",ascending=True).head()

Unnamed: 0,date,bones_nobones,url,id
10,1/1/2022,Bones!,https://www.tiktok.com/@jongraz/video/70482717...,7048271702387739910
7,1/11/2022,No bones,https://www.tiktok.com/@jongraz/video/70519729...,7051972968347503919
6,1/13/2022,Bones!,https://www.tiktok.com/@jongraz/video/70523416...,7052341674634235183
5,1/15/2022,Bones!,https://www.tiktok.com/@jongraz/video/70534547...,7053454719641029934
4,1/16/2022,No bones,https://www.tiktok.com/@jongraz/video/70538379...,7053837902991658287


#### Data types

In [9]:
# Inspect the dtype pandas assigned to each column when the CSV was read.
videos.dtypes

date             object
bones_nobones    object
url              object
id                int64
dtype: object

#### Convert the date string to a real date in a new column

In [10]:
# Parse the month/day/year strings (e.g. "1/22/2022") into real datetimes.
# The format is spelled out so pandas never has to guess between month-first and
# day-first, and a value that does not match raises instead of being misread.
videos['clean_date'] = pd.to_datetime(videos['date'], format='%m/%d/%Y')
videos['clean_date']

0    2022-01-22
1    2022-01-21
2    2022-01-18
3    2022-01-19
4    2022-01-16
        ...    
73   2021-09-14
74   2021-09-05
75   2021-08-21
76   2021-08-13
77   2021-03-11
Name: clean_date, Length: 78, dtype: datetime64[ns]

In [11]:
# Re-check the dtypes to confirm the new clean_date column is a datetime type.
videos.dtypes

date                     object
bones_nobones            object
url                      object
id                        int64
clean_date       datetime64[ns]
dtype: object

In [12]:
# The video id is an identifier, not a quantity, so store it as text.
videos = videos.astype({'id': str})
videos.dtypes

date                     object
bones_nobones            object
url                      object
id                       object
clean_date       datetime64[ns]
dtype: object

#### Now will it sort? Try with oldest

In [13]:
# Sorting on the parsed datetime column is chronological; ascending order puts
# the earliest dates at the top.
videos.sort_values(by="clean_date", ascending=True).head(n=5)

Unnamed: 0,date,bones_nobones,url,id,clean_date
77,3/11/2021,No bones,https://www.tiktok.com/@jongraz/video/69385478...,6938547829673626885,2021-03-11
76,8/13/2021,No bones,https://www.tiktok.com/@jongraz/video/69959251...,6995925135182056709,2021-08-13
75,8/21/2021,No bones,https://www.tiktok.com/@jongraz/video/69988717...,6998871742793993478,2021-08-21
74,9/5/2021,No bones,https://www.tiktok.com/@jongraz/video/70044533...,7004453353946189062,2021-09-05
73,9/14/2021,Bones!,https://www.tiktok.com/@jongraz/video/70077862...,7007786296059940101,2021-09-14


#### What if we just want the month, or year, or day of the week?

In [14]:
# Derive the calendar year from the parsed datetime via the .dt accessor.
videos = videos.assign(year=videos['clean_date'].dt.year)
videos['year']

0     2022
1     2022
2     2022
3     2022
4     2022
      ... 
73    2021
74    2021
75    2021
76    2021
77    2021
Name: year, Length: 78, dtype: int64

In [15]:
# Derive the month name (e.g. "January") from the parsed datetime.
videos = videos.assign(month=videos['clean_date'].dt.month_name())
videos['month']

0       January
1       January
2       January
3       January
4       January
        ...    
73    September
74    September
75       August
76       August
77        March
Name: month, Length: 78, dtype: object

In [16]:
# Derive the day-of-week name (e.g. "Saturday") from the parsed datetime.
videos = videos.assign(weekday=videos['clean_date'].dt.day_name())
videos['weekday']

0      Saturday
1        Friday
2       Tuesday
3     Wednesday
4        Sunday
        ...    
73      Tuesday
74       Sunday
75     Saturday
76       Friday
77     Thursday
Name: weekday, Length: 78, dtype: object

In [17]:
# Preview the first five rows, now including the derived date columns.
videos.head(n=5)

Unnamed: 0,date,bones_nobones,url,id,clean_date,year,month,weekday
0,1/22/2022,No bones,https://www.tiktok.com/@jongraz/video/70560513...,7056051376929066287,2022-01-22,2022,January,Saturday
1,1/21/2022,Bones!,https://www.tiktok.com/@jongraz/video/70556789...,7055678935107030319,2022-01-21,2022,January,Friday
2,1/18/2022,No bones,https://www.tiktok.com/@jongraz/video/70545760...,705457600349412075,2022-01-18,2022,January,Tuesday
3,1/19/2022,Bones!,https://www.tiktok.com/@jongraz/video/70549392...,7054939278937165102,2022-01-19,2022,January,Wednesday
4,1/16/2022,No bones,https://www.tiktok.com/@jongraz/video/70538379...,7053837902991658287,2022-01-16,2022,January,Sunday


#### How many bones days vs. no bones days? 

In [22]:
# Count how many rows fall under each bones_nobones label.
videos.value_counts(subset='bones_nobones')

bones_nobones
Bones!      40
No bones    38
dtype: int64

In [29]:
# Share of rows per bones_nobones label: take proportions, round them to two
# decimals, then scale to a percentage.
videos.value_counts(subset='bones_nobones', normalize=True).round(2).mul(100)

bones_nobones
Bones!      51.0
No bones    49.0
dtype: float64

In [30]:
# Share of rows per weekday: take proportions, round them to two decimals, then
# scale to a percentage.
videos.value_counts(subset='weekday', normalize=True).round(2).mul(100)

weekday
Thursday     18.0
Monday       17.0
Friday       15.0
Tuesday      15.0
Saturday     14.0
Wednesday    12.0
Sunday        9.0
dtype: float64

---

## TikTok API stats

#### First five rows?

In [18]:
# Preview the first five rows of the stats table.
stats.head(n=5)

Unnamed: 0,user_name,video_id,video_desc,video_time,video_duration,n_shares,n_comments,n_plays,video_link,timestamp,date,month,weekday,time,weekstart_timestamp,weekstart,updated_date
0,jongraz,7050491114775842095,we must HEED THE BONES!! 🔮🦴🔮,1641570386,45,4122,1678,417600,https://www.tiktok.com/@jongraz/video/70504911...,2022-01-07 15:46:26.000000,2022-01-07,1,Friday,15:46:26,2022-01-03 15:46:26.000000,2022-01-03,2022-02-10 15:28:01.606582
1,jongraz,7048998918729764102,noodle is…is he all of us today? I think he ma...,1641222956,46,5531,1910,621200,https://www.tiktok.com/@jongraz/video/70489989...,2022-01-03 15:15:56.000000,2022-01-03,1,Monday,15:15:56,2022-01-03 15:15:56.000000,2022-01-03,2022-02-10 15:28:01.606582
2,jongraz,7048271702387739910,🚨🚨🚨 we needed THIS!! 🔮🦴🎉 #noodletok #newyearsa...,1641053638,55,11700,2559,791800,https://www.tiktok.com/@jongraz/video/70482717...,2022-01-01 16:13:58.000000,2022-01-01,1,Saturday,16:13:58,2021-12-27 16:13:58.000000,2021-12-27,2022-02-10 15:28:01.606582
3,jongraz,7047890544210496773,your ✨FINAL READING✨ of 2021 🔮🦴🔮,1640964892,59,7085,2550,557900,https://www.tiktok.com/@jongraz/video/70478905...,2021-12-31 15:34:52.000000,2021-12-31,12,Friday,15:34:52,2021-12-27 15:34:52.000000,2021-12-27,2022-02-10 15:28:01.606582
4,jongraz,7046779675493895430,plan or adjust your day accordingly 🔮🦴🔮 #noodl...,1640706249,55,3696,1299,528900,https://www.tiktok.com/@jongraz/video/70467796...,2021-12-28 15:44:09.000000,2021-12-28,12,Tuesday,15:44:09,2021-12-27 15:44:09.000000,2021-12-27,2022-02-10 15:28:01.606582


In [19]:
# Preview the last five rows of the stats table.
stats.tail(n=5)

Unnamed: 0,user_name,video_id,video_desc,video_time,video_duration,n_shares,n_comments,n_plays,video_link,timestamp,date,month,weekday,time,weekstart_timestamp,weekstart,updated_date
62,jongraz,7007786296059940101,no one saw it coming!! #pug #dog #noodletok #b...,1631627395,33,7701,929,972600,https://www.tiktok.com/@jongraz/video/70077862...,2021-09-14 13:49:55.000000,2021-09-14,9,Tuesday,13:49:55,2021-09-13 13:49:55.000000,2021-09-13,2022-02-10 15:28:01.606582
63,jongraz,7004453353946189062,one day we’re gonna have a full skeleton I swe...,1630851384,35,1935,289,477300,https://www.tiktok.com/@jongraz/video/70044533...,2021-09-05 14:16:24.000000,2021-09-05,9,Sunday,14:16:24,2021-08-30 14:16:24.000000,2021-08-30,2022-02-10 15:28:01.606582
64,jongraz,6998871742793993478,they’re nowhere to be found! #pug #seniordog #...,1629551814,25,7958,535,900000,https://www.tiktok.com/@jongraz/video/69988717...,2021-08-21 13:16:54.000000,2021-08-21,8,Saturday,13:16:54,2021-08-16 13:16:54.000000,2021-08-16,2022-02-10 15:28:01.606582
65,jongraz,6995925135182056709,he had them just last night!! #pug #seniordog ...,1628865753,22,34600,1445,2300000,https://www.tiktok.com/@jongraz/video/69959251...,2021-08-13 14:42:33.000000,2021-08-13,8,Friday,14:42:33,2021-08-09 14:42:33.000000,2021-08-09,2022-02-10 15:28:01.606582
66,jongraz,6938547829673626885,Do your dogs do this too?? #pug #dog #humor #d...,1615506561,41,12900,1767,763700,https://www.tiktok.com/@jongraz/video/69385478...,2021-03-11 23:49:21.000000,2021-03-11,3,Thursday,23:49:21,2021-03-08 23:49:21.000000,2021-03-08,2022-02-10 15:28:01.606582


#### 1. Merge with Noodle videos — two solutions

In [34]:
# Cast the stats key to text so it is compared as a string against videos['id'].
stats = stats.astype({'video_id': str})
# Inner join: only videos with a matching stats row are kept. Column names that
# exist in both frames are disambiguated with the default _x / _y suffixes.
merge_df1 = videos.merge(stats, how='inner', left_on='id', right_on='video_id')

In [35]:
# Preview the first five merged rows.
# NOTE(review): the saved output lists Bones_percentage / bones_percent columns
# that no cell visible in this section creates — re-run from a fresh kernel to
# confirm they are not leftover state.
merge_df1.head(n=5)

Unnamed: 0,date_x,bones_nobones,url,id,clean_date,year,month_x,weekday_x,Bones_percentage,bones_percent,...,n_plays,video_link,timestamp,date_y,month_y,weekday_y,time,weekstart_timestamp,weekstart,updated_date
0,1/7/2022,Bones!,https://www.tiktok.com/@jongraz/video/70504911...,7050491114775842095,2022-01-07,2022,January,Friday,,,...,417600,https://www.tiktok.com/@jongraz/video/70504911...,2022-01-07 15:46:26.000000,2022-01-07,1,Friday,15:46:26,2022-01-03 15:46:26.000000,2022-01-03,2022-02-10 15:28:01.606582
1,1/3/2022,No bones,https://www.tiktok.com/@jongraz/video/70489989...,7048998918729764102,2022-01-03,2022,January,Monday,,,...,621200,https://www.tiktok.com/@jongraz/video/70489989...,2022-01-03 15:15:56.000000,2022-01-03,1,Monday,15:15:56,2022-01-03 15:15:56.000000,2022-01-03,2022-02-10 15:28:01.606582
2,1/1/2022,Bones!,https://www.tiktok.com/@jongraz/video/70482717...,7048271702387739910,2022-01-01,2022,January,Saturday,,,...,791800,https://www.tiktok.com/@jongraz/video/70482717...,2022-01-01 16:13:58.000000,2022-01-01,1,Saturday,16:13:58,2021-12-27 16:13:58.000000,2021-12-27,2022-02-10 15:28:01.606582
3,12/31/2021,No bones,https://www.tiktok.com/@jongraz/video/70478905...,7047890544210496773,2021-12-31,2021,December,Friday,,,...,557900,https://www.tiktok.com/@jongraz/video/70478905...,2021-12-31 15:34:52.000000,2021-12-31,12,Friday,15:34:52,2021-12-27 15:34:52.000000,2021-12-27,2022-02-10 15:28:01.606582
4,12/28/2021,No bones,https://www.tiktok.com/@jongraz/video/70467796...,7046779675493895430,2021-12-28,2021,December,Tuesday,,,...,528900,https://www.tiktok.com/@jongraz/video/70467796...,2021-12-28 15:44:09.000000,2021-12-28,12,Tuesday,15:44:09,2021-12-27 15:44:09.000000,2021-12-27,2022-02-10 15:28:01.606582


#### 2. Filter from a list

#### Which video got the most views? Two strategies