# Py: Tidy data analysis - Friends

Friends is an American television sitcom, created by David Crane and Marta Kauffman, which aired on NBC from September 22, 1994, to May 6, 2004, lasting ten seasons. With an ensemble cast starring Jennifer Aniston, Courteney Cox, Lisa Kudrow, Matt LeBlanc, Matthew Perry and David Schwimmer, the show revolves around six friends in their 20s and 30s who live in Manhattan, New York City. The series was produced by Bright/Kauffman/Crane Productions, in association with Warner Bros. Television. The original executive producers were Kevin S. Bright, Kauffman, and Crane.

In [1]:
# Importing libraries
import datatable as dt
import pandas as pd
import altair as alt
from datatable import f,by,count,update,sort,join
import re

In [2]:
# Display settings: when a frame's output is truncated, show 4 rows from the top and 4 from the bottom
dt.options.display.head_nrows = dt.options.display.tail_nrows = 4
# Initialise datatable's notebook styling
dt.init_styles()

In [3]:
# Importing data: episode-level info (friends_info.csv) and the dialogue lines (friends.csv)
# from the TidyTuesday 2020-09-08 dataset
amigos_info_dt = dt.fread('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-08/friends_info.csv')
amigos_dt = dt.fread('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-08/friends.csv')

In [4]:
# Quick look at the episode-level info frame
amigos_info_dt

Unnamed: 0_level_0,season,episode,title,directed_by,written_by,air_date,us_views_millions,imdb_rating
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,1,1,The Pilot,James Burrows,David Crane & Marta Kauffman,1994-09-22,21.5,8.3
1,1,2,The One with the Sonogram at the End,James Burrows,David Crane & Marta Kauffman,1994-09-29,20.2,8.1
2,1,3,The One with the Thumb,James Burrows,Jeffrey Astrof & Mike Sikowitz,1994-10-06,19.5,8.2
3,1,4,The One with George Stephanopoulos,James Burrows,Alexa Junge,1994-10-13,19.7,8.1
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
232,10,15,The One Where Estelle Dies,Gary Halvorson,Story by : Mark KunerthTeleplay by : David Crane &…,2004-04-22,22.64,8.5
233,10,16,The One with Rachel's Going Away Party,Gary Halvorson,Andrew Reich & Ted Cohen,2004-04-29,24.51,8.9
234,10,17,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7
235,10,18,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7


In [5]:
# Number of rows (episodes) recorded for each season
amigos_info_dt[:, count(), by(f.season)]

Unnamed: 0_level_0,season,count
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪
0,1,24
1,2,24
2,3,25
3,4,24
4,5,24
5,6,25
6,7,24
7,8,24
8,9,24
9,10,18


In [6]:
# Unique episodes per season: collapse to one row per (season, episode) pair,
# then count those rows within each season
amigos_info_dt[:, count(), by(f.season, f.episode)][
    :, {'unique_episodes': count()}, by(f.season)
]

Unnamed: 0_level_0,season,unique_episodes
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪
0,1,24
1,2,24
2,3,25
3,4,24
4,5,24
5,6,25
6,7,24
7,8,24
8,9,24
9,10,18


In [7]:
# Average US views (millions) and IMDb rating per season.
# f[-2:] selects the last two columns by position, so this relies on
# us_views_millions and imdb_rating being the final columns of amigos_info_dt.
amigos_info_dt[:, dt.mean(f[-2:]), by(f.season)]

Unnamed: 0_level_0,season,us_views_millions,imdb_rating
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,1,24.7917,8.31667
1,2,31.7208,8.45833
2,3,26.308,8.408
3,4,24.95,8.475
4,5,24.7458,8.6375
5,6,22.616,8.496
6,7,22.0512,8.4375
7,8,26.7204,8.45
8,9,23.9304,8.30417
9,10,26.1294,8.68889


In [8]:
# Title(s) with the highest IMDb rating (every row tied for the maximum is returned)
amigos_info_dt[f.imdb_rating == dt.max(f.imdb_rating), :]

Unnamed: 0_level_0,season,episode,title,directed_by,written_by,air_date,us_views_millions,imdb_rating
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,5,14,The One Where Everybody Finds Out,Michael Lembeck,Alexa Junge,1999-02-11,27.7,9.7
1,10,17,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7
2,10,18,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7


In [9]:
# Title(s) with the lowest IMDb rating (every row tied for the minimum is returned)
amigos_info_dt[f.imdb_rating == dt.min(f.imdb_rating), :]

Unnamed: 0_level_0,season,episode,title,directed_by,written_by,air_date,us_views_millions,imdb_rating
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,4,21,The One with the Invitation,Peter Bonerz,Seth Kurland,1998-04-23,21.5,7.2


In [10]:
# Top 2 highest-rated titles in each season:
# rows are ordered by descending IMDb rating and the first two of every season group are kept
amigos_info_dt[:2, :, by(f.season), sort(-f.imdb_rating)]

Unnamed: 0_level_0,season,episode,title,directed_by,written_by,air_date,us_views_millions,imdb_rating
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,1,7,The One with the Blackout,James Burrows,Jeffrey Astrof & Mike Sikowitz,1994-11-03,23.5,9.0
1,1,24,The One Where Rachel Finds Out,Kevin S. Bright,Chris Brown,1995-05-18,31.3,8.9
2,2,14,The One with the Prom Video,James Burrows,Alexa Junge,1996-02-01,33.6,9.4
3,2,7,The One Where Ross Finds Out,Peter Bonerz,Michael Borkow,1995-11-09,30.5,9.0
4,3,6,The One with the Flashback,Peter Bonerz,David Crane & Marta Kauffman,1996-10-31,23.3,9.1
5,3,16,The One with the Morning After,James Burrows,David Crane & Marta Kauffman,1997-02-20,28.3,9.1
6,4,12,The One with the Embryos,Kevin S. Bright,Jill Condon & Amy Toomin,1998-01-15,27.1,9.5
7,4,24,The One with Ross' Wedding,Kevin S. Bright,Michael BorkowStory by : Jill Condon & Amy ToominT…,1998-05-07,31.6,9.2
8,5,14,The One Where Everybody Finds Out,Michael Lembeck,Alexa Junge,1999-02-11,27.7,9.7
9,5,8,The One with All the Thanksgivings,Kevin S. Bright,Gregory S. Malins,1998-11-19,23.9,9.2


In [11]:
# Look up the info rows for a given title
amigos_info_dt[f.title == "The Last One", :]

Unnamed: 0_level_0,season,episode,title,directed_by,written_by,air_date,us_views_millions,imdb_rating
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,10,17,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7
1,10,18,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7


In [12]:
# Select the first 235 rows (positions 0 through 234)
amigos_info_dt[:235, :]

Unnamed: 0_level_0,season,episode,title,directed_by,written_by,air_date,us_views_millions,imdb_rating
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,1,1,The Pilot,James Burrows,David Crane & Marta Kauffman,1994-09-22,21.5,8.3
1,1,2,The One with the Sonogram at the End,James Burrows,David Crane & Marta Kauffman,1994-09-29,20.2,8.1
2,1,3,The One with the Thumb,James Burrows,Jeffrey Astrof & Mike Sikowitz,1994-10-06,19.5,8.2
3,1,4,The One with George Stephanopoulos,James Burrows,Alexa Junge,1994-10-13,19.7,8.1
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
231,10,14,The One with Princess Consuela,Gary Halvorson,Story by : Robert CarlockTeleplay by : Tracy Reill…,2004-02-26,22.83,8.6
232,10,15,The One Where Estelle Dies,Gary Halvorson,Story by : Mark KunerthTeleplay by : David Crane &…,2004-04-22,22.64,8.5
233,10,16,The One with Rachel's Going Away Party,Gary Halvorson,Andrew Reich & Ted Cohen,2004-04-29,24.51,8.9
234,10,17,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7


In [13]:
# Scatter plot of US views (millions) against the episode number within a season
alt.Chart(
    amigos_info_dt[:, [f.season, f.episode, f.us_views_millions]].to_pandas()
).mark_point().encode(
    x=alt.X('episode'),
    y=alt.Y('us_views_millions'),
).properties(title=' Episode and views')

In [14]:
# Average views and rating per director, ordered from the highest to the lowest mean IMDb rating.
# f[-2:] selects the last two columns by position (us_views_millions, imdb_rating).
directors_views_rating = amigos_info_dt[
    :, dt.mean(f[-2:]), by(f.directed_by)
][:, :, sort(-f.imdb_rating)]

In [15]:
# Top 10 directors by number of titles directed:
# count rows per director, order by descending count and keep the first 10
directors_dt = amigos_info_dt[:, count(), by(f.directed_by)][
    :10, :, sort(-f.count)
]

In [16]:
# Key the frame on directed_by; the later join against directors_dt matches rows on this column
directors_dt.key='directed_by'

In [17]:
# First 5 and last 5 observations of the director summary.
# The start of the tail is derived from the frame's row count rather than a
# hard-coded 25, which only yields "the last 5" for one specific frame length.
# It is clamped to 5 so the head and tail slices never overlap on short frames.
directors_views_rating[
    [slice(5), slice(max(directors_views_rating.nrows - 5, 5), None)], :
]

Unnamed: 0_level_0,directed_by,us_views_millions,imdb_rating
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,Joe Regalbuto,27.0,9.1
1,Kevin S. Bright,27.3308,8.6566
2,Pamela Fryman,23.7,8.65
3,Andrew Tsao,26.0,8.6
4,David Schwimmer,24.435,8.55
5,Robby Benson,27.9333,8.18333
6,Arlene Sanford,18.2,8.1
7,Steve Zuckerman,24.7,8.1
8,Thomas Schlamme,31.9,8.05
9,Todd Holland,21.9,8.0


In [18]:
# Directors with their average views/rating and total number of titles.
# The join attaches `count` from directors_dt (top-10 directors only); directors
# without a match get a missing count and are dropped, then rows are ordered by
# descending count.
directors_views_rating_v1 = directors_views_rating[:, :, join(directors_dt)][
    ~dt.isna(f.count), :
][:, :, sort(-f.count)]

In [19]:
# Inspect the joined director summary
directors_views_rating_v1

Unnamed: 0_level_0,directed_by,us_views_millions,imdb_rating,count
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,Gary Halvorson,22.3561,8.40185,54
1,Kevin S. Bright,27.3308,8.6566,53
2,Michael Lembeck,29.1513,8.52083,24
3,James Burrows,24.78,8.50667,15
4,Gail Mancuso,25.5214,8.35714,14
5,Peter Bonerz,25.0167,8.325,12
6,David Schwimmer,24.435,8.55,10
7,Ben Weiss,23.788,8.3,10
8,Shelley Jensen,25.5667,8.43333,6
9,Robby Benson,27.9333,8.18333,6


In [20]:
# Bar chart: titles directed per top director, bars coloured by average IMDb rating
alt.Chart(directors_views_rating_v1.to_pandas()).mark_bar().encode(
    y=alt.Y('directed_by', sort='-x'),
    x=alt.X('count'),
    color=alt.Color('imdb_rating'),
).properties(title='Top directors title counts and imdb ratings')

In [21]:
# Bar chart: titles directed per top director, bars coloured by average US views (millions)
alt.Chart(directors_views_rating_v1.to_pandas()).mark_bar().encode(
    y=alt.Y('directed_by', sort='-x'),
    x=alt.X('count'),
    color=alt.Color('us_views_millions'),
).properties(title='Top directors title counts and US million views')

In [22]:
# Are the director and the writer the same for a title?
# Adds a helper column `temp` to amigos_info_dt in place, holding the result of the comparison.
amigos_info_dt[:, update(temp=f.directed_by == f.written_by)]

In [23]:
# Show the titles where the `temp` flag is set, i.e. director and writer credits are identical
amigos_info_dt[f.temp==True,:]

Unnamed: 0_level_0,season,episode,title,directed_by,written_by,air_date,us_views_millions,imdb_rating,temp
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪


In [24]:
# Remove the helper `temp` column from amigos_info_dt again
del amigos_info_dt["temp"]

In [25]:
# Split each writing credit into the individual writer names.
# Names are separated by '&', and some credits also embed "Story by :" /
# "Teleplay by :" labels directly against the names
# (e.g. "Story by : Mark KunerthTeleplay by : David Crane & ..."), so splitting
# on '&' alone under-counts the writers of those episodes.
# Each name is stripped, empty fragments are dropped, and repeated names within
# one credit are kept once (dict.fromkeys preserves first-seen order).
writers_list = [
    list(dict.fromkeys(
        nombre.strip()
        for nombre in re.split(r'&|(?:Story|Teleplay)\s+by\s*:', elemento)
        if nombre.strip()
    ))
    for elemento in amigos_info_dt[:, f.written_by].to_list()[0]
]

In [26]:
# Build a one-column frame holding the number of writers found for each title
writers_dt = dt.Frame({'no_of_writers': list(map(len, writers_list))})

In [27]:
# Column-bind the writer counts onto the episode info.
# writers_dt was built from amigos_info_dt's written_by column, one entry per row in the same order.
amigos_info_dt_v1 = dt.cbind(amigos_info_dt,writers_dt)

In [28]:
# Bar chart: how many titles have each number of writers
alt.Chart(
    amigos_info_dt_v1[:, count(), by(f.no_of_writers)].to_pandas()
).mark_bar().encode(
    x=alt.X('count'),
    y=alt.Y('no_of_writers:O'),
).properties(title='Number of writers in titles')

In [29]:
# Air year of every episode: the first run of four digits in the air date.
# The value is stringified first so this also works when air_date holds
# non-string values (NOTE(review): newer datatable releases may read ISO dates
# as a date type instead of text - confirm for the installed version).
# Values with no four-digit run (including missing ones) yield None instead of
# raising, which the original `re.findall(...)[0]` would do.
amigos_year = dt.Frame({'year': [
    coincidencia.group(0) if coincidencia else None
    for coincidencia in (
        re.search(r'\d{4}', str(fecha))
        for fecha in amigos_info_dt_v1[:, f.air_date].to_list()[0]
    )
]})

In [30]:
# Line chart: number of episodes aired per year
alt.Chart(amigos_year[:, count(), by(f.year)].to_pandas()).mark_line().encode(
    x=alt.X('year'),
    y=alt.Y('count'),
)

In [31]:
# Quick look at the dialogue frame (text, speaker, season, episode, scene, utterance)
amigos_dt

Unnamed: 0_level_0,text,speaker,season,episode,scene,utterance
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪
0,There's nothing to tell! He's just some guy I work…,Monica Geller,1,1,1,1
1,"C'mon, you're going out with the guy! There's gott…",Joey Tribbiani,1,1,1,2
2,"All right Joey, be nice. So does he have a hump? A…",Chandler Bing,1,1,1,3
3,"Wait, does he eat chalk?",Phoebe Buffay,1,1,1,4
⋮,⋮,⋮,⋮,⋮,⋮,⋮
67369,Do you guys have to go to the new house right away…,Rachel Green,10,18,11,18
67370,We got some time.,Monica Geller,10,18,11,19
67371,"Okay, should we get some coffee?",Rachel Green,10,18,11,20
67372,Sure. Where?,Chandler Bing,10,18,11,21


In [32]:
# Convert the episode info to pandas; the pandas join below uses it as the left side
amigos_info_df = amigos_info_dt.to_pandas()

In [33]:
# Convert the dialogue lines to pandas; the pandas join below uses it as the right side
amigos_df = amigos_dt.to_pandas()

In [34]:
# Attach every dialogue line to its episode's info: join the two pandas frames
# on (season, episode) and convert the result back to a datatable Frame
amigos_todo_dt = dt.Frame(
    amigos_info_df.join(
        amigos_df.set_index(['season', 'episode']),
        on=['season', 'episode'],
    )
)

In [44]:
# Inspect the combined episode-info + dialogue frame
amigos_todo_dt

Unnamed: 0_level_0,season,episode,title,directed_by,written_by,air_date,us_views_millions,imdb_rating,text,speaker,scene,utterance
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪
0,1,1,The Pilot,James Burrows,David Crane & Marta Kauffman,1994-09-22,21.5,8.3,There's nothing to tell! He's just some guy I work…,Monica Geller,1,1
1,1,1,The Pilot,James Burrows,David Crane & Marta Kauffman,1994-09-22,21.5,8.3,"C'mon, you're going out with the guy! There's gott…",Joey Tribbiani,1,2
2,1,1,The Pilot,James Burrows,David Crane & Marta Kauffman,1994-09-22,21.5,8.3,"All right Joey, be nice. So does he have a hump? A…",Chandler Bing,1,3
3,1,1,The Pilot,James Burrows,David Crane & Marta Kauffman,1994-09-22,21.5,8.3,"Wait, does he eat chalk?",Phoebe Buffay,1,4
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
67369,10,18,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7,Do you guys have to go to the new house right away…,Rachel Green,11,18
67370,10,18,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7,We got some time.,Monica Geller,11,19
67371,10,18,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7,"Okay, should we get some coffee?",Rachel Green,11,20
67372,10,18,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7,Sure. Where?,Chandler Bing,11,21


In [50]:
# Keep only the rows whose speaker is not missing
amigos_todo_dt[~dt.isna(f.speaker), :]

Unnamed: 0_level_0,season,episode,title,directed_by,written_by,air_date,us_views_millions,imdb_rating,text,speaker,scene,utterance
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪
0,1,1,The Pilot,James Burrows,David Crane & Marta Kauffman,1994-09-22,21.5,8.3,There's nothing to tell! He's just some guy I work…,Monica Geller,1,1
1,1,1,The Pilot,James Burrows,David Crane & Marta Kauffman,1994-09-22,21.5,8.3,"C'mon, you're going out with the guy! There's gott…",Joey Tribbiani,1,2
2,1,1,The Pilot,James Burrows,David Crane & Marta Kauffman,1994-09-22,21.5,8.3,"All right Joey, be nice. So does he have a hump? A…",Chandler Bing,1,3
3,1,1,The Pilot,James Burrows,David Crane & Marta Kauffman,1994-09-22,21.5,8.3,"Wait, does he eat chalk?",Phoebe Buffay,1,4
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
67093,10,18,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7,Do you guys have to go to the new house right away…,Rachel Green,11,18
67094,10,18,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7,We got some time.,Monica Geller,11,19
67095,10,18,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7,"Okay, should we get some coffee?",Rachel Green,11,20
67096,10,18,The Last One,Kevin S. Bright,Marta Kauffman & David Crane,2004-05-06,52.46,9.7,Sure. Where?,Chandler Bing,11,21
