In [1]:
# Import Dependencies
import pandas as pd

In [2]:
# Create a path to the csv and read it into a Pandas DataFrame
csv_path = "Resources/ted_talks.csv"
ted_df = pd.read_csv(csv_path)

ted_df.head()

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869


In [3]:
# Figure out the minimum and maximum views for a TED Talk
maxViews = ted_df["views"].max()
minViews = ted_df["views"].min()
print(maxViews)
print(minViews)

47227110
50443


In [4]:
# Create bins in which to place values based upon TED Talk views
bins = [0, 2e5, 4e5, 6e5, 8e5, 1e6, 2e6, 3e6, 4e6, 5e6, 5e7]

# Create labels for these bins
groups = ["0 to 200k", "200k to 400k", "400k to 600k", "600k to 800k", "800k to 1M", "1M to 2M", "2M to 3M", "3M to 4M", "4M to 5M", "5M to 6M"]

In [5]:
# Slice the data and place it into bins
pd.cut(ted_df["views"], bins, labels=groups).head()

0    5M to 6M
1    3M to 4M
2    1M to 2M
3    1M to 2M
4    5M to 6M
Name: views, dtype: category
Categories (10, object): [0 to 200k < 200k to 400k < 400k to 600k < 600k to 800k ... 2M to 3M < 3M to 4M < 4M to 5M < 5M to 6M]

In [6]:
# Place the data series into a new column inside of the DataFrame
ted_df["View Group"] = pd.cut(ted_df["views"], bins, labels=groups)
ted_df.head()

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views,View Group
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110,5M to 6M
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520,3M to 4M
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292,1M to 2M
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550,1M to 2M
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869,5M to 6M


In [7]:
# Create a GroupBy object based upon "View Group"
ted_group = ted_df.groupby("View Group")

# Find how many rows fall into each bin
print(ted_group["comments"].count())

# Get the average of each column within the GroupBy object
ted_group[["comments", "duration", "languages"]].mean()

View Group
0 to 200k         32
200k to 400k     135
400k to 600k     234
600k to 800k     307
800k to 1M       339
1M to 2M        1004
2M to 3M         239
3M to 4M          93
4M to 5M          68
5M to 6M          99
Name: comments, dtype: int64


Unnamed: 0_level_0,comments,duration,languages
View Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0 to 200k,76.9375,898.1875,4.0625
200k to 400k,81.992593,832.192593,18.785185
400k to 600k,107.162393,870.517094,22.940171
600k to 800k,118.912052,829.039088,24.400651
800k to 1M,119.628319,798.772861,25.678466
1M to 2M,168.136454,809.899402,27.899402
2M to 3M,299.481172,832.430962,32.807531
3M to 4M,360.870968,809.505376,34.258065
4M to 5M,507.088235,920.514706,35.720588
5M to 6M,650.393939,884.282828,40.252525
