In [1]:
# Import Dependencies
import pandas as pd

In [6]:
# Create a path to the csv and read it into a Pandas DataFrame.
# The path is relative, so the notebook has to be run from the directory
# that contains the Resources/ folder.
csv_path = "Resources/ted_talks.csv"
ted_df = pd.read_csv(csv_path)

# Preview only the first few rows; rendering 100 rows bloats the saved
# notebook without adding information.
ted_df.head(10)

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869
5,672,"Tony Robbins discusses the ""invisible forces"" ...",1305,TED2006,36,Tony Robbins,Tony Robbins: Why we do what we do,Why we do what we do,20685401
6,919,When two young Mormon missionaries knock on Ju...,992,TED2006,31,Julia Sweeney,Julia Sweeney: Letting go of God,Letting go of God,3769987
7,46,Architect Joshua Prince-Ramus takes the audien...,1198,TED2006,19,Joshua Prince-Ramus,Joshua Prince-Ramus: Behind the design of Seat...,Behind the design of Seattle's library,967741
8,852,Philosopher Dan Dennett calls for religion -- ...,1485,TED2006,32,Dan Dennett,Dan Dennett: Let's teach religion -- all relig...,Let's teach religion -- all religion -- in sch...,2567958
9,900,"Pastor Rick Warren, author of ""The Purpose-Dri...",1262,TED2006,31,Rick Warren,Rick Warren: A life of purpose,A life of purpose,3095993


In [46]:
# Find the least- and most-viewed TED Talks.
# NOTE(review): the recorded output of this cell already contains the
# "View Group" column, which is only created further down the notebook, so the
# cell was last run out of order; a fresh Restart & Run All will not show it.
view_counts = ted_df["views"]
maxviews = view_counts.max()
minviews = view_counts.min()

print("Max Views: " + str(maxviews))
print("Min Views: " + str(minviews))

# Boolean masks keep every row tied for the extreme value
maxloc = ted_df.loc[view_counts.eq(maxviews)]
minloc = ted_df.loc[view_counts.eq(minviews)]

# Only the most-viewed row(s) are displayed; minloc is computed but not shown
maxloc.head()

Max Views: 47227110
Min Views: 50443


Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views,View Group
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110,5m-50mil


In [34]:
# Bin edges for TED Talk view counts; each consecutive pair of edges
# delimits one bin, so there is one fewer label than there are edges.
bins = [
    0,
    50000,      # 50k
    500000,     # 500k
    5000000,    # 5m
    50000000,   # 50m
]

# Human-readable names for the bins, in the same order as the edges
labels_for_bins = [
    "0 to 50k",
    "50k to 500k",
    "500k-5m",
    "5m-50mil",
]

In [35]:
# Bucket each talk's view count into the labelled ranges.
# pd.cut uses right-closed intervals by default, so a value equal to an edge
# lands in the lower bin, and values outside (0, 50000000] become NaN.
view_series = ted_df["views"]
sliced_data = pd.cut(view_series, bins=bins, labels=labels_for_bins)

# Peek at the first few assigned categories
sliced_data.head()


0    5m-50mil
1     500k-5m
2     500k-5m
3     500k-5m
4    5m-50mil
Name: views, dtype: category
Categories (4, object): [0 to 50k < 50k to 500k < 500k-5m < 5m-50mil]

In [36]:
# Attach the bin labels to the DataFrame as a new column.
# sliced_data was derived from ted_df["views"], so it shares ted_df's index
# and the assignment aligns row-for-row.
view_group_column = "View Group"
ted_df[view_group_column] = sliced_data

# Preview the first five rows to confirm the new column is present
ted_df.head(n=5)

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views,View Group
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110,5m-50mil
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520,500k-5m
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292,500k-5m
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550,500k-5m
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869,5m-50mil


In [51]:
# Create a GroupBy object based upon "View Group".
# observed=False is spelled out so that empty bins (e.g. "0 to 50k") still
# appear in the results regardless of the installed pandas version's default.
grouped_data = ted_df.groupby("View Group", observed=False)

# Find how many rows fall into each bin (counts non-null "comments" values)
howmanyrows = grouped_data["comments"].count()

print(howmanyrows)

# Get the average of the numeric columns of interest within each bin.
# The columns must be selected with a list: tuple-style selection
# (grouped_data["a", "b"]) is deprecated and raises in pandas >= 2.0.
average = grouped_data[["comments", "languages", "duration"]].mean()

print(average)

View Group
0 to 50k          0
50k to 500k     279
500k-5m        2172
5m-50mil         99
Name: comments, dtype: int64
               comments  languages    duration
View Group                                    
0 to 50k            NaN        NaN         NaN
50k to 500k   89.745520  18.763441  833.261649
500k-5m      183.727440  27.837017  823.009669
5m-50mil     650.393939  40.252525  884.282828


In [50]:
# Inspect each column's dtype; "View Group" is categorical because it was
# produced by pd.cut.
# NOTE(review): the recorded output has no "Unnamed: 0" column even though the
# earlier previews show one — presumably captured from a different kernel
# state; re-run from a fresh kernel to confirm.
ted_df.dtypes

comments           int64
description       object
duration           int64
event             object
languages          int64
main_speaker      object
name              object
title             object
views              int64
View Group      category
dtype: object