In [34]:
import numpy as np
import pandas as pd
import ast
from collections import OrderedDict

In [35]:
# load the raw talk metadata and preview the first five rows
df = pd.read_csv("talks_info.csv")
df.head()

Unnamed: 0,_id,duration,event,likes,page_url,published_date,recorded_date,related_videos,speakers,subtitle_languages,summary,title,topics,transcript,views,youtube_video_code
0,21,992,TED2006,17K,https://www.ted.com/talks/mena_trott_meet_the_...,2006-08-25T00:11:00Z,2006-02-23,"[""144"",""1282"",""1379"",""87"",""2302"",""2638""]","[{""name"":""Mena Trott"",""occupation"":""Blogger; c...","[{""name"":""English"",""code"":""en""},{""name"":""Vietn...","The founding mother of the blog revolution, Mo...",Meet the founder of the blog revolution,"[{""id"":""4"",""name"":""culture""},{""id"":""13"",""name""...","Over the past couple of days, as I've been pre...",589115,RlBTxuWCuL8
1,1,957,TED2006,110K,https://www.ted.com/talks/al_gore_averting_the...,2006-06-27T00:11:00Z,2006-02-25,"[""243"",""547"",""2093"",""74405"",""64693"",""83767""]","[{""name"":""Al Gore"",""occupation"":""Climate advoc...","[{""name"":""Greek"",""code"":""el""},{""name"":""Gujarat...",With the same humor and humanity he exuded in ...,Averting the climate crisis,"[{""id"":""3"",""name"":""climate change""},{""id"":""4"",...","Thank you so much, Chris. And it's truly a gre...",3671801,rDiGYuQicpA
2,7,1266,TED2006,60K,https://www.ted.com/talks/david_pogue_simplici...,2006-06-27T00:11:00Z,2006-02-24,"[""1725"",""2274"",""172"",""2664"",""2464"",""1268""]","[{""name"":""David Pogue"",""occupation"":""Technolog...","[{""name"":""Greek"",""code"":""el""},{""name"":""English...",New York Times columnist David Pogue takes aim...,Simplicity sells,"[{""id"":""10"",""name"":""technology""},{""id"":""36"",""n...","(Music: ""The Sound of Silence,"" Simon &amp; Ga...",2008487,NEjZt0y6OOw
3,47,1126,TEDGlobal 2005,80K,https://www.ted.com/talks/david_deutsch_chemic...,2006-09-12T00:11:00Z,2005-07-14,"[""2237"",""701"",""1095"",""1386"",""76211"",""242""]","[{""name"":""David Deutsch"",""occupation"":""Physici...","[{""name"":""Greek"",""code"":""el""},{""name"":""English...",Legendary scientist David Deutsch puts theoret...,Chemical scum that dream of distant quasars,"[{""id"":""3"",""name"":""climate change""},{""id"":""4"",...",We've been told to go out on a limb and say so...,2694257,gQliI_WGaGk
4,55,1524,TED2006,14K,https://www.ted.com/talks/jehane_noujaim_my_wi...,2006-07-25T00:11:00Z,2006-02-26,"[""2228"",""1476"",""800"",""2890"",""45233"",""2694""]","[{""name"":""Jehane Noujaim"",""occupation"":""Filmma...","[{""name"":""English"",""code"":""en""},{""name"":""Vietn...",Jehane Noujaim unveils her 2006 TED Prize wish...,My wish: A global day of film,"[{""id"":""4"",""name"":""culture""},{""id"":""6"",""name"":...",I can't help but this wish: to think about whe...,489757,QCFSrb6B5nw


In [36]:
# use 'id' instead of the exported '_id' column name
df = df.rename(columns={'_id': 'id'})

In [37]:
# raw topics value of the first talk
df.loc[0, 'topics']

'[{"id":"4","name":"culture"},{"id":"13","name":"design"},{"id":"18","name":"business"},{"id":"37","name":"entertainment"},{"id":"42","name":"software"},{"id":"85","name":"storytelling"},{"id":"86","name":"communication"},{"id":"87","name":"community"}]'

In [38]:
# check how the topics value is stored after loading
first_topics = df.loc[0, 'topics']
print(type(first_topics))

<class 'str'>


In [39]:
# convert topics column to be read as list instead of string
# Only string values are parsed: ast.literal_eval raises ValueError when it is
# handed an already-parsed list, so without the guard this cell could not be
# run a second time on the same kernel.
df['topics'] = df['topics'].apply(
    lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw
)

In [40]:
# convert string representation of numbers in the thousands to numeric values
def likes_to_numeric(likes_str):
    """Convert an abbreviated like count such as '17K' or '2.2M' to an int.

    Parameters
    ----------
    likes_str : str or number
        Count text with an optional 'K' (thousands) or 'M' (millions) suffix.
        The suffix is matched case-insensitively and surrounding whitespace is
        ignored. A value that is not a string is passed straight to ``int()``,
        so applying the function to an already-converted column is harmless.

    Returns
    -------
    int
        The like count as a whole number.

    Raises
    ------
    ValueError
        If the numeric part cannot be parsed.
    """
    # already numeric, e.g. the conversion cell was run a second time
    if not isinstance(likes_str, str):
        return int(likes_str)

    multipliers = {'K': 1000, 'M': 1000000}
    text = likes_str.strip()
    suffix = text[-1:].upper()
    if suffix in multipliers:
        # round() rather than int(): a float product such as 32.3 * 1000 can
        # land just below the whole number, and int() would truncate it to
        # 32299 instead of 32300
        return round(float(text[:-1]) * multipliers[suffix])
    # no recognised suffix, so the text is a plain integer
    return int(text)

# turn the abbreviated like counts ('17K', '2.2M', ...) into integers
df['likes'] = df['likes'].map(likes_to_numeric)
# make sure views is stored as an integer column
df['views'] = df['views'].astype(int)
df.head(10)

Unnamed: 0,id,duration,event,likes,page_url,published_date,recorded_date,related_videos,speakers,subtitle_languages,summary,title,topics,transcript,views,youtube_video_code
0,21,992,TED2006,17000,https://www.ted.com/talks/mena_trott_meet_the_...,2006-08-25T00:11:00Z,2006-02-23,"[""144"",""1282"",""1379"",""87"",""2302"",""2638""]","[{""name"":""Mena Trott"",""occupation"":""Blogger; c...","[{""name"":""English"",""code"":""en""},{""name"":""Vietn...","The founding mother of the blog revolution, Mo...",Meet the founder of the blog revolution,"[{'id': '4', 'name': 'culture'}, {'id': '13', ...","Over the past couple of days, as I've been pre...",589115,RlBTxuWCuL8
1,1,957,TED2006,110000,https://www.ted.com/talks/al_gore_averting_the...,2006-06-27T00:11:00Z,2006-02-25,"[""243"",""547"",""2093"",""74405"",""64693"",""83767""]","[{""name"":""Al Gore"",""occupation"":""Climate advoc...","[{""name"":""Greek"",""code"":""el""},{""name"":""Gujarat...",With the same humor and humanity he exuded in ...,Averting the climate crisis,"[{'id': '3', 'name': 'climate change'}, {'id':...","Thank you so much, Chris. And it's truly a gre...",3671801,rDiGYuQicpA
2,7,1266,TED2006,60000,https://www.ted.com/talks/david_pogue_simplici...,2006-06-27T00:11:00Z,2006-02-24,"[""1725"",""2274"",""172"",""2664"",""2464"",""1268""]","[{""name"":""David Pogue"",""occupation"":""Technolog...","[{""name"":""Greek"",""code"":""el""},{""name"":""English...",New York Times columnist David Pogue takes aim...,Simplicity sells,"[{'id': '10', 'name': 'technology'}, {'id': '3...","(Music: ""The Sound of Silence,"" Simon &amp; Ga...",2008487,NEjZt0y6OOw
3,47,1126,TEDGlobal 2005,80000,https://www.ted.com/talks/david_deutsch_chemic...,2006-09-12T00:11:00Z,2005-07-14,"[""2237"",""701"",""1095"",""1386"",""76211"",""242""]","[{""name"":""David Deutsch"",""occupation"":""Physici...","[{""name"":""Greek"",""code"":""el""},{""name"":""English...",Legendary scientist David Deutsch puts theoret...,Chemical scum that dream of distant quasars,"[{'id': '3', 'name': 'climate change'}, {'id':...",We've been told to go out on a limb and say so...,2694257,gQliI_WGaGk
4,55,1524,TED2006,14000,https://www.ted.com/talks/jehane_noujaim_my_wi...,2006-07-25T00:11:00Z,2006-02-26,"[""2228"",""1476"",""800"",""2890"",""45233"",""2694""]","[{""name"":""Jehane Noujaim"",""occupation"":""Filmma...","[{""name"":""English"",""code"":""en""},{""name"":""Vietn...",Jehane Noujaim unveils her 2006 TED Prize wish...,My wish: A global day of film,"[{'id': '4', 'name': 'culture'}, {'id': '6', '...",I can't help but this wish: to think about whe...,489757,QCFSrb6B5nw
5,16,1394,TED2006,364000,https://www.ted.com/talks/helen_fisher_why_we_...,2006-09-06T00:11:00Z,2006-02-24,"[""307"",""374"",""1669"",""2590"",""31375"",""2252""]","[{""name"":""Helen Fisher"",""occupation"":""Anthropo...","[{""name"":""Greek"",""code"":""el""},{""name"":""English...",Anthropologist Helen Fisher takes on a tricky ...,"Why we love, why we cheat","[{'id': '4', 'name': 'culture'}, {'id': '8', '...",I'd like to talk today about the two biggest s...,12138500,x-ewvCNguug
6,64,1212,TED2004,42000,https://www.ted.com/talks/eve_ensler_happiness...,2006-09-06T00:11:00Z,2004-02-04,"[""217"",""751"",""1753"",""2288"",""37787"",""90405""]","[{""name"":""Eve Ensler"",""occupation"":""Playwright...","[{""name"":""Greek"",""code"":""el""},{""name"":""English...","Eve Ensler, creator of ""The Vagina Monologues,...",Happiness in body and soul,"[{'id': '4', 'name': 'culture'}, {'id': '6', '...",I bet you're worried. (Laughter) I was worried...,1405560,NQvMQEB0j_A
7,71,1248,TED2006,128000,https://www.ted.com/talks/rick_warren_a_life_o...,2006-07-18T00:11:00Z,2006-02-25,"[""94"",""676"",""2011"",""86"",""9125"",""31459""]","[{""name"":""Rick Warren"",""occupation"":""Pastor, a...","[{""name"":""Greek"",""code"":""el""},{""name"":""English...","Pastor Rick Warren, author of ""The Purpose-Dri...",A life of purpose,"[{'id': '4', 'name': 'culture'}, {'id': '90', ...","I'm often asked, ""What surprised you about the...",4280534,640BQNxB5mc
8,3,1105,TEDGlobal 2005,32000,https://www.ted.com/talks/ashraf_ghani_how_to_...,2006-10-18T00:11:00Z,2005-07-12,"[""127"",""1929"",""584"",""625"",""270"",""1321""]","[{""name"":""Ashraf Ghani"",""occupation"":""Presiden...","[{""name"":""Greek"",""code"":""el""},{""name"":""English...",Ashraf Ghani's passionate and powerful 10-minu...,How to rebuild a broken state,"[{'id': '4', 'name': 'culture'}, {'id': '6', '...","A public, Dewey long ago observed, is constitu...",1095372,A6GLw12jywo
9,66,1151,TED2006,2200000,https://www.ted.com/talks/sir_ken_robinson_do_...,2006-06-27T00:11:00Z,2006-02-25,"[""30217"",""865"",""1738"",""9048"",""2341"",""2182""]","[{""name"":""Sir Ken Robinson"",""occupation"":""Auth...","[{""name"":""Portuguese, Brazilian"",""code"":""pt-br...",Sir Ken Robinson makes an entertaining and pro...,Do schools kill creativity?,"[{'id': '4', 'name': 'culture'}, {'id': '52', ...",Good morning. How are you? (Audience) Good. It...,73389142,iG9CE55wbtY


In [41]:
# confirm that views now holds integers
first_views = df.loc[0, 'views']
print(type(first_views))

<class 'numpy.int32'>


In [42]:
# confirm that topics now holds parsed lists
first_topics = df.loc[0, 'topics']
print(type(first_topics))

<class 'list'>


In [43]:
# list the topic entries attached to the first talk, one per line
first_talk_topics = df.loc[0, 'topics']
for i in first_talk_topics:
    print(i)

{'id': '4', 'name': 'culture'}
{'id': '13', 'name': 'design'}
{'id': '18', 'name': 'business'}
{'id': '37', 'name': 'entertainment'}
{'id': '42', 'name': 'software'}
{'id': '85', 'name': 'storytelling'}
{'id': '86', 'name': 'communication'}
{'id': '87', 'name': 'community'}


In [44]:
# create dictionary of topics with id as key and topic as value
topics_dict = OrderedDict()
for topics_list in df['topics']:
    for topic in topics_list:
        # Keys are stored as ints, so the lookup has to use the int id as well.
        # Testing the raw string id never matches an int key, which would let
        # a later talk overwrite the name recorded first. setdefault keeps the
        # first name seen for each id.
        topics_dict.setdefault(int(topic['id']), topic['name'])
# sort dictionary based on id
sorted_topics_dict = OrderedDict(sorted(topics_dict.items()))
print(sorted_topics_dict)

OrderedDict([(3, 'climate change'), (4, 'culture'), (5, 'environment'), (6, 'global issues'), (7, 'politics'), (8, 'science'), (9, 'sustainability'), (10, 'technology'), (13, 'design'), (14, 'engineering'), (15, 'industrial design'), (16, 'invention'), (18, 'business'), (19, 'corruption'), (21, 'economics'), (22, 'entrepreneur'), (24, 'military'), (25, 'NASA'), (27, 'flight'), (28, 'rocket science'), (30, 'transportation'), (31, 'DNA'), (32, 'biosphere'), (33, 'biotech'), (34, 'genetics'), (36, 'computers'), (37, 'entertainment'), (39, 'media'), (40, 'music'), (41, 'performance'), (42, 'software'), (44, 'architecture'), (45, 'cities'), (46, 'collaboration'), (47, 'death'), (49, 'interview'), (50, 'memory'), (51, 'urban planning'), (52, 'education'), (53, 'innovation'), (54, 'robots'), (55, 'social change'), (57, 'disease'), (58, 'food'), (59, 'health'), (60, 'health care'), (62, 'Africa'), (63, 'animals'), (64, 'nature'), (65, 'primates'), (66, 'cancer'), (68, 'creativity'), (69, 'art'

In [45]:
# spot-check one of the highest topic ids
lookup_id = 6942
print(sorted_topics_dict[lookup_id])

metaverse


In [46]:
# rebuild the id -> topic name dictionary from the topics column
topics_dict = OrderedDict()
# only the topics column is needed, so iterate over it directly instead of
# materialising every full row with iterrows()
for topics_list in df['topics']:
    for topic in topics_list:
        # convert first: the dictionary keys are ints, so comparing the raw
        # string id would never find an existing entry
        topic_id = int(topic['id'])
        if topic_id not in topics_dict:
            topics_dict[topic_id] = topic['name']
# sort dictionary based on id
sorted_topics_dict = OrderedDict(sorted(topics_dict.items()))
print(sorted_topics_dict)

OrderedDict([(3, 'climate change'), (4, 'culture'), (5, 'environment'), (6, 'global issues'), (7, 'politics'), (8, 'science'), (9, 'sustainability'), (10, 'technology'), (13, 'design'), (14, 'engineering'), (15, 'industrial design'), (16, 'invention'), (18, 'business'), (19, 'corruption'), (21, 'economics'), (22, 'entrepreneur'), (24, 'military'), (25, 'NASA'), (27, 'flight'), (28, 'rocket science'), (30, 'transportation'), (31, 'DNA'), (32, 'biosphere'), (33, 'biotech'), (34, 'genetics'), (36, 'computers'), (37, 'entertainment'), (39, 'media'), (40, 'music'), (41, 'performance'), (42, 'software'), (44, 'architecture'), (45, 'cities'), (46, 'collaboration'), (47, 'death'), (49, 'interview'), (50, 'memory'), (51, 'urban planning'), (52, 'education'), (53, 'innovation'), (54, 'robots'), (55, 'social change'), (57, 'disease'), (58, 'food'), (59, 'health'), (60, 'health care'), (62, 'Africa'), (63, 'animals'), (64, 'nature'), (65, 'primates'), (66, 'cancer'), (68, 'creativity'), (69, 'art'

In [47]:
# number of distinct topic ids found across all talks
n_topics = len(sorted_topics_dict)
print(n_topics)

353


In [48]:
# one row per topic id, with an empty placeholder column for talk ids
data = [(topic_id, topic_name) for topic_id, topic_name in sorted_topics_dict.items()]
topics_df = (
    pd.DataFrame(data, columns=['Topic ID', 'topics'])
    .set_index('Topic ID')
    .assign(**{'Talk ID List': np.nan})
)
topics_df

Unnamed: 0_level_0,topics,Talk ID List
Topic ID,Unnamed: 1_level_1,Unnamed: 2_level_1
3,climate change,
4,culture,
5,environment,
6,global issues,
7,politics,
...,...,...
6865,collective,
6942,metaverse,
6996,worklife,
7059,veganism,


In [49]:
# expand dataset to have one topic per row
# Columns carried over to every expanded row, in output order; 'topics' is
# exploded and then renamed to 'topic'.
talk_columns = [
    'id', 'duration', 'event', 'likes', 'page_url', 'published_date',
    'recorded_date', 'related_videos', 'speakers', 'subtitle_languages',
    'summary', 'title', 'topics', 'transcript', 'views', 'youtube_video_code',
]

topics_df = (
    df[talk_columns]
    # one output row per element of each talk's topics list
    .explode('topics', ignore_index=True)
    # explode() emits a NaN row for a talk whose topic list is empty;
    # drop those rows so such talks contribute nothing
    .dropna(subset=['topics'])
    .rename(columns={'topics': 'topic'})
    .reset_index(drop=True)
)

In [50]:
# display the expanded frame (one row per talk/topic pair)
topics_df

Unnamed: 0,id,duration,event,likes,page_url,published_date,recorded_date,related_videos,speakers,subtitle_languages,summary,title,topic,transcript,views,youtube_video_code
0,21,992,TED2006,17000,https://www.ted.com/talks/mena_trott_meet_the_...,2006-08-25T00:11:00Z,2006-02-23,"[""144"",""1282"",""1379"",""87"",""2302"",""2638""]","[{""name"":""Mena Trott"",""occupation"":""Blogger; c...","[{""name"":""English"",""code"":""en""},{""name"":""Vietn...","The founding mother of the blog revolution, Mo...",Meet the founder of the blog revolution,"{'id': '4', 'name': 'culture'}","Over the past couple of days, as I've been pre...",589115,RlBTxuWCuL8
1,21,992,TED2006,17000,https://www.ted.com/talks/mena_trott_meet_the_...,2006-08-25T00:11:00Z,2006-02-23,"[""144"",""1282"",""1379"",""87"",""2302"",""2638""]","[{""name"":""Mena Trott"",""occupation"":""Blogger; c...","[{""name"":""English"",""code"":""en""},{""name"":""Vietn...","The founding mother of the blog revolution, Mo...",Meet the founder of the blog revolution,"{'id': '13', 'name': 'design'}","Over the past couple of days, as I've been pre...",589115,RlBTxuWCuL8
2,21,992,TED2006,17000,https://www.ted.com/talks/mena_trott_meet_the_...,2006-08-25T00:11:00Z,2006-02-23,"[""144"",""1282"",""1379"",""87"",""2302"",""2638""]","[{""name"":""Mena Trott"",""occupation"":""Blogger; c...","[{""name"":""English"",""code"":""en""},{""name"":""Vietn...","The founding mother of the blog revolution, Mo...",Meet the founder of the blog revolution,"{'id': '18', 'name': 'business'}","Over the past couple of days, as I've been pre...",589115,RlBTxuWCuL8
3,21,992,TED2006,17000,https://www.ted.com/talks/mena_trott_meet_the_...,2006-08-25T00:11:00Z,2006-02-23,"[""144"",""1282"",""1379"",""87"",""2302"",""2638""]","[{""name"":""Mena Trott"",""occupation"":""Blogger; c...","[{""name"":""English"",""code"":""en""},{""name"":""Vietn...","The founding mother of the blog revolution, Mo...",Meet the founder of the blog revolution,"{'id': '37', 'name': 'entertainment'}","Over the past couple of days, as I've been pre...",589115,RlBTxuWCuL8
4,21,992,TED2006,17000,https://www.ted.com/talks/mena_trott_meet_the_...,2006-08-25T00:11:00Z,2006-02-23,"[""144"",""1282"",""1379"",""87"",""2302"",""2638""]","[{""name"":""Mena Trott"",""occupation"":""Blogger; c...","[{""name"":""English"",""code"":""en""},{""name"":""Vietn...","The founding mother of the blog revolution, Mo...",Meet the founder of the blog revolution,"{'id': '42', 'name': 'software'}","Over the past couple of days, as I've been pre...",589115,RlBTxuWCuL8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39224,95709,649,TED2022,4500,https://www.ted.com/talks/scott_fitsimones_cou...,2022-07-26T15:04:59Z,2022-04-10,"[""84954"",""89893"",""91980"",""2107"",""1429"",""66946""]","[{""name"":""Scott Fitsimones"",""occupation"":""Expe...","[{""name"":""English"",""code"":""en""}]","Could DAOs, or ""decentralized autonomous organ...",Could a DAO build the next great city?,"{'id': '80', 'name': 'future'}",How do you start a new city? Turns out it's no...,151529,zTStDvUtQWc
39225,95709,649,TED2022,4500,https://www.ted.com/talks/scott_fitsimones_cou...,2022-07-26T15:04:59Z,2022-04-10,"[""84954"",""89893"",""91980"",""2107"",""1429"",""66946""]","[{""name"":""Scott Fitsimones"",""occupation"":""Expe...","[{""name"":""English"",""code"":""en""}]","Could DAOs, or ""decentralized autonomous organ...",Could a DAO build the next great city?,"{'id': '252', 'name': 'democracy'}",How do you start a new city? Turns out it's no...,151529,zTStDvUtQWc
39226,95709,649,TED2022,4500,https://www.ted.com/talks/scott_fitsimones_cou...,2022-07-26T15:04:59Z,2022-04-10,"[""84954"",""89893"",""91980"",""2107"",""1429"",""66946""]","[{""name"":""Scott Fitsimones"",""occupation"":""Expe...","[{""name"":""English"",""code"":""en""}]","Could DAOs, or ""decentralized autonomous organ...",Could a DAO build the next great city?,"{'id': '274', 'name': 'money'}",How do you start a new city? Turns out it's no...,151529,zTStDvUtQWc
39227,95709,649,TED2022,4500,https://www.ted.com/talks/scott_fitsimones_cou...,2022-07-26T15:04:59Z,2022-04-10,"[""84954"",""89893"",""91980"",""2107"",""1429"",""66946""]","[{""name"":""Scott Fitsimones"",""occupation"":""Expe...","[{""name"":""English"",""code"":""en""}]","Could DAOs, or ""decentralized autonomous organ...",Could a DAO build the next great city?,"{'id': '557', 'name': 'blockchain'}",How do you start a new city? Turns out it's no...,151529,zTStDvUtQWc


In [51]:
# total number of talk/topic rows after the expansion
topics_df.shape[0]

39229