In [1]:
import pandas as pd
import numpy as np
import ast

In [2]:
df = pd.read_csv('titles.csv')


## 'onw-hot encoding' genre

In [3]:
import ast

# Convert the string representation of the genres list into an actual list
df['genres'] = df['genres'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

# Extract all unique genres
unique_genres = set(genre for sublist in df['genres'].dropna() for genre in sublist)

unique_genres

{'action',
 'animation',
 'comedy',
 'crime',
 'documentation',
 'drama',
 'european',
 'family',
 'fantasy',
 'history',
 'horror',
 'music',
 'reality',
 'romance',
 'scifi',
 'sport',
 'thriller',
 'war',
 'western'}

In [4]:
# Add Boolean columns for each unique genre
for genre in unique_genres:
    df[genre] = df['genres'].apply(lambda x: genre in x if isinstance(x, list) else False)

# Display the first few rows of the updated DataFrame
df.head()


Unnamed: 0,id,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,...,western,thriller,history,action,reality,drama,sport,music,family,scifi
0,ts300399,Five Came Back: The Reference Films,SHOW,This collection includes 12 World War II-era p...,1945,TV-MA,51,[documentation],['US'],1.0,...,False,False,False,False,False,False,False,False,False,False
1,tm84618,Taxi Driver,MOVIE,A mentally unstable Vietnam War veteran works ...,1976,R,114,"[drama, crime]",['US'],,...,False,False,False,False,False,True,False,False,False,False
2,tm154986,Deliverance,MOVIE,Intent on seeing the Cahulawassee River before...,1972,R,109,"[drama, action, thriller, european]",['US'],,...,False,True,False,True,False,True,False,False,False,False
3,tm127384,Monty Python and the Holy Grail,MOVIE,"King Arthur, accompanied by his squire, recrui...",1975,PG,91,"[fantasy, action, comedy]",['GB'],,...,False,False,False,True,False,False,False,False,False,False
4,tm120801,The Dirty Dozen,MOVIE,12 American military prisoners in World War II...,1967,,150,"[war, action]","['GB', 'US']",,...,False,False,False,True,False,False,False,False,False,False


## Handling Null Values

In [5]:
df.isna().sum().sort_values()[df.isna().sum().sort_values()>0]

title                   1
description            18
tmdb_popularity        91
tmdb_score            311
imdb_id               403
imdb_score            482
imdb_votes            498
age_certification    2619
seasons              3744
dtype: int64

In [6]:
len(df)

5850

In [7]:
df_clean =df.dropna(subset=['imdb_score','title','tmdb_score','imdb_score'])
len(df_clean)

5145

## Combining IMDB and TMDB Score

In [8]:
df_clean['score'] = df_clean.apply(lambda row: (row['imdb_score'] + row['tmdb_score']) / 2 , axis=1)
df_clean.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean['score'] = df_clean.apply(lambda row: (row['imdb_score'] + row['tmdb_score']) / 2 , axis=1)


Unnamed: 0,id,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,...,thriller,history,action,reality,drama,sport,music,family,scifi,score
1,tm84618,Taxi Driver,MOVIE,A mentally unstable Vietnam War veteran works ...,1976,R,114,"[drama, crime]",['US'],,...,False,False,False,False,True,False,False,False,False,8.1895
2,tm154986,Deliverance,MOVIE,Intent on seeing the Cahulawassee River before...,1972,R,109,"[drama, action, thriller, european]",['US'],,...,True,False,True,False,True,False,False,False,False,7.5
3,tm127384,Monty Python and the Holy Grail,MOVIE,"King Arthur, accompanied by his squire, recrui...",1975,PG,91,"[fantasy, action, comedy]",['GB'],,...,False,False,True,False,False,False,False,False,False,8.0055
4,tm120801,The Dirty Dozen,MOVIE,12 American military prisoners in World War II...,1967,,150,"[war, action]","['GB', 'US']",,...,False,False,True,False,False,False,False,False,False,7.65
5,ts22164,Monty Python's Flying Circus,SHOW,A British sketch comedy series with the shows ...,1969,TV-14,30,"[comedy, european]",['GB'],4.0,...,False,False,False,False,False,False,False,False,False,8.553


## Handling Titles with Multiple Production Countries

In [9]:
# Convert the 'production_countries' column to a list
df_clean['production_countries'] = df_clean['production_countries'].apply(ast.literal_eval)

# Filter titles with more than one production country
titles_more_than_one_country = df_clean[df_clean['production_countries'].apply(len) > 1]

# Filter titles with zero production countries
titles_zero_countries = df_clean[df_clean['production_countries'].apply(len) == 0]

print(f'# of titles with 0 countries: {len(titles_zero_countries)} ')
print(f'# of titles with more than 1 country: {len(titles_more_than_one_country)} ')


# of titles with 0 countries: 112 
# of titles with more than 1 country: 579 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean['production_countries'] = df_clean['production_countries'].apply(ast.literal_eval)


In [10]:
# indexes to drop
zero_i =titles_zero_countries.index
to_drop_i = zero_i.union(titles_more_than_one_country.index)
df_map = df_clean.drop(to_drop_i)
print(len(df_map))

4454


## Geographical Coordinates Mapping

In [11]:
df_map['production_countries'] =df_map['production_countries'].astype(str)

df_map['production_countries'] =df_map['production_countries'].str.extract("\['(.*?)'")

df_map['production_countries'].unique()

array(['US', 'GB', 'EG', 'DE', 'IN', 'LB', 'JP', 'AR', 'AU', 'FR', 'MX',
       'DK', 'CA', 'HK', 'IT', 'RU', 'KR', 'CN', 'CO', 'CL', 'TR', 'TW',
       'NG', 'MY', 'Lebanon', 'PH', 'NO', 'ZA', 'XX', 'ID', 'SA', 'PS',
       'SG', 'ES', 'FI', 'IL', 'BR', 'PL', 'RO', 'NZ', 'UA', 'IE', 'IS',
       'SE', 'NL', 'HR', 'CD', 'TN', 'AE', 'AT', 'PK', 'VN', 'HU', 'TH',
       'BE', 'PR', 'KH', 'PE', 'GH', 'TZ', 'PY', 'ZW', 'CZ', 'CM', 'BD',
       'LU', 'JO', 'MU', 'IO', 'SN', 'UY', 'KW', 'PT', 'CH', 'AF', 'KE'],
      dtype=object)

In [12]:
df_map['production_countries'] = df_map['production_countries'].apply(lambda x: 'LB' if x=='Lebanon' else x)
df_map['production_countries'].unique()

array(['US', 'GB', 'EG', 'DE', 'IN', 'LB', 'JP', 'AR', 'AU', 'FR', 'MX',
       'DK', 'CA', 'HK', 'IT', 'RU', 'KR', 'CN', 'CO', 'CL', 'TR', 'TW',
       'NG', 'MY', 'PH', 'NO', 'ZA', 'XX', 'ID', 'SA', 'PS', 'SG', 'ES',
       'FI', 'IL', 'BR', 'PL', 'RO', 'NZ', 'UA', 'IE', 'IS', 'SE', 'NL',
       'HR', 'CD', 'TN', 'AE', 'AT', 'PK', 'VN', 'HU', 'TH', 'BE', 'PR',
       'KH', 'PE', 'GH', 'TZ', 'PY', 'ZW', 'CZ', 'CM', 'BD', 'LU', 'JO',
       'MU', 'IO', 'SN', 'UY', 'KW', 'PT', 'CH', 'AF', 'KE'], dtype=object)

In [13]:
# Calculate the average score for each country
average_scores = df_map.groupby('production_countries')['score'].mean()

# Calculate the count of titles for each country
title_counts = df_map['production_countries'].value_counts()

# Combine the two series into a single DataFrame for better presentation
country_statistics = pd.DataFrame({
    'Average Score': average_scores,
    'Title Count': title_counts
}).reset_index().rename(columns={'index': 'Country Code'})

len(country_statistics['Country Code'].unique())

75

In [14]:
len(country_statistics['Country Code'].unique())

75

here we drop the row with the country code XX as its probably a place holder

In [15]:
filtered_titles_df = country_statistics[country_statistics['Country Code'] != 'XX']

In [16]:
len(filtered_titles_df['Country Code'].unique())

74

---

In [17]:
df_country_info = pd.read_csv('countries_info.csv')
df_country_info.head()

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code,Latitude (average),Longitude (average)
0,Afghanistan,"""AF""","""AFG""","""4""","""33""","""65"""
1,Albania,"""AL""","""ALB""","""8""","""41""","""20"""
2,Algeria,"""DZ""","""DZA""","""12""","""28""","""3"""
3,American Samoa,"""AS""","""ASM""","""16""","""-14.3333""","""-170"""
4,Andorra,"""AD""","""AND""","""20""","""42.5""","""1.6"""


In [18]:
for col in df_country_info.columns:
    df_country_info[col]=df_country_info[col].str.replace('"', '')
    df_country_info[col]=df_country_info[col].str.replace(' ', '')
df_country_info.head()

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code,Latitude (average),Longitude (average)
0,Afghanistan,AF,AFG,4,33.0,65.0
1,Albania,AL,ALB,8,41.0,20.0
2,Algeria,DZ,DZA,12,28.0,3.0
3,AmericanSamoa,AS,ASM,16,-14.3333,-170.0
4,Andorra,AD,AND,20,42.5,1.6


In [19]:
df_country_info['Alpha-2 code'].unique()

array(['AF', 'AL', 'DZ', 'AS', 'AD', 'AO', 'AI', 'AQ', 'AG', 'AR', 'AM',
       'AW', 'AU', 'AT', 'AZ', 'BS', 'BH', 'BD', 'BB', 'BY', 'BE', 'BZ',
       'BJ', 'BM', 'BT', 'BO', 'BA', 'BW', 'BV', 'BR', 'IO', 'BN', 'BG',
       'BF', 'BI', 'KH', 'CM', 'CA', 'CV', 'KY', 'CF', 'TD', 'CL', 'CN',
       'CX', 'CC', 'CO', 'KM', 'CG', 'CD', 'CK', 'CR', 'CI', 'HR', 'CU',
       'CY', 'CZ', 'DK', 'DJ', 'DM', 'DO', 'EC', 'EG', 'SV', 'GQ', 'ER',
       'EE', 'ET', 'FK', 'FO', 'FJ', 'FI', 'FR', 'GF', 'PF', 'TF', 'GA',
       'GM', 'GE', 'DE', 'GH', 'GI', 'GR', 'GL', 'GD', 'GP', 'GU', 'GT',
       'GG', 'GN', 'GW', 'GY', 'HT', 'HM', 'VA', 'HN', 'HK', 'HU', 'IS',
       'IN', 'ID', 'IR', 'IQ', 'IE', 'IM', 'IL', 'IT', 'JM', 'JP', 'JE',
       'JO', 'KZ', 'KE', 'KI', 'KP', 'KR', 'KW', 'KG', 'LA', 'LV', 'LB',
       'LS', 'LR', 'LY', 'LI', 'LT', 'LU', 'MO', 'MK', 'MG', 'MW', 'MY',
       'MV', 'ML', 'MT', 'MH', 'MQ', 'MR', 'MU', 'YT', 'MX', 'FM', 'MD',
       'MC', 'MN', 'ME', 'MS', 'MA', 'MZ', 'MM', 'N

---
Long/Lat coordinates generation
---

In [20]:
from geopy.geocoders import Nominatim
unique_countries = df_map['production_countries'].dropna().unique()
unique_countries


array(['US', 'GB', 'EG', 'DE', 'IN', 'LB', 'JP', 'AR', 'AU', 'FR', 'MX',
       'DK', 'CA', 'HK', 'IT', 'RU', 'KR', 'CN', 'CO', 'CL', 'TR', 'TW',
       'NG', 'MY', 'PH', 'NO', 'ZA', 'XX', 'ID', 'SA', 'PS', 'SG', 'ES',
       'FI', 'IL', 'BR', 'PL', 'RO', 'NZ', 'UA', 'IE', 'IS', 'SE', 'NL',
       'HR', 'CD', 'TN', 'AE', 'AT', 'PK', 'VN', 'HU', 'TH', 'BE', 'PR',
       'KH', 'PE', 'GH', 'TZ', 'PY', 'ZW', 'CZ', 'CM', 'BD', 'LU', 'JO',
       'MU', 'IO', 'SN', 'UY', 'KW', 'PT', 'CH', 'AF', 'KE'], dtype=object)

In [21]:
country_coordinates = {
    'US': (37.0902, -95.7129), 'GB': (55.3781, -3.4360), 'EG': (26.8206, 30.8025), 'DE': (51.1657, 10.4515),
    'IN': (20.5937, 78.9629), 'CA': (56.1304, -106.3468), 'JP': (36.2048, 138.2529), 'AR': (-38.4161, -63.6167),
    'FR': (46.6034, 1.8883), 'IT': (41.8719, 12.5674), 'IE': (53.4129, -8.2439), 'HK': (22.3964, 114.1095),
    'AU': (-25.2744, 133.7751), 'MX': (23.6345, -102.5528), 'ES': (40.4637, -3.7492), 'BE': (50.5039, 4.4699),
    'TR': (38.9637, 35.2433), 'RU': (61.5240, 105.3188), 'CN': (35.8617, 104.1954), 'BR': (-14.2350, -51.9253),
    'SA': (23.8859, 45.0792), 'SE': (60.1282, 18.6435), 'NZ': (-40.9006, 174.8860), 'FI': (61.9241, 25.7482),
    'NO': (60.4720, 8.4689), 'KR': (35.9078, 127.7669), 'ZA': (-30.5595, 22.9375), 'CH': (46.8182, 8.2275),
    'AT': (47.5162, 14.5501), 'PL': (51.9194, 19.1451), 'NL': (52.1326, 5.2913), 'CZ': (49.8175, 15.4730),
    'GR': (39.0742, 21.8243), 'IL': (31.0461, 34.8516), 'TH': (15.8700, 100.9925), 'IQ': (33.2232, 43.6793),
    'IR': (32.4279, 53.6880), 'UA': (48.3794, 31.1656), 'PT': (39.3999, -8.2245), 'PK': (30.3753, 69.3451),
    'MY': (4.2105, 101.9758), 'HU': (47.1625, 19.5033), 'ID': (-0.7893, 113.9213), 'RO': (45.9432, 24.9668),
    'DK': (56.2639, 9.5018), 'SG': (1.3521, 103.8198), 'AE': (23.4241, 53.8478), 'VE': (6.4238, -66.5897),
    'PH': (12.8797, 121.7740), 'PE': (-9.1900, -75.0152), 'BD': (23.6850, 90.3563), 'KE': (-1.2921, 36.8219),
    'PY': (-23.4425, -58.4438), 'QA': (25.3548, 51.1839), 'VN': (14.0583, 108.2772), 'CO': (4.5709, -74.2973),
    'CL': (-35.6751, -71.5430), 'BE': (50.5039, 4.4699), 'BY': (53.7098, 27.9534), 'BG': (42.7339, 25.4858),
    'MA': (31.7917, -7.0926), 'NG': (9.0820, 8.6753), 'PE': (-9.1900, -75.0152), 'ZW': (-19.0154, 29.1549),
    'JO': (30.5852, 36.2384), 'LB': (33.8547, 35.8623), 'TN': (33.8869, 9.5375), 'LU': (49.8153, 6.1296),
    'EE': (58.5953, 25.0136), 'MT': (35.9375, 14.3754), 'TW': (23.6978, 120.9605), 'GH': (7.9465, -1.0232),
    'MU': (-20.3485, 57.5522), 'UY': (-32.5228, -55.7658), 'NG': (9.0820, 8.6753), 'BT': (27.5142, 90.4336),
    'KW': (29.3759, 47.9774), 'NP': (28.3949, 84.1240), 'VE': (6.4238, -66.5897), 'ZW': (-19.0154, 29.1549),
    'GH': (7.9465, -1.0232), 'MU': (-20.3485, 57.5522), 'AE': (23.4241, 53.8478), 'JO': (30.5852, 36.2384),
    'LB': (33.8547, 35.8623), 'TN': (33.8869, 9.5375), "PS": (31.9522, 35.2332),
    "IS": (64.9631, -19.0208),
    "HR": (45.1, 15.2000),
    "CD": (-4.0383, 21.7587),
    "PR": (18.2208, -66.5901),
    "KH": (12.5657, 104.9910),
    "TZ": (-6.3690, 34.8888),
    "CM": (7.3697, 12.3547),
    "IO": (-6.3432, 71.8765),
    "SN": (14.4974, -14.4524),
    "AF": (33.9391, 67.7100)
}


# Mapping countries to their approximate latitudes and longitudes
country_lat_lon = {country: country_coordinates.get(country, (None, None)) for country in unique_countries}

# Filter out countries that we couldn't find coordinates for
missing_coordinates = {k: v for k, v in country_lat_lon.items() if v == (None, None)}

country_lat_lon, missing_coordinates

({'US': (37.0902, -95.7129),
  'GB': (55.3781, -3.436),
  'EG': (26.8206, 30.8025),
  'DE': (51.1657, 10.4515),
  'IN': (20.5937, 78.9629),
  'LB': (33.8547, 35.8623),
  'JP': (36.2048, 138.2529),
  'AR': (-38.4161, -63.6167),
  'AU': (-25.2744, 133.7751),
  'FR': (46.6034, 1.8883),
  'MX': (23.6345, -102.5528),
  'DK': (56.2639, 9.5018),
  'CA': (56.1304, -106.3468),
  'HK': (22.3964, 114.1095),
  'IT': (41.8719, 12.5674),
  'RU': (61.524, 105.3188),
  'KR': (35.9078, 127.7669),
  'CN': (35.8617, 104.1954),
  'CO': (4.5709, -74.2973),
  'CL': (-35.6751, -71.543),
  'TR': (38.9637, 35.2433),
  'TW': (23.6978, 120.9605),
  'NG': (9.082, 8.6753),
  'MY': (4.2105, 101.9758),
  'PH': (12.8797, 121.774),
  'NO': (60.472, 8.4689),
  'ZA': (-30.5595, 22.9375),
  'XX': (None, None),
  'ID': (-0.7893, 113.9213),
  'SA': (23.8859, 45.0792),
  'PS': (31.9522, 35.2332),
  'SG': (1.3521, 103.8198),
  'ES': (40.4637, -3.7492),
  'FI': (61.9241, 25.7482),
  'IL': (31.0461, 34.8516),
  'BR': (-14.235,

In [22]:
df_map

Unnamed: 0,id,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,...,thriller,history,action,reality,drama,sport,music,family,scifi,score
1,tm84618,Taxi Driver,MOVIE,A mentally unstable Vietnam War veteran works ...,1976,R,114,"[drama, crime]",US,,...,False,False,False,False,True,False,False,False,False,8.1895
2,tm154986,Deliverance,MOVIE,Intent on seeing the Cahulawassee River before...,1972,R,109,"[drama, action, thriller, european]",US,,...,True,False,True,False,True,False,False,False,False,7.5000
3,tm127384,Monty Python and the Holy Grail,MOVIE,"King Arthur, accompanied by his squire, recrui...",1975,PG,91,"[fantasy, action, comedy]",GB,,...,False,False,True,False,False,False,False,False,False,8.0055
5,ts22164,Monty Python's Flying Circus,SHOW,A British sketch comedy series with the shows ...,1969,TV-14,30,"[comedy, european]",GB,4.0,...,False,False,False,False,False,False,False,False,False,8.5530
6,tm70993,Life of Brian,MOVIE,"Brian Cohen is an average young Jewish man, bu...",1979,R,94,[comedy],GB,,...,False,False,False,False,False,False,False,False,False,7.9000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5835,tm985747,I Missed You,MOVIE,A woman who was overwhelmed by work and had no...,2021,,96,"[drama, romance]",TW,,...,False,False,False,False,True,False,False,False,False,6.0000
5836,tm959213,My Amanda,MOVIE,Two unusually close friends share every aspect...,2021,,89,"[drama, romance]",PH,,...,False,False,False,False,True,False,False,False,False,5.7000
5838,tm1053409,Happiness Ever After,MOVIE,Five years later from where we left our charac...,2021,,99,"[drama, romance]",ZA,,...,False,False,False,False,True,False,False,False,False,5.7500
5843,tm1097142,My Bride,MOVIE,The story follows a young man and woman who go...,2021,,93,"[romance, comedy, drama]",EG,,...,False,False,False,False,True,False,False,False,False,5.1500


In [23]:
# Fill the DataFrame with longitude and latitude based on the "production_countries" column
df_map['latitude'] = df_map['production_countries'].map(lambda x: country_lat_lon.get(x, (None, None))[0])
df_map['longitude'] = df_map['production_countries'].map(lambda x: country_lat_lon.get(x, (None, None))[1])



In [24]:
df_map

Unnamed: 0,id,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,...,action,reality,drama,sport,music,family,scifi,score,latitude,longitude
1,tm84618,Taxi Driver,MOVIE,A mentally unstable Vietnam War veteran works ...,1976,R,114,"[drama, crime]",US,,...,False,False,True,False,False,False,False,8.1895,37.0902,-95.7129
2,tm154986,Deliverance,MOVIE,Intent on seeing the Cahulawassee River before...,1972,R,109,"[drama, action, thriller, european]",US,,...,True,False,True,False,False,False,False,7.5000,37.0902,-95.7129
3,tm127384,Monty Python and the Holy Grail,MOVIE,"King Arthur, accompanied by his squire, recrui...",1975,PG,91,"[fantasy, action, comedy]",GB,,...,True,False,False,False,False,False,False,8.0055,55.3781,-3.4360
5,ts22164,Monty Python's Flying Circus,SHOW,A British sketch comedy series with the shows ...,1969,TV-14,30,"[comedy, european]",GB,4.0,...,False,False,False,False,False,False,False,8.5530,55.3781,-3.4360
6,tm70993,Life of Brian,MOVIE,"Brian Cohen is an average young Jewish man, bu...",1979,R,94,[comedy],GB,,...,False,False,False,False,False,False,False,7.9000,55.3781,-3.4360
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5835,tm985747,I Missed You,MOVIE,A woman who was overwhelmed by work and had no...,2021,,96,"[drama, romance]",TW,,...,False,False,True,False,False,False,False,6.0000,23.6978,120.9605
5836,tm959213,My Amanda,MOVIE,Two unusually close friends share every aspect...,2021,,89,"[drama, romance]",PH,,...,False,False,True,False,False,False,False,5.7000,12.8797,121.7740
5838,tm1053409,Happiness Ever After,MOVIE,Five years later from where we left our charac...,2021,,99,"[drama, romance]",ZA,,...,False,False,True,False,False,False,False,5.7500,-30.5595,22.9375
5843,tm1097142,My Bride,MOVIE,The story follows a young man and woman who go...,2021,,93,"[romance, comedy, drama]",EG,,...,False,False,True,False,False,False,False,5.1500,26.8206,30.8025


In [25]:
# Dictionary mapping two-letter country codes to three-letter ISO codes
two_letter_to_full_name = {
    'US': 'United States', 'GB': 'United Kingdom', 'EG': 'Egypt', 'DE': 'Germany', 'IN': 'India', 'CA': 'Canada',
    'JP': 'Japan', 'AR': 'Argentina', 'FR': 'France', 'IT': 'Italy', 'IE': 'Ireland', 'HK': 'Hong Kong',
    'AU': 'Australia', 'MX': 'Mexico', 'ES': 'Spain', 'BE': 'Belgium', 'TR': 'Turkey', 'RU': 'Russia',
    'CN': 'China', 'BR': 'Brazil', 'SA': 'Saudi Arabia', 'SE': 'Sweden', 'NZ': 'New Zealand', 'FI': 'Finland',
    'NO': 'Norway', 'KR': 'South Korea', 'ZA': 'South Africa', 'CH': 'Switzerland', 'AT': 'Austria', 
    'PL': 'Poland', 'NL': 'Netherlands', 'CZ': 'Czech Republic', 'GR': 'Greece', 'IL': 'Israel', 
    'TH': 'Thailand', 'IQ': 'Iraq', 'IR': 'Iran', 'UA': 'Ukraine', 'PT': 'Portugal', 'PK': 'Pakistan',
    'MY': 'Malaysia', 'HU': 'Hungary', 'ID': 'Indonesia', 'RO': 'Romania', 'DK': 'Denmark', 'SG': 'Singapore', 
    'AE': 'United Arab Emirates', 'VE': 'Venezuela', 'PH': 'Philippines', 'PE': 'Peru', 'BD': 'Bangladesh', 
    'KE': 'Kenya', 'PY': 'Paraguay', 'QA': 'Qatar', 'VN': 'Vietnam', 'CO': 'Colombia', 'CL': 'Chile', 
    'BY': 'Belarus', 'BG': 'Bulgaria', 'MA': 'Morocco', 'NG': 'Nigeria', 'ZW': 'Zimbabwe', 'JO': 'Jordan', 
    'LB': 'Lebanon', 'TN': 'Tunisia', 'LU': 'Luxembourg', 'EE': 'Estonia', 'MT': 'Malta', 'TW': 'Taiwan', 
    'GH': 'Ghana', 'MU': 'Mauritius', 'UY': 'Uruguay', 'BT': 'Bhutan', 'KW': 'Kuwait', 'NP': 'Nepal'
}

# Replace two-letter codes with three-letter ISO codes
df_map['production_countries'] = df_map['production_countries'].map(two_letter_to_full_name.get)

# Display the first few rows of the dataframe after the transformation
df_map.head()



Unnamed: 0,id,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,...,action,reality,drama,sport,music,family,scifi,score,latitude,longitude
1,tm84618,Taxi Driver,MOVIE,A mentally unstable Vietnam War veteran works ...,1976,R,114,"[drama, crime]",United States,,...,False,False,True,False,False,False,False,8.1895,37.0902,-95.7129
2,tm154986,Deliverance,MOVIE,Intent on seeing the Cahulawassee River before...,1972,R,109,"[drama, action, thriller, european]",United States,,...,True,False,True,False,False,False,False,7.5,37.0902,-95.7129
3,tm127384,Monty Python and the Holy Grail,MOVIE,"King Arthur, accompanied by his squire, recrui...",1975,PG,91,"[fantasy, action, comedy]",United Kingdom,,...,True,False,False,False,False,False,False,8.0055,55.3781,-3.436
5,ts22164,Monty Python's Flying Circus,SHOW,A British sketch comedy series with the shows ...,1969,TV-14,30,"[comedy, european]",United Kingdom,4.0,...,False,False,False,False,False,False,False,8.553,55.3781,-3.436
6,tm70993,Life of Brian,MOVIE,"Brian Cohen is an average young Jewish man, bu...",1979,R,94,[comedy],United Kingdom,,...,False,False,False,False,False,False,False,7.9,55.3781,-3.436


In [29]:
#create column count of movies per country
df_map['count'] = df_map.groupby('production_countries')['production_countries'].transform('count')

In [30]:
#only keep id,score,production_countries,count,latitude,longitude
df_map = df_map[['id','score','production_countries','count','latitude','longitude']]

In [31]:

output_path = 'titles_with_coordinates.csv'


df_map.to_csv(output_path, index=False)

In [32]:
df_map

Unnamed: 0,id,score,production_countries,count,latitude,longitude
1,tm84618,8.1895,United States,1800.0,37.0902,-95.7129
2,tm154986,7.5000,United States,1800.0,37.0902,-95.7129
3,tm127384,8.0055,United Kingdom,200.0,55.3781,-3.4360
5,ts22164,8.5530,United Kingdom,200.0,55.3781,-3.4360
6,tm70993,7.9000,United Kingdom,200.0,55.3781,-3.4360
...,...,...,...,...,...,...
5835,tm985747,6.0000,Taiwan,52.0,23.6978,120.9605
5836,tm959213,5.7000,Philippines,74.0,12.8797,121.7740
5838,tm1053409,5.7500,South Africa,24.0,-30.5595,22.9375
5843,tm1097142,5.1500,Egypt,26.0,26.8206,30.8025


In [None]:
df_country_info.set_index('Alpha-2 code', inplace=True)

In [None]:
df_country_info

Unnamed: 0_level_0,Country,Alpha-3 code,Numeric code,Latitude (average),Longitude (average)
Alpha-2 code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AF,Afghanistan,AFG,4,33,65
AL,Albania,ALB,8,41,20
DZ,Algeria,DZA,12,28,3
AS,American Samoa,ASM,16,-14.3333,-170
AD,Andorra,AND,20,42.5,1.6
...,...,...,...,...,...
WF,Wallis and Futuna,WLF,876,-13.3,-176.2
EH,Western Sahara,ESH,732,24.5,-13
YE,Yemen,YEM,887,15,48
ZM,Zambia,ZMB,894,-15,30


In [None]:
# Set the 'Country Code' column as the index for easier lookup
country_statistics.set_index('Country Code', inplace=True)

In [None]:
map_df = df_country_info.copy()
for col in country_statistics.columns:
    map_df[col] = map_df['Alpha-2 code'].map(country_statistics[col])
len(map_df)



256

In [None]:
countries =list(country_statistics.index)

In [None]:
map_df['Alpha-2 code']

Series([], Name: Alpha-2 code, dtype: object)

In [None]:
map_df['Alpha-2 code'] = map_df['Alpha-2 code'].astype(str)

In [None]:
map_df.dropna(subset=['Average Score'],inplace=True)
len(map_df)

0

In [None]:
mask = map_df['Alpha-2 code'].apply(lambda x: False if countries.__contains__(x) else True)
len(map_df[mask])

256

---

In [None]:
df_map.head()

Unnamed: 0,id,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,...,reality,romance,comedy,european,animation,music,history,family,horror,score
1,tm84618,Taxi Driver,MOVIE,A mentally unstable Vietnam War veteran works ...,1976,R,114,"[drama, crime]",US,,...,False,False,False,False,False,False,False,False,False,8.1895
2,tm154986,Deliverance,MOVIE,Intent on seeing the Cahulawassee River before...,1972,R,109,"[drama, action, thriller, european]",US,,...,False,False,False,True,False,False,False,False,False,7.5
3,tm127384,Monty Python and the Holy Grail,MOVIE,"King Arthur, accompanied by his squire, recrui...",1975,PG,91,"[fantasy, action, comedy]",GB,,...,False,False,True,False,False,False,False,False,False,8.0055
5,ts22164,Monty Python's Flying Circus,SHOW,A British sketch comedy series with the shows ...,1969,TV-14,30,"[comedy, european]",GB,4.0,...,False,False,True,True,False,False,False,False,False,8.553
6,tm70993,Life of Brian,MOVIE,"Brian Cohen is an average young Jewish man, bu...",1979,R,94,[comedy],GB,,...,False,False,True,False,False,False,False,False,False,7.9


In [None]:
df.to_csv('titles_wiht_genres.csv')

In [None]:
df.columns

Index(['id', 'title', 'type', 'description', 'release_year',
       'age_certification', 'runtime', 'genres', 'production_countries',
       'seasons', 'imdb_id', 'imdb_score', 'imdb_votes', 'tmdb_popularity',
       'tmdb_score', 'music', 'drama', 'scifi', 'reality', 'war', 'comedy',
       'sport', 'documentation', 'european', 'history', 'western', 'action',
       'romance', 'fantasy', 'horror', 'animation', 'family', 'crime',
       'thriller'],
      dtype='object')

In [None]:
df.dropna(subset=['imdb_score',''],inplace=True)

In [None]:
df.isna().sum()

id                         0
title                      0
type                       0
description                5
release_year               0
age_certification       2335
runtime                    0
genres                     0
production_countries       0
seasons                 3429
imdb_id                    0
imdb_score                 0
imdb_votes                16
tmdb_popularity           75
tmdb_score               223
music                      0
drama                      0
scifi                      0
reality                    0
war                        0
comedy                     0
sport                      0
documentation              0
european                   0
history                    0
western                    0
action                     0
romance                    0
fantasy                    0
horror                     0
animation                  0
family                     0
crime                      0
thriller                   0
dtype: int64

In [None]:
df.to_csv('titles_wiht_genres.csv')

In [None]:
def process_drilldown_data(originalDrilldownData):
    # Extracting the data to form a list of actors and their respective scores
    actor_score_data = []
    for category in originalDrilldownData:
        actor_score_data.extend(category['data'])

    # Sort the list based on scores (and then by actor names for ties)
    sorted_actor_score_data = sorted(actor_score_data, key=lambda x: (-x[1], x[0]))

    # Extract top 10 actors and their scores
    top_10_actors = sorted_actor_score_data[:10]

    # Extracting the data to form the category-specific drilldown data
    drilldown_data = []
    for category in originalDrilldownData:
        drilldown_data.append({
            'name': category['name'],
            'id': category['id'],
            'data': [item[0] for item in category['data']]
        })

    return top_10_actors, drilldown_data

# Example usage:

originalDrilldownData = [{'name': 'comedy', 'id': 'comedy', 'data': [['Ahn Jae-hong', 9.2], ['Kim Sung-kyun', 9.2], ['Kim Seol', 9.2], ['Ra Mi-ran', 9.2], ['Park Bo-gum', 9.2], ['Ryu Hye-young', 9.2], ['Ryu Jun-yeol', 9.2], ['Lee Hye-ri', 9.2], ['Chisa Yokoyama', 9.0], ['Banjo Ginga', 9.0], ['Paul Riley', 8.9], ['Michael Richards', 8.9], ['Mark Cox', 8.9], ['Ford Kiernan', 8.9], ['Jason Alexander', 8.9], ['Jane McCarry', 8.9], ['Unsho Ishizuka', 8.9], ['Sanjeev Kohli', 8.9], ['Gavin Mitchell', 8.9], ['Aoi Tada', 8.9], ['Greg Hemphill', 8.9], ['Shin Hyun-bin', 8.8], ['Huang Ziteng', 8.8], ['He Peng', 8.8], ['Yu Bin', 8.8], ['Bowen Li', 8.8], ['Zhang Jingtong', 8.8], ['Liu Haikuan', 8.8], ['Moon Tae-Yoo', 8.8], ['Lu Jianmin', 8.8], ['Zheng Fanxing', 8.8], ['Cao Junxiang', 8.8], ['Jeon Mi-do', 8.8], ['Cao Yuchen', 8.8], ['Liu Yinjun', 8.8], ['Katsuhisa Hôki', 8.8], ['Wang Haoxuan', 8.8], ['Song Jiyang', 8.8], ['You Chea-myung', 8.8], ['Meng Ziyi', 8.8]]}, {'name': 'european', 'id': 'european', 'data': [['Mark Cox', 8.9], ['Greg Hemphill', 8.9], ['Sanjeev Kohli', 8.9], ['Gavin Mitchell', 8.9], ['Paul Riley', 8.9], ['Jane McCarry', 8.9], ['Ford Kiernan', 8.9], ['Graham Chapman', 8.8], ['Michael Palin', 8.8], ['Eric Idle', 8.8], ['Terry Jones', 8.8], ['Terry Gilliam', 8.8], ["Natasha O'Keeffe", 8.8], ['Paddy McGuinness', 8.7], ['Stephen Boxer', 8.7], ['Chris Harris', 8.7], ["Josh O'Connor", 8.7], ['Andrew Flintoff', 8.7], ['Emma Corrin', 8.7], ['Marion Bailey', 8.7], ['Erin Doherty', 8.7], ['Gillian Anderson', 8.7], ['Helena Bonham Carter', 8.7], ['Sue Perkins', 8.6], ['Mary Berry', 8.6], ['Mel Giedroyc', 8.6], ['Robert LaSardo', 8.5], ['Fred Fischer', 8.5], ['Mikael Birkkjær', 8.5], ['Lisbeth Wulff', 8.5], ['Pilou Asbæk', 8.5], ['Didier Legros', 8.5], ['Jeff McBride', 8.5], ['Jernard Burks', 8.5], ['Birgitte Hjort Sørensen', 8.5], ['Lars Mikkelsen', 8.5], ['Eric Challier', 8.5], ['Sonny Zito', 8.5], ['Junior Almeida', 8.5], ['Mark Rowley', 8.5]]}, {'name': 'scifi', 'id': 'scifi', 'data': [['Zach Tyler', 9.3], ['Jessie Flower', 9.3], ['Dante Basco', 9.3], ['Cricket Leigh', 9.3], ['André Sogliuzzo', 9.3], ['Olivia Hack', 9.3], ['Toks Olagundoye', 9.0], ['Harry Lloyd', 9.0], ['Jason Spisak', 9.0], ['Hailee Steinfeld', 9.0], ['Unsho Ishizuka', 8.9], ['Aoi Tada', 8.9], ['Greg Baldwin', 8.850000000000001], ['Jack De Sena', 8.850000000000001], ['Ken Watanabe', 8.8], ['Tom Berenger', 8.8], ['Tom Hardy', 8.8], ['Nicolas Clerc', 8.8], ['Nicole Pulliam', 8.8], ['Felix Scott', 8.8], ['Johnathan Geare', 8.8], ['Andrew Pleavin', 8.8], ['Tohoru Masamune', 8.8], ['Lukas Haas', 8.8], ['Lisa Reynolds', 8.8], ['Earl Cameron', 8.8], ['Dileep Rao', 8.8], ['Alex Lombard', 8.8], ['Magnus Nolan', 8.8], ['Natasha Beaumont', 8.8], ['Jill Maddrell', 8.8], ['Tim Kelleher', 8.8], ['Shannon Welles', 8.8], ['Marc Raducci', 8.8], ['Helena Cullinan', 8.8], ['Pete Postlethwaite', 8.8], ['Miranda Nolan', 8.8], ['Taylor Geare', 8.8], ['Mark Fleischmann', 8.8], ['Jean-Michel Dagory', 8.8]]}, {'name': 'action', 'id': 'action', 'data': [['Cricket Leigh', 9.3], ['Jessie Flower', 9.3], ['Olivia Hack', 9.3], ['Zach Tyler', 9.3], ['André Sogliuzzo', 9.3], ['Saiee Manjrekar', 9.1], ['Toks Olagundoye', 9.0], ['Harry Lloyd', 9.0], ['Chisa Yokoyama', 9.0], ['Yui Ishikawa', 9.0], ['Kevin Alejandro', 9.0], ['Jason Spisak', 9.0], ['Aoi Tada', 8.9], ['Greg Baldwin', 8.850000000000001], ['Jack De Sena', 8.850000000000001], ['Feng Ming Jing', 8.8], ['Nicolas Clerc', 8.8], ['Johnathan Geare', 8.8], ['Yu Bin', 8.8], ['Dileep Rao', 8.8], ['Guo Cheng', 8.8], ['Mark Fleischmann', 8.8], ['Magnus Nolan', 8.8], ['Jean-Michel Dagory', 8.8], ['Naoya Uchida', 8.8], ['Tom Berenger', 8.8], ['Wang Haoxuan', 8.8], ['Wang Yibo', 8.8], ['Tohoru Masamune', 8.8], ['Huang Ziteng', 8.8], ['Chen Zhuoxuan', 8.8], ['Adam Cole', 8.8], ['Tim Kelleher', 8.8], ['Marc Raducci', 8.8], ['Silvie Laguna', 8.8], ['Song Jiyang', 8.8], ['Qi Peixin', 8.8], ['Helena Cullinan', 8.8], ['Wang Zhuocheng', 8.8], ['Jack Murray', 8.8]]}, {'name': 'crime', 'id': 'crime', 'data': [['Anna Gunn', 9.5], ['RJ Mitte', 9.5], ['Betsy Brandt', 9.5], ['Bob Odenkirk', 9.15], ['Diego Alonso', 9.0], ['Dante Mastropierro', 9.0], ['Ana Celentano', 9.0], ['Jorge Sesán', 9.0], ['Mamoru Miyano', 9.0], ['Franco Tirri', 9.0], ['Nozomu Sasaki', 9.0], ['Augusto Brítez', 9.0], ['Ariel Staltari', 9.0], ['Noriko Hidaka', 9.0], ['Ethan Herisse', 8.9], ['Unsho Ishizuka', 8.9], ['Aunjanue Ellis', 8.9], ['John Leguizamo', 8.9], ['Niecy Nash', 8.9], ['Marquis Rodriguez', 8.9], ['Aoi Tada', 8.9], ['Asante Blackk', 8.9], ['Caleel Harris', 8.9], ['Kylie Bunbury', 8.9], ['Patrick Fabian', 8.8], ['Tony Dalton', 8.8], ['Matias Varela', 8.8], ['Andrea Londo', 8.8], ['Matt Whelan', 8.8], ['Giancarlo Esposito', 8.8], ['Pêpê Rapazote', 8.8], ['Michael Mando', 8.8], ['Michael Stahl-David', 8.8], ['Francisco Denis', 8.8], ["Natasha O'Keeffe", 8.8], ['Norman Barbera', 8.7], ['Nicole Burdette', 8.7], ['Steve Forleo', 8.7], ['Michaelangelo Graziano', 8.7], ['Vincent Pastore', 8.7]]}, {'name': 'drama', 'id': 'drama', 'data': [['Betsy Brandt', 9.5], ['Anna Gunn', 9.5], ['Lee Hye-ri', 9.2], ['Ryu Jun-yeol', 9.2], ['Kim Seol', 9.2], ['Kim Sung-kyun', 9.2], ['Lee Dong-hwi', 9.2], ['Vinay Nallakadi', 9.1], ['Saiee Manjrekar', 9.1], ['Yui Okada', 9.1], ['Diego Alonso', 9.0], ['Franco Tirri', 9.0], ['Ayane Sakura', 9.0], ['Dante Mastropierro', 9.0], ['Ariel Staltari', 9.0], ['Augusto Brítez', 9.0], ['Jorge Sesán', 9.0], ['Ella Purnell', 9.0], ['Unsho Ishizuka', 8.9], ['Karthik Rathnam', 8.9], ['Kylie Bunbury', 8.9], ['Praveena Paruchuri', 8.9], ['Radha Bessy', 8.9], ['Praneeta Patnaik', 8.9], ['Nithya Sree', 8.9], ['Caleel Harris', 8.9], ['Asante Blackk', 8.9], ['Mohan Bhagath', 8.9], ['Ethan Herisse', 8.9], ['Kesava Karri', 8.9], ['Aoi Tada', 8.9], ['Byron Minns', 8.8], ['Bryan Hanna', 8.8], ['Francesca Trentacarlini', 8.8], ['Cao Junxiang', 8.8], ['Matt Whelan', 8.8], ['He Peng', 8.8], ["Richard D'Alessandro", 8.8], ['Isabel Rose', 8.8], ['Mykelti Williamson', 8.8]]}, {'name': 'animation', 'id': 'animation', 'data': [['Dante Basco', 9.3], ['André Sogliuzzo', 9.3], ['Zach Tyler', 9.3], ['Jessie Flower', 9.3], ['Cricket Leigh', 9.3], ['Hailee Steinfeld', 9.0], ['Ella Purnell', 9.0], ['Chisa Yokoyama', 9.0], ['Harry Lloyd', 9.0], ['Toks Olagundoye', 9.0], ['Jason Spisak', 9.0], ['Kevin Alejandro', 9.0], ['Aoi Tada', 8.9], ['Greg Baldwin', 8.850000000000001], ['Jack De Sena', 8.850000000000001], ['Amy Sedaris', 8.8], ['Aaron Paul', 8.8], ['Naoya Uchida', 8.8], ['Paul F. Tompkins', 8.8], ['Alison Brie', 8.8], ['Krishna', 8.7], ['Zerocalcare', 8.7], ['Valerio Mastandrea', 8.7], ['He Xiaofeng', 8.6], ['Akari Kito', 8.6], ['Zhao Chengchen', 8.6], ['Mae Whitman', 8.55], ['You Taichi', 8.5], ['Ayaka Nanase', 8.5], ['Shinnosuke Mitsushima', 8.5], ['Tao Tsuchiya', 8.5], ['Shash Hira', 8.5], ['Yukitoshi Kikuchi', 8.5], ['Nneka Okoye', 8.5], ['Awkwafina', 8.4], ['Tom Hiddleston', 8.4], ['Seychelle Gabriel', 8.4], ['Takahiro Mizushima', 8.4], ['Jason Isaacs', 8.4], ['Logan Wells', 8.4]]}, {'name': 'family', 'id': 'family', 'data': [['Zach Tyler', 9.3], ['André Sogliuzzo', 9.3], ['Jessie Flower', 9.3], ['Dante Basco', 9.3], ['Cricket Leigh', 9.3], ['Kim Sung-kyun', 9.2], ['Choi Sung-won', 9.2], ['Ryu Jun-yeol', 9.2], ['Ryu Hye-young', 9.2], ['Lee Se-young', 9.2], ['Ahn Jae-hong', 9.2], ['Choi Moo-sung', 9.2], ['You Chea-myung', 9.2], ['Kim Sun-young', 9.2], ['Kim Seol', 9.2], ['Lee Hye-ri', 9.2], ['Lee Il-hwa', 9.2], ['Ra Mi-ran', 9.2], ['Lee Min-ji', 9.2], ['Lee Dong-hwi', 9.2], ['Go Kyung-pyo', 9.2], ['Park Bo-gum', 9.2], ['Yui Okada', 9.1], ['Greg Baldwin', 8.850000000000001], ['Shin Eun-jung', 8.8], ['Jung Hae-kyun', 8.8], ['Kim Soo-jin', 8.8], ['Kim Tae-hun', 8.8], ['Kim Hyun-mok', 8.8], ['Lee So-yeong', 8.8], ['Jung Hee-tae', 8.8], ['Kim Kwon', 8.8], ['Yoon Ji-hye', 8.8], ['Song Kang', 8.8], ['Cho Seong-ha', 8.8], ['Jo Bok-rae', 8.8], ['Hong Seung-hee', 8.8], ['Corrine Koslo', 8.7], ['Geraldine James', 8.7], ['Dalmar Abuzeid', 8.7]]}, {'name': 'fantasy', 'id': 'fantasy', 'data': [['Dante Basco', 9.3], ['Zach Tyler', 9.3], ['Cricket Leigh', 9.3], ['André Sogliuzzo', 9.3], ['Jessie Flower', 9.3], ['Hailee Steinfeld', 9.0], ['Jason Spisak', 9.0], ['Harry Lloyd', 9.0], ['Banjo Ginga', 9.0], ['Chisa Yokoyama', 9.0], ['Ella Purnell', 9.0], ['Aoi Tada', 8.9], ['Greg Baldwin', 8.850000000000001], ['Jack De Sena', 8.850000000000001], ['Cao Junxiang', 8.8], ['Cao Yuchen', 8.8], ['Wang Yizhou', 8.8], ['He Peng', 8.8], ['Wang Yibo', 8.8], ['Bowen Li', 8.8], ['Zhang Jingtong', 8.8], ['Yu Bin', 8.8], ['Zheng Fanxing', 8.8], ['Xiao Zhan', 8.8], ['Lu Jianmin', 8.8], ['Wang Zhuocheng', 8.8], ['Lu Enjie', 8.8], ['Xiu Qing', 8.8], ['Meng Ziyi', 8.8], ['Ji Li', 8.8], ['Guo Cheng', 8.8], ['Huang Ziteng', 8.8], ['Song Jiyang', 8.8], ['Chen Zhuoxuan', 8.8], ['Feng Ming Jing', 8.8], ['Katsuhisa Hôki', 8.8], ['Zhu Zanjin', 8.8], ['Liu Yinjun', 8.8], ['Qi Peixin', 8.8], ['Liu Haikuan', 8.8]]}, {'name': 'music', 'id': 'music', 'data': [['Carl Gilliard', 8.8], ['Elliot Page', 8.8], ['Tai-Li Lee', 8.8], ['Mark Fleischmann', 8.8], ['Marion Cotillard', 8.8], ['Virgile Bramly', 8.8], ['Marc Raducci', 8.8], ['Earl Cameron', 8.8], ['Magnus Nolan', 8.8], ['Dileep Rao', 8.8], ['Alex Lombard', 8.8], ['Johnathan Geare', 8.8], ['Lukas Haas', 8.8], ['Joseph Gordon-Levitt', 8.8], ['Daniel Girondeaud', 8.8], ['Yuji Okumoto', 8.8], ['Lisa Reynolds', 8.8], ['Jean-Michel Dagory', 8.8], ['Talulah Riley', 8.8], ['Coralie Dedykere', 8.8], ['Peter Basham', 8.8], ['Natasha Beaumont', 8.8], ['Tohoru Masamune', 8.8], ['Helena Cullinan', 8.8], ['Miranda Nolan', 8.8], ['Tom Berenger', 8.8], ['Tom Hardy', 8.8], ['Tim Kelleher', 8.8], ['Nicolas Clerc', 8.8], ['Nicole Pulliam', 8.8], ['Andrew Pleavin', 8.8], ['Felix Scott', 8.8], ['Michael Gaston', 8.8], ['Jack Murray', 8.8], ['Taylor Geare', 8.8], ['Pete Postlethwaite', 8.8], ['Russ Fega', 8.8], ['Jill Maddrell', 8.8], ['Ryan Hayward', 8.8], ['Claire Geare', 8.8]]}, {'name': 'romance', 'id': 'romance', 'data': [['Ahn Jae-hong', 9.2], ['Ra Mi-ran', 9.2], ['Lee Hye-ri', 9.2], ['Sung Dong-il', 9.2], ['Ryu Jun-yeol', 9.2], ['Kim Sung-kyun', 9.2], ['Ryu Hye-young', 9.2], ['Kim Seol', 9.2], ['Lee Dong-hwi', 9.2], ['Christopher Jones', 8.8], ['Matt Wallace', 8.8], ['Dick Cavett', 8.8], ['Michael McFall', 8.8], ['David Brisbin', 8.8], ['Michael Mattison', 8.8], ['Daniel J. Gillooly', 8.8], ['Liu Haikuan', 8.8], ['Liu Yinjun', 8.8], ['Lu Enjie', 8.8], ['Lu Jianmin', 8.8], ['Daniel C. Striepeke', 8.8], ['Michael Kemmerling', 8.8], ['Michael Jace', 8.8], ['Michael Conner Humphreys', 8.8], ['Michael Burgess', 8.8], ['Margo Moorer', 8.8], ['Don Fischer', 8.8], ['Marla Sucharetza', 8.8], ['Meng Ziyi', 8.8], ['Marlena Smalls', 8.8], ['Mary Ellen Trainor', 8.8], ['Matt Rebenkoff', 8.8], ['Mark Matheisen', 8.8], ['Al Harrington', 8.8], ['Mike Jolly', 8.8], ['Emily Carey', 8.8], ['Ji Li', 8.8], ['Jim Hanks', 8.8], ['Jacqueline Lovell', 8.8], ['Joe Alaskey', 8.8]]}, {'name': 'reality', 'id': 'reality', 'data': [['Carla Hall', 8.9], ['Niklas Ekstedt', 8.9], ['Heston Blumenthal', 8.9], ['Jayde Adams', 8.9], ['Andrew Flintoff', 8.7], ['Chris Harris', 8.7], ['Bo Burnham', 8.7], ['Paddy McGuinness', 8.7], ['Sue Perkins', 8.6], ['Mary Berry', 8.6], ['Mel Giedroyc', 8.6], ['Brooke Satchwell', 8.6], ['Paul Hollywood', 8.6], ['Sydney Scotia', 8.5], ['Dylan Playfair', 8.5], ['Ellie Harvie', 8.5], ['Travis Turner', 8.5], ['Harrison Houde', 8.5], ['Kolton Stewart', 8.5], ['Charlie Storwick', 8.5], ['Mark Towle', 8.4], ['Shawn Pilot', 8.4], ['Jasper Liu', 8.4], ['Constance Nunes', 8.4], ['Antoni Porowski', 8.35], ['Karamo Brown', 8.35], ['Moon Ga-young', 8.3], ['J. Neilson', 8.3], ['Grady Powell', 8.3], ['David Lain Baker', 8.3], ['An So-hee', 8.3], ['Byeon Woo-seok', 8.3], ['Kim Ye-won', 8.3], ['Kim Seon-ho', 8.3], ['Shin Hyun-soo', 8.3], ['Doug Marcaida', 8.3], ['Lee Yi-kyung', 8.3], ['Kiko Mizuhara', 8.2], ['Ayumu Mochizuki', 8.1], ['Benji Chester', 8.1]]}, {'name': 'western', 'id': 'western', 'data': [['Aoi Tada', 8.9], ['Megumi Hayashibara', 8.9], ['Koichi Yamadera', 8.9], ['Unsho Ishizuka', 8.9], ['Jake Garber', 8.4], ['Kerry Sims', 8.4], ['Clay Donahue Fontenot', 8.4], ['Kesha Bullard', 8.4], ['Kerry Washington', 8.4], ['Cooper Huckabee', 8.4], ['Tenaj L. Jackson', 8.4], ['Ted Neeley', 8.4], ['Keniaryn Mitchell', 8.4], ['Misty Upham', 8.4], ['Takara Clark', 8.4], ['Dane Rhodes', 8.4], ["Monica Rene'e Anderson", 8.4], ['Danièle Watts', 8.4], ['Dave Coennen', 8.4], ['Kasey James', 8.4], ['Ritchie Montgomery', 8.4], ['Cindy Mah', 8.4], ['Ned Bellamy', 8.4], ['Christopher Berry', 8.4], ['Brian Brown', 8.4], ['Laura Cayouette', 8.4], ['Tom Wopat', 8.4], ['Kinetic', 8.4], ['Carl Bailey', 8.4], ['Carl Singleton', 8.4], ['Mike DeMille', 8.4], ['Miriam F. Glover', 8.4], ['Tom Savini', 8.4], ['Catherine Lambert', 8.4], ['Kimberley Drummond', 8.4], ['Jamie Foxx', 8.4], ['Kim Robillard', 8.4], ['Kim Collins', 8.4], ['Christoph Waltz', 8.4], ['David G. Baker', 8.4]]}, {'name': 'thriller', 'id': 'thriller', 'data': [['Betsy Brandt', 9.5], ['Anna Gunn', 9.5], ['Bob Odenkirk', 9.5], ['Nozomu Sasaki', 9.0], ['Noriko Hidaka', 9.0], ['Unsho Ishizuka', 8.9], ['Aoi Tada', 8.9], ['Ken Watanabe', 8.8], ['Lu Enjie', 8.8], ['Liu Yinjun', 8.8], ['Song Jiyang', 8.8], ['Adam Cole', 8.8], ['Bowen Li', 8.8], ['Dileep Rao', 8.8], ['Huang Ziteng', 8.8], ['Matias Varela', 8.8], ['Nicolas Clerc', 8.8], ['Matt Whelan', 8.8], ['Lisa Reynolds', 8.8], ['Lu Jianmin', 8.8], ['Nicole Pulliam', 8.8], ['Marc Raducci', 8.8], ['Yu Bin', 8.8], ['Daniel Girondeaud', 8.8], ['Yuji Okumoto', 8.8], ['Andrea Londo', 8.8], ['Carman Lee', 8.8], ['Liu Haikuan', 8.8], ['Carl Gilliard', 8.8], ['Shannon Welles', 8.8], ['Talulah Riley', 8.8], ['Wang Haoxuan', 8.8], ['Wang Yibo', 8.8], ['Wang Yizhou', 8.8], ['Wang Zhuocheng', 8.8], ['Cao Yuchen', 8.8], ['Arturo Castro', 8.8], ['Michael Stahl-David', 8.8], ['Pete Postlethwaite', 8.8], ['Taylor Geare', 8.8]]}, {'name': 'horror', 'id': 'horror', 'data': [['Ayane Sakura', 9.0], ['Marina Inoue', 9.0], ['Yui Ishikawa', 9.0], ['Kisho Taniyama', 9.0], ['Natsuki Hanae', 8.85], ['Huang Ziteng', 8.8], ['Ji Li', 8.8], ['Bowen Li', 8.8], ['Song Jiyang', 8.8], ['Xuan Lu', 8.8], ['Xiu Qing', 8.8], ['Xiao Zhan', 8.8], ['Carman Lee', 8.8], ['He Peng', 8.8], ['Wang Zhuocheng', 8.8], ['Wang Yizhou', 8.8], ['Wang Yifei', 8.8], ['Wang Yibo', 8.8], ['Wang Haoxuan', 8.8], ['Guo Cheng', 8.8], ['Feng Ming Jing', 8.8], ['Meng Ziyi', 8.8], ['Cao Junxiang', 8.8], ['Qi Peixin', 8.8], ['Liu Haikuan', 8.8], ['Lu Enjie', 8.8], ['Chen Zhuoxuan', 8.8], ['Lu Jianmin', 8.8], ['Liu Yinjun', 8.8], ['Cao Yuchen', 8.8], ['Zhang Jingtong', 8.8], ['Yu Bin', 8.8], ['Zheng Fanxing', 8.8], ['Zhu Zanjin', 8.8], ['Millie Bobby Brown', 8.7], ['Matthew Modine', 8.7], ['Akari Kito', 8.7], ['Aoi Yuki', 8.7], ['Hikaru Midorikawa', 8.7], ['Yoshitsugu Matsuoka', 8.7]]}, {'name': 'sport', 'id': 'sport', 'data': [['Scottie Pippen', 9.1], ['Michael Jordan', 9.1], ['Dennis Rodman', 9.1], ['Steve Kerr', 9.1], ['Phil Jackson', 9.1], ['Jurnee Smollett', 8.7], ['Dora Madison', 8.7], ['Grey Damon', 8.7], ['Aimee Teegarden', 8.7], ['Connie Britton', 8.7], ['Michael B. Jordan', 8.7], ['Kyle Chandler', 8.7], ['Matt Lauria', 8.7], ['Al Tantay', 8.6], ['Nikita Mazepin', 8.6], ['Nicholas Latifi', 8.6], ['Bodjie Pascua', 8.6], ['Antonio Giovinazzi', 8.6], ['Shamaine Buencamino', 8.6], ['John Lloyd Cruz', 8.6], ['Valtteri Bottas', 8.6], ['Janus Del Prado', 8.6], ['Harry Melling', 8.6], ['Khalil Ramos', 8.6], ['Esteban Ocon', 8.6], ['Carlos Sainz Jr.', 8.6], ['George Russell', 8.6], ['Charles Leclerc', 8.6], ['Marcin Dorociński', 8.6], ['Pierre Gasly', 8.6], ['Marc Santiago', 8.6], ['Chloe Pirrie', 8.6], ['Marielle Heller', 8.6], ['Sergio Pérez', 8.6], ['Anya Taylor-Joy', 8.6], ['Thomas Brodie-Sangster', 8.6], ['Melissa Mendez', 8.6], ['Moses Ingram', 8.6], ['Dimples Romana', 8.6], ['Daniel Ricciardo', 8.6]]}, {'name': 'documentation', 'id': 'documentation', 'data': [['Phil Jackson', 9.1], ['Steve Kerr', 9.1], ['Scottie Pippen', 9.1], ['Michael Jordan', 9.1], ['Dennis Rodman', 9.1], ['Mike Rinder', 9.0], ['Max Hughes', 8.9], ['Manwendra Tripathy', 8.9], ['Dr Pushpesh Pant', 8.9], ['Timothy Woods', 8.7], ['Justin Ashcraft', 8.7], ['Alexandra Lougheed', 8.7], ['Al Fields', 8.7], ['Jen Moones', 8.7], ['Valtteri Bottas', 8.6], ['Robby Wirramanda', 8.6], ['Annie Murphy', 8.6], ['Dimples Romana', 8.6], ['Al Tantay', 8.6], ['Noah Reid', 8.6], ['Rizwan Manji', 8.6], ['Antonio Giovinazzi', 8.6], ['Phillip Picardi', 8.6], ['Mark Pancer', 8.6], ['Pierre Gasly', 8.6], ['Sergio Pérez', 8.6], ['Dave Itzkoff', 8.6], ['Ahron Villena', 8.6], ['Shamaine Buencamino', 8.6], ['Nikita Mazepin', 8.6], ['Kathleen Zellner', 8.6], ['Troy', 8.6], ['Khalil Ramos', 8.6], ['Brooke Satchwell', 8.6], ['David Langer', 8.6], ['Sarah Levy', 8.6], ['Cameron Crowe', 8.6], ['Will Arnett', 8.6], ['Lewis Hamilton', 8.6], ['Ira Madison III', 8.6]]}, {'name': 'war', 'id': 'war', 'data': [['Naoya Uchida', 8.8], ['Yuto Uemura', 8.8], ['Kensho Ono', 8.8], ['Kim Hye-eun', 8.7], ['Kim Na-woon', 8.7], ['Lee Jung-hyun', 8.7], ['Yoon Ju-man', 8.7], ['David Lee McInnis', 8.7], ['Lee Seung-joon', 8.7], ['Jang Dong-yoon', 8.7], ['Bae Jung-nam', 8.7], ['Choi Moo-sung', 8.7], ['Kim Min-jung', 8.7], ['Choi Jin-ho', 8.7], ['Choi Jong-won', 8.7], ['Yoo Yeon-seok', 8.7], ['Kim Tae-ri', 8.7], ['Lim Cheol-soo', 8.7], ['Yukana', 8.7], ['Jeon Jin-seo', 8.7], ['Kim Nam-hee', 8.7], ['Shin Jung-geun', 8.7], ['Byun Yo-han', 8.7], ['Seo Yu-jeong', 8.7], ['Derek Jacobi', 8.7], ['Lee Jung-eun', 8.7], ['Seong Yu-bin', 8.7], ['Park Ah-in', 8.7], ['Nam Chang-hee', 8.7], ['Hakuryu', 8.7], ['Kim Eung-soo', 8.7], ['Kim Eui-sung', 8.7], ['Ami Koshimizu', 8.7], ['Kang Shin-il', 8.7], ['Oh Ah-yeon', 8.7], ['Jun Fukuyama', 8.7], ['Heo Jung-eun', 8.7], ['Kim Byung-chul', 8.7], ['Kim Kang-hoon', 8.7], ['Park Jeong-min', 8.7]]}, {'name': 'history', 'id': 'history', 'data': [['Phil Jackson', 9.1], ['Steve Kerr', 9.1], ['Michael Jordan', 9.1], ['Scottie Pippen', 9.1], ['Dennis Rodman', 9.1], ['Leah Remini', 9.0], ['Mike Rinder', 9.0], ['Niecy Nash', 8.9], ['Kylie Bunbury', 8.9], ['Marquis Rodriguez', 8.9], ['Aunjanue Ellis', 8.9], ['Manwendra Tripathy', 8.9], ['Ethan Herisse', 8.9], ['John Leguizamo', 8.9], ['Asante Blackk', 8.9], ['Caleel Harris', 8.9], ['Dr Pushpesh Pant', 8.9], ['Jharrel Jerome', 8.9], ['Hiroki Yasumoto', 8.8], ['Kensho Ono', 8.8], ['Akio Otsuka', 8.8], ['Naoya Uchida', 8.8], ['Choi Jin-ho', 8.7], ['Kim Kang-hoon', 8.7], ['Kim Hye-eun', 8.7], ['Kim Kap-soo', 8.7], ['Park Ah-in', 8.7], ['Kim Min-jung', 8.7], ['Kim Na-woon', 8.7], ['Kim Nam-hee', 8.7], ['Nam Chang-hee', 8.7], ['Kim Eui-sung', 8.7], ['Kim Si-eun', 8.7], ['Kim Tae-ri', 8.7], ['Kim Eung-soo', 8.7], ['Gillian Anderson', 8.7], ['Kim Byung-chul', 8.7], ['Yoon Ju-man', 8.7], ['Erin Doherty', 8.7], ['Jeon Jin-seo', 8.7]]}];

top_10_actors, drilldown_data = process_drilldown_data(originalDrilldownData)


In [None]:
drilldown_data

[{'name': 'comedy',
  'id': 'comedy',
  'data': ['Ahn Jae-hong',
   'Kim Sung-kyun',
   'Kim Seol',
   'Ra Mi-ran',
   'Park Bo-gum',
   'Ryu Hye-young',
   'Ryu Jun-yeol',
   'Lee Hye-ri',
   'Chisa Yokoyama',
   'Banjo Ginga',
   'Paul Riley',
   'Michael Richards',
   'Mark Cox',
   'Ford Kiernan',
   'Jason Alexander',
   'Jane McCarry',
   'Unsho Ishizuka',
   'Sanjeev Kohli',
   'Gavin Mitchell',
   'Aoi Tada',
   'Greg Hemphill',
   'Shin Hyun-bin',
   'Huang Ziteng',
   'He Peng',
   'Yu Bin',
   'Bowen Li',
   'Zhang Jingtong',
   'Liu Haikuan',
   'Moon Tae-Yoo',
   'Lu Jianmin',
   'Zheng Fanxing',
   'Cao Junxiang',
   'Jeon Mi-do',
   'Cao Yuchen',
   'Liu Yinjun',
   'Katsuhisa Hôki',
   'Wang Haoxuan',
   'Song Jiyang',
   'You Chea-myung',
   'Meng Ziyi']},
 {'name': 'european',
  'id': 'european',
  'data': ['Mark Cox',
   'Greg Hemphill',
   'Sanjeev Kohli',
   'Gavin Mitchell',
   'Paul Riley',
   'Jane McCarry',
   'Ford Kiernan',
   'Graham Chapman',
   'Michael Pa