In [40]:
import pandas as pd
import numpy as np

In [41]:
df_movies_metadata = pd.read_csv('movies_metadata.csv')

In [42]:
print(df_movies_metadata.columns)

Index([u'adult', u'belongs_to_collection', u'budget', u'genres', u'homepage',
       u'id', u'imdb_id', u'original_language', u'original_title', u'overview',
       u'popularity', u'poster_path', u'production_companies',
       u'production_countries', u'release_date', u'revenue', u'runtime',
       u'spoken_languages', u'status', u'tagline', u'title', u'video',
       u'vote_average', u'vote_count'],
      dtype='object')


In [43]:
df_movies_metadata.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033,81,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249,104,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0,101,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,1995-12-22,81452156,127,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,1995-02-10,76578911,106,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173


In [44]:
df_movies_metadata.shape

(45466, 24)

In [45]:
df_movies_metadata.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 45466 entries, 0 to 45465
Data columns (total 24 columns):
adult                    45466 non-null object
belongs_to_collection    4494 non-null object
budget                   45466 non-null object
genres                   45466 non-null object
homepage                 7782 non-null object
id                       45466 non-null object
imdb_id                  45449 non-null object
original_language        45455 non-null object
original_title           45466 non-null object
overview                 44512 non-null object
popularity               45461 non-null object
poster_path              45080 non-null object
production_companies     45463 non-null object
production_countries     45463 non-null object
release_date             45379 non-null object
revenue                  45460 non-null float64
runtime                  45203 non-null float64
spoken_languages         45460 non-null object
status                   45379 non-null objec

In [46]:
# Remove features that are not useful
df_movies_metadata = df_movies_metadata.drop(['imdb_id'], axis=1)

In [47]:
df_movies_metadata[df_movies_metadata['original_title'] != df_movies_metadata['title']][['title', 'original_title']].head()

Unnamed: 0,title,original_title
28,The City of Lost Children,La Cité des Enfants Perdus
29,Shanghai Triad,摇啊摇，摇到外婆桥
32,Wings of Courage,"Guillaumet, les ailes du courage"
57,The Postman,Il postino
58,The Confessional,Le confessionnal


In [48]:
df_movies_metadata = df_movies_metadata.drop('original_title', axis=1)

In [49]:
df_movies_metadata['revenue'] = df_movies_metadata['revenue'].replace(0, np.nan)

In [50]:
df_movies_metadata['budget'] = pd.to_numeric(df_movies_metadata['budget'], errors='coerce')
df_movies_metadata['budget'] = df_movies_metadata['budget'].replace(0, np.nan)
df_movies_metadata[df_movies_metadata['budget'].isnull()].shape

(36576, 22)

In [51]:
# Drop Adult movies
df_movies_metadata['adult'].value_counts()

False                                                                                                                             45454
True                                                                                                                                  9
 Avalanche Sharks tells the story of a bikini contest that turns into a horrifying affair when it is hit by a shark avalanche.        1
 Rune Balot goes to a casino connected to the October corporation to try to wrap up her case once and for all.                        1
 - Written by Ørnås                                                                                                                   1
Name: adult, dtype: int64

In [52]:
df_movies_metadata = df_movies_metadata.drop('adult', axis=1)

In [53]:
base_poster_url = 'http://image.tmdb.org/t/p/w185/'
df_movies_metadata['poster_path'] = base_poster_url + df_movies_metadata['poster_path']

In [54]:
# create the year feature
df_movies_metadata['year'] = pd.to_datetime(df_movies_metadata['release_date'], errors='coerce').apply(lambda x: str(x).split('-')[0] if x != np.nan else np.nan)

In [55]:
# create the return feature
df_movies_metadata['return'] = df_movies_metadata['revenue'] / df_movies_metadata['budget']
df_movies_metadata[df_movies_metadata['return'].isnull()].shape
# Let's explore our dataset again
df_movies_metadata.head()

Unnamed: 0,belongs_to_collection,budget,genres,homepage,id,original_language,overview,popularity,poster_path,production_companies,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,return
0,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000.0,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,en,"Led by Woody, Andy's toys live happily in his ...",21.9469,http://image.tmdb.org/t/p/w185//rhIRbceoE9lR4v...,"[{'name': 'Pixar Animation Studios', 'id': 3}]",...,81,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415,1995,12.451801
1,,65000000.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,en,When siblings Judy and Peter discover an encha...,17.0155,http://image.tmdb.org/t/p/w185//vzmL6fP7aPKNKP...,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...",...,104,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413,1995,4.043035
2,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,en,A family wedding reignites the ancient feud be...,11.7129,http://image.tmdb.org/t/p/w185//6ksm1sjKMFLbO7...,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...",...,101,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92,1995,
3,,16000000.0,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,en,"Cheated on, mistreated and stepped on, the wom...",3.85949,http://image.tmdb.org/t/p/w185//16XOMpEaLWkrcP...,[{'name': 'Twentieth Century Fox Film Corporat...,...,127,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34,1995,5.09076
4,"{'id': 96871, 'name': 'Father of the Bride Col...",,"[{'id': 35, 'name': 'Comedy'}]",,11862,en,Just when George Banks has recovered from his ...,8.38752,http://image.tmdb.org/t/p/w185//e64sOI48hQXyru...,"[{'name': 'Sandollar Productions', 'id': 5842}...",...,106,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173,1995,


In [56]:
# Filter only on Science Fiction and Fantasy
df_movies_sff = df_movies_metadata[df_movies_metadata['genres'].str.contains("Science Fiction|Fantasy")]
df_movies_sff.shape

(4844, 23)

In [57]:
# Let's explore our dataset again
df_movies_sff.head()

Unnamed: 0,belongs_to_collection,budget,genres,homepage,id,original_language,overview,popularity,poster_path,production_companies,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,return
1,,65000000.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,en,When siblings Judy and Peter discover an encha...,17.0155,http://image.tmdb.org/t/p/w185//vzmL6fP7aPKNKP...,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...",...,104,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413,1995,4.043035
23,,,"[{'id': 18, 'name': 'Drama'}, {'id': 14, 'name...",,12665,en,Harassed by classmates who won't accept his sh...,12.1331,http://image.tmdb.org/t/p/w185//1uRKsxOCtgz0xV...,"[{'name': 'Caravan Pictures', 'id': 175}, {'na...",...,111,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,An extraordinary encounter with another human ...,Powder,False,6.3,143,1995,
28,,18000000.0,"[{'id': 14, 'name': 'Fantasy'}, {'id': 878, 'n...",,902,fr,A scientist in a surrealist society kidnaps ch...,9.82242,http://image.tmdb.org/t/p/w185//eVo6ewq4akfyJY...,"[{'name': 'Procirep', 'id': 311}, {'name': 'Co...",...,108,"[{'iso_639_1': 'cn', 'name': '广州话 / 廣州話'}, {'i...",Released,Where happily ever after is just a dream.,The City of Lost Children,False,7.6,308,1995,0.096589
31,,29500000.0,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",,63,en,"In the year 2035, convict James Cole reluctant...",12.2973,http://image.tmdb.org/t/p/w185//6Sj9wDu3YugthX...,"[{'name': 'Universal Pictures', 'id': 33}, {'n...",...,129,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,The future is history.,Twelve Monkeys,False,7.4,2470,1995,5.72339
33,"{'id': 9435, 'name': 'Babe Collection', 'poste...",30000000.0,"[{'id': 14, 'name': 'Fantasy'}, {'id': 18, 'na...",,9598,en,Babe is a little pig who doesn't quite know hi...,14.4048,http://image.tmdb.org/t/p/w185//gN6X3fwPya8pLf...,"[{'name': 'Universal Pictures', 'id': 33}, {'n...",...,89,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,A little pig goes a long way.,Babe,False,6.0,756,1995,8.471164


In [58]:
# Sub-genre List
# - alien (Alien)
# - monster (Monster)
# - space (Space Opera)
# - robot (AI/Robot)
# - time_travel (Time Travel)
# - super (Superheroes)
# - dystopian (Dystopian)
# - fantasy (Fantasy)
# - techno (Technology Thriller)
# Approach: Create new column called sub_genres
# if overview column contains word, append that word + '/' to the cell in sub_genres

# Create new column called sub_genres
df_movies_sff['sub_genres'] = ''

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [59]:
def search(df, words):  #1
    """
    Return a sub-DataFrame of those rows whose column match any of the words.
    """
    return df[np.logical_or.reduce([df.select_dtypes(include=[object]).stack().\
                                    str.contains(word, case=False, na=False).unstack().any(1) for word in words])]   # 2

In [65]:
alien_keywords = [
  'saucer',
  'alien',
  'life',
  'human',
  'extraterrestrial',
  'ufo',
  'parasite',
  'meteorite',
  'meteor',
  'ET',
  'saucers',
  'aliens',
  'extraterrestrials',
  'ufos',
  'parasites',
  'meteorites',
  'meteors',
  'supernatural',
  'xenomorph',
  'area 51',
  'xenomorphs',
]

space_keywords = [
  'luke',
  'star',
  'spaceship',
  'battle',
  'interstellar',
  'planet',
  'enterprise',
  'skywalker',
  'darth',
  'droid'
  'earth',
  'spock',
  'kirk',
  'space',
  'capt',
  'moon',
  'spacecraft',
  'crew',
  'comet',
  'mars',
  'galaxy', 'nebula', 'astronaut', 'galaxies', 'nebulas', 'astronauts', 'stars', 'spaceships', 'battles', 'planets', 'droids', 'moons', 'spacecrafts', 'comets', 'jupiter', 'pluto', 'venus', 'sun', 'suns', 'neptune', 'asteroid', 'asteroids', 'meteor', 'meteors', 'meteorites', 'meteorite', 'mercury', 'antimatter', 'black hole', 'celestial', 'comet', 'comets', 'cosmic', 'constellation', 'exoplanet', 'lodestar', 'milky way', 'nova', 'supernova', 'pulsar', 'quasar', 'satellite', 'satellites', 'solar', 'world', 'eclipse', 'universe', 'interplanetary',
]

robot_keywords = [
  'cop',
  'ai',
  'cyborg',
  'terminator',
  'army',
  'android',
  'weapon',
  'robot',
  'intelligence',
  'machine',
  'artificial', 'mech', 'machina', 'mecha', 'cyborgs', 'terminators', 'armies', 'androids', 'droid', 'droids', 'weapons', 'robots', 'machines', 'robotic', 'robotics', 'automaton', 'automatons', 'bot', 'bots', 'bionic', 'bionics', 'mechanism', 'mechanisms',

]

time_travel_keywords = [
  'travel',
  'time',
  'back',
  'frozen',
  'spacetime', 'eternalism', 'futurity', 'future', 'past',

]

super_keywords = [
  'marvel',
  'superman',
  'superpower',
  'super',
  'powers', 'ability', 'abilities', 'dc', 'gifted', 'x-men', 'hero', 'heroes',
]

dystopian_keywords = ['virus', 'future', 'nuclear', 'city', 'epidemic', 'prison', 'dark', 'murder', 'evil', 'violence', 'experiment'\
                      'totalitarian', 'society', 'post-apocalyptic', 'dystopian', 'experiments', 'sinister', 'futuristic', \
                      'radiation', 'biblical', 'mysterious', 'military', 'spy', 'heavenly', 'cloning', 'nazi', 'reich',\
                      'death', 'war', 'drug', 'wasteland',
                      'mutant', 'mutants', 'mutation', 'mutations', 'viruses', 'epidemics', 'clone', 'communism', 'communist', 'communists', 'dystopia', 'clones', 'wastelands', 'apocalypse', 'alternate', 'reality', 'bleak', 'dead', 'zombie', 'zombies', 'apes', 'drought', 'lack', 'dwindling', 'atmosphere', 'cyberpunk', 'political', 'unethical', 'raiders', 'raider',

]

monster_keywords = ['frankenstein', 'vampire', 'werewolf', 'shark', 'spider', 'zombie', 'blob', 'creature', 'troll', 'gypsy', 'ape'\
                      'dinosaur', 'jurassic', 'scientist', 'swamp', 'dinosaurs', 'monster', 'octopus', 'godzilla', 'scorpion',\
                      'ants', 'golem', 'giants', 'serpent', 'crocodile', 'bermuda', 'mutant', 'mutants', 'mutation', 'mutations', 'vampires', 'werewolves', 'sharks', 'spiders', 'zombies', 'dead', 'creatures', 'apes', 'scientists', 'evil', 'swamps', 'monsters', 'octopi', 'scorpions', 'reptile', 'reptilian', 'golems', 'giant', 'hulking', 'serpentine', 'crocodiles', 'genetic', 'engineered', 'aberration', 'beast', 'beasts', 'humanoid', 'humanoids', 'folklore', 'spirit', 'spirits', 'freak', 'demon', 'freaks', 'demons', 'ogre', 'ogres', 'orc', 'orcs', 'elf', 'elves', 'bogeyman', 'behemoth', 'behemoths', 'monstrosity', 'monstrosities', 'colossal', 'colossus', 'leviathan', 'leviathans', 'dracula', 'monstrous', 'ghost', 'ghosts', 'specter', 'specters', 'phantom', 'phantoms', 'apparition', 'apparitions', 'wraith', 'wraiths', 'chimera', 'chimeras',

]

techno_keywords = ['computer', 'cyber', 'matrix', 'chip', 'network', 'serum', 'data', 'virtual', 'reality', 'laser', 'video', 'game'\
                      'player', 'tech', 'memory', 'database', 'computers', 'program', 'matrices', 'databases', 'serums', 'technology', 'technologies', 'code', 'coded', 'codes', 'coding', 'coder', 'coders', 'simulator', 'simulation', 'simulators', 'simulations', 'programs', 'gadget', 'gadgets', 'electronic', 'electronics', 'cyberpunk', 'cyber', 'cybers', 'nano', 'nanotechnology', 'nanotechnologies', 'biotech', 'biotechnology', 'biotechnologies', 'software', 'softwares', 'digital',

]

fantasy_keywords = ['magic', 'magical', 'wizard', 'wizards', 'witch', 'witches', 'wizardry', 'spells', 'spell casting', 'spell caster', 'wand', 'wands', 'hobbit', 'elf', 'elves', 'orc', 'orcs', 'dragon', 'dragons', 'quest', 'quests', 'dwarves', 'dwarven', 'dwarf', 'hobbits', 'elven', 'cursed', 'curse', 'curses', 'goblin', 'goblins', 'spirit', 'spirits', 'wizarding', 'enchant', 'enchantment', 'enchantments', 'enchanted', 'enchanting', 'potion', 'potions', 'alchemy', 'alchemist', 'chemist', 'apothecary', 'elder', 'elders', 'summoner']

In [66]:
df_movies_sff.head()

Unnamed: 0,belongs_to_collection,budget,genres,homepage,id,original_language,overview,popularity,poster_path,production_companies,...,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,return,sub_genres
1,,65000000.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,en,When siblings Judy and Peter discover an encha...,17.0155,http://image.tmdb.org/t/p/w185//vzmL6fP7aPKNKP...,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...",...,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413,1995,4.043035,
23,,,"[{'id': 18, 'name': 'Drama'}, {'id': 14, 'name...",,12665,en,Harassed by classmates who won't accept his sh...,12.1331,http://image.tmdb.org/t/p/w185//1uRKsxOCtgz0xV...,"[{'name': 'Caravan Pictures', 'id': 175}, {'na...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,An extraordinary encounter with another human ...,Powder,False,6.3,143,1995,,
28,,18000000.0,"[{'id': 14, 'name': 'Fantasy'}, {'id': 878, 'n...",,902,fr,A scientist in a surrealist society kidnaps ch...,9.82242,http://image.tmdb.org/t/p/w185//eVo6ewq4akfyJY...,"[{'name': 'Procirep', 'id': 311}, {'name': 'Co...",...,"[{'iso_639_1': 'cn', 'name': '广州话 / 廣州話'}, {'i...",Released,Where happily ever after is just a dream.,The City of Lost Children,False,7.6,308,1995,0.096589,
31,,29500000.0,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",,63,en,"In the year 2035, convict James Cole reluctant...",12.2973,http://image.tmdb.org/t/p/w185//6Sj9wDu3YugthX...,"[{'name': 'Universal Pictures', 'id': 33}, {'n...",...,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,The future is history.,Twelve Monkeys,False,7.4,2470,1995,5.72339,
33,"{'id': 9435, 'name': 'Babe Collection', 'poste...",30000000.0,"[{'id': 14, 'name': 'Fantasy'}, {'id': 18, 'na...",,9598,en,Babe is a little pig who doesn't quite know hi...,14.4048,http://image.tmdb.org/t/p/w185//gN6X3fwPya8pLf...,"[{'name': 'Universal Pictures', 'id': 33}, {'n...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,A little pig goes a long way.,Babe,False,6.0,756,1995,8.471164,


In [67]:
# search(df_movies_sff, 'saucer', 'alien', 'life', 'human', 'extraterrestrial', 'ufo', 'parasite'\
                        #'meteorite', 'meteor').index
search(df_movies_sff, ['saucer', 'alien', 'life', 'human', 'extraterrestrial', 'ufo', 'parasite'\
                        'meteorite', 'meteor']).index

Int64Index([   23,    43,    59,    75,   155,   157,   169,   193,   198,
              224,
            ...
            45191, 45203, 45337, 45346, 45353, 45356, 45375, 45429, 45444,
            45445],
           dtype='int64', length=1505)

In [68]:
df_movies_sff.loc[search(df_movies_sff, alien_keywords).index, 'sub_genres'] = 'alien/'

In [69]:
df_movies_sff.loc[search(df_movies_sff, space_keywords).index, 'sub_genres'] \
= df_movies_sff['sub_genres'].astype(str) + 'space/'

In [70]:
df_movies_sff.loc[search(df_movies_sff, robot_keywords).index, 'sub_genres'] \
= df_movies_sff['sub_genres'].astype(str) + 'robot/'

In [71]:
df_movies_sff.loc[search(df_movies_sff, time_travel_keywords).index, 'sub_genres'] \
= df_movies_sff['sub_genres'].astype(str) + 'time_travel/'

In [72]:
df_movies_sff.loc[search(df_movies_sff, super_keywords).index, 'sub_genres'] \
= df_movies_sff['sub_genres'].astype(str) + 'super/'

In [73]:
df_movies_sff.loc[search(df_movies_sff, dystopian_keywords).index, 'sub_genres'] \
= df_movies_sff['sub_genres'].astype(str) + 'dystopian/'

In [74]:
df_movies_sff.loc[df_movies_sff['genres'].str.contains("Fantasy", case='False', na=False), 'sub_genres'] = df_movies_sff['sub_genres'].astype(str) + 'fantasy/'

In [75]:
df_movies_sff.loc[search(df_movies_sff, monster_keywords).index, 'sub_genres'] \
= df_movies_sff['sub_genres'].astype(str) + 'monster/'

In [76]:
df_movies_sff.loc[search(df_movies_sff, techno_keywords).index, 'sub_genres'] \
= df_movies_sff['sub_genres'].astype(str) + 'techno/'

In [77]:
df_movies_sff['sub_genres'].value_counts()

alien/space/robot/dystopian/monster/                              126
alien/space/robot/time_travel/dystopian/monster/                  125
alien/space/robot/time_travel/dystopian/fantasy/monster/           98
alien/robot/time_travel/dystopian/monster/                         77
alien/robot/fantasy/                                               76
alien/robot/time_travel/dystopian/fantasy/monster/                 75
alien/space/robot/dystopian/fantasy/monster/                       75
alien/space/dystopian/monster/                                     67
alien/robot/dystopian/fantasy/monster/                             63
alien/space/robot/dystopian/                                       62
alien/robot/dystopian/monster/                                     60
alien/fantasy/                                                     60
alien/space/robot/time_travel/dystopian/                           58
alien/space/robot/                                                 57
fantasy/            

In [78]:
# number of empty sub_genres remaining
(df_movies_sff['sub_genres'].values == '').sum()

21

In [79]:
# we still need to do some work here... label the rest of the movies with sub_genres
df_movies_sff.loc[df_movies_sff['sub_genres'].values=='']

Unnamed: 0,belongs_to_collection,budget,genres,homepage,id,original_language,overview,popularity,poster_path,production_companies,...,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,return,sub_genres
4805,,,"[{'id': 18, 'name': 'Drama'}, {'id': 878, 'nam...",,33356,en,The exploration of the effects of an unexpecte...,0.866004,http://image.tmdb.org/t/p/w185//pJ5ftKZSO7by9G...,"[{'name': 'British Film Institute (BFI)', 'id'...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,The Falls,False,8.0,9,1980,,
9421,,,"[{'id': 28, 'name': 'Action'}, {'id': 27, 'nam...",,100594,fi,,0.203204,http://image.tmdb.org/t/p/w185//juhxfyFyQ7MXFL...,[],...,"[{'iso_639_1': 'fi', 'name': 'suomi'}]",Released,,The Book of Fate,False,3.5,2,2003,,
11324,,,"[{'id': 12, 'name': 'Adventure'}, {'id': 27, '...",,3593,en,Four explorers are summoned to Peru by the bri...,0.662919,http://image.tmdb.org/t/p/w185//5bUdwNdumc2cPz...,"[{'name': 'Paramount Pictures', 'id': 4}]",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Dr. Cyclops,False,6.1,15,1940,,
11584,,,"[{'id': 18, 'name': 'Drama'}, {'id': 878, 'nam...",,3024,en,No overview found.,0.424452,http://image.tmdb.org/t/p/w185//8byiRQ9kv2TUGn...,[],...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,The Strange Case of Dr. Jekyll and Mr. Hyde,False,0.0,0,1968,,
13024,,,"[{'id': 28, 'name': 'Action'}, {'id': 878, 'na...",,2397,en,No overview found.,1.68743,http://image.tmdb.org/t/p/w185//zU9hKyOjiR8NUp...,[],...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Fallout,False,0.0,0,1998,,
14442,,,"[{'id': 27, 'name': 'Horror'}, {'id': 878, 'na...",,112687,en,The waters surrounding an island become contam...,0.842879,http://image.tmdb.org/t/p/w185//tuyJVMke3D9esB...,"[{'name': 'Tigon British Film Productions', 'i...",...,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,An ecological nightmare gone berserk!,Doomwatch,False,7.4,7,1972,,
15360,,,"[{'id': 18, 'name': 'Drama'}, {'id': 878, 'nam...",,8652,ko,"In the aftermath of a car crash, a man discove...",0.862305,http://image.tmdb.org/t/p/w185//fwGaqe9YWSHSQZ...,"[{'name': 'Kim Ki-Duk Film', 'id': 684}, {'nam...",...,"[{'iso_639_1': 'ko', 'name': '한국어/조선말'}]",Released,A dream is a dream is a dream.,Dream,False,6.2,11,2008,,
16391,,,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,21136,ja,The Taste of Tea director Katsuhito Ishii coll...,1.49222,http://image.tmdb.org/t/p/w185//vLCLqZugcLEhoP...,"[{'name': 'Aoi Promotion', 'id': 39804}]",...,"[{'iso_639_1': 'ja', 'name': '日本語'}]",Released,,Funky Forest: The First Contact,False,6.0,15,2005,,
19253,,,"[{'id': 35, 'name': 'Comedy'}, {'id': 878, 'na...",,49084,en,A French comedy with comedians Eric &amp; Ramz...,2.6692,http://image.tmdb.org/t/p/w185//rdPGeN2PXbmhBh...,[],...,[],Released,,Steak,False,5.1,51,2007,,
20117,,,"[{'id': 18, 'name': 'Drama'}, {'id': 9648, 'na...",,177522,fi,,0.073825,http://image.tmdb.org/t/p/w185//oAUq5rhZEjDz9h...,[],...,"[{'iso_639_1': 'fi', 'name': 'suomi'}]",Released,,Lipton Cockton in the Shadows of Sodoma,False,6.0,1,1995,,


In [82]:
df_movies_sff.to_csv('movies_metadata_scififantasy_12-1-2018.csv',sep=',')