In [1]:
from pykalman import KalmanFilter
import numpy as np
import pandas as pd
import sys
import matplotlib
import matplotlib.pyplot as plt
from skimage.color import lab2rgb
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
import skimage
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from functools import reduce

In [2]:
wikidata = pd.read_json('movies/data/wikidata-movies.json.gz', orient='record', lines=True, encoding="utf8")
genres = pd.read_json('movies/data/genres.json.gz', orient='record', lines=True, encoding="utf8")

In [3]:
movies = wikidata[wikidata['made_profit'].notnull()]

In [4]:
def map_genre(row):
    result = []
    for genre_code in row['genre']:
        matches = genres[genres['wikidata_id'] == genre_code]['genre_label'].values
        for match in matches:
            result.append(match)
    return result

In [5]:
movies['genre_names'] = movies.apply(map_genre,axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [6]:
movies

Unnamed: 0,based_on,cast_member,country_of_origin,director,enwiki_title,filming_location,genre,imdb_id,label,made_profit,main_subject,metacritic_id,original_language,publication_date,rotten_tomatoes_id,series,wikidata_id,genre_names
6,,"[Q5126010, Q3390414, Q5676024, Q237021]",Q29,[Q51892574],Orbiter 9,,"[Q24925, Q21010853]",tt3469798,Orbiter 9,0.0,,,,2017-04-07,m/orbiter_9,,Q42577704,"[science fiction, drama]"
69,,,Q30,"[Q3384479, Q351884]",Despicable Me,,[Q157443],tt1323594,Despicable Me,1.0,,movie/despicable-me,Q1860,2010-06-20,m/1214097,,Q4447,[comedy film]
73,,"[Q386349, Q1605965, Q3805579, Q271162, Q463226...",Q30,[Q2071],Eraserhead,[Q99],"[Q130232, Q200092, Q5967378]",tt0074486,Eraserhead,1.0,[Q906343],,Q1860,1977-01-01,m/eraserhead,,Q11618,"[drama film, horror film, speculative fiction]"
81,Q17017426,"[Q117500, Q1376880, Q11930, Q311169, Q951634, ...",Q30,[Q11930],Dances with Wolves,[Q1558],"[Q130232, Q369747, Q21590660, Q21010853, Q319221]",tt0099348,Dances with Wolves,1.0,,movie/dances-with-wolves,Q1860,1990-11-09,m/dances_with_wolves,,Q20456,"[drama film, war film, Western, drama, adventu..."
84,,"[Q38111, Q211553, Q177311, Q8927, Q173399, Q20...",Q145,[Q25191],Inception,"[Q99, Q387047, Q17, Q90, Q1951, Q7275217, Q126...","[Q496523, Q471839, Q2484376, Q188473, Q319221]",tt1375666,Inception,1.0,"[Q544830, Q4425624]",movie/inception,Q1860,2010-07-08,m/inception,,Q25188,"[heist film, science fiction film, thriller fi..."
92,,"[Q229313, Q445772, Q727988, Q3163137, Q1372392...",Q16,[Q6385039],Mama (2013 film),"[Q172, Q133116, Q13939]",[Q200092],tt2023587,Mama,1.0,[Q80837],movie/mama,Q1860,2013-01-17,m/mama_2013,,Q29446,[horror film]
113,Q243556,"[Q34012, Q41163, Q95043, Q464714, Q171736, Q32...",Q30,[Q56094],The Godfather,"[Q18438, Q60, Q1408, Q1460]","[Q130232, Q959790, Q7444356, Q21010853, Q52162...",tt0068646,The Godfather,1.0,[Q46952],movie/the-godfather,Q1860,1972-03-15,m/godfather,Q3225260,Q47703,"[drama film, crime film, gangster film, drama,..."
125,,"[Q483118, Q23547, Q108283, Q215072, Q270664, Q...",Q30,[Q483118],Argo (2012 film),"[Q406, Q65, Q43]","[Q622291, Q18620604]",tt1024648,Argo,1.0,"[Q589673, Q1032059, Q23036198]",movie/argo,Q1860,2012-01-01,m/argo_2012,,Q59653,"[political thriller, dramatization]"
203,Q7857661,"[Q317343, Q57147, Q244674, Q343616, Q208649, Q...",Q145,[Q706475],12 Years a Slave (film),[Q34404],"[Q130232, Q645928, Q52162262]",tt2024544,12 Years a Slave,1.0,"[Q118382, Q8461]",movie/12-years-a-slave,Q1860,2013-08-30,m/12_years_a_slave,,Q3023357,"[drama film, biographical film, film based on ..."
483,,"[Q295803, Q200534, Q228865, Q200405, Q314133, ...",Q145,[Q191755],Only Lovers Left Alive,"[Q183, Q365]","[Q130232, Q1054574, Q2137852]",tt1714915,Only Lovers Left Alive,1.0,[Q46721],movie/only-lovers-left-alive,Q1860,2013-05-25,m/only_lovers_left_alive,,Q3352751,"[drama film, romance film, vampire film]"


In [7]:
rotten_tomatoes = pd.read_json('movies/data/rotten-tomatoes.json.gz', orient='record', lines=True)

In [8]:
omdb = pd.read_json('movies/data/omdb-data.json.gz', orient='record', lines=True)

In [9]:
combined = movies.join(rotten_tomatoes.set_index('rotten_tomatoes_id'), on='rotten_tomatoes_id', rsuffix='_rt')

In [10]:
combined = movies.join(omdb.set_index('imdb_id'), on='imdb_id')

In [11]:
combined

Unnamed: 0,based_on,cast_member,country_of_origin,director,enwiki_title,filming_location,genre,imdb_id,label,made_profit,...,metacritic_id,original_language,publication_date,rotten_tomatoes_id,series,wikidata_id,genre_names,omdb_awards,omdb_genres,omdb_plot
6,,"[Q5126010, Q3390414, Q5676024, Q237021]",Q29,[Q51892574],Orbiter 9,,"[Q24925, Q21010853]",tt3469798,Orbiter 9,0.0,...,,,2017-04-07,m/orbiter_9,,Q42577704,"[science fiction, drama]",2 nominations.,"[Drama, Romance, Sci-Fi]",Helena is a young girl who spent all her life ...
69,,,Q30,"[Q3384479, Q351884]",Despicable Me,,[Q157443],tt1323594,Despicable Me,1.0,...,movie/despicable-me,Q1860,2010-06-20,m/1214097,,Q4447,[comedy film],Nominated for 1 Golden Globe. Another 3 wins &...,"[Animation, Adventure, Comedy]",In a happy suburban neighborhood surrounded by...
73,,"[Q386349, Q1605965, Q3805579, Q271162, Q463226...",Q30,[Q2071],Eraserhead,[Q99],"[Q130232, Q200092, Q5967378]",tt0074486,Eraserhead,1.0,...,,Q1860,1977-01-01,m/eraserhead,,Q11618,"[drama film, horror film, speculative fiction]",2 wins & 1 nomination.,[Horror],A film that defies conventional logic and stor...
81,Q17017426,"[Q117500, Q1376880, Q11930, Q311169, Q951634, ...",Q30,[Q11930],Dances with Wolves,[Q1558],"[Q130232, Q369747, Q21590660, Q21010853, Q319221]",tt0099348,Dances with Wolves,1.0,...,movie/dances-with-wolves,Q1860,1990-11-09,m/dances_with_wolves,,Q20456,"[drama film, war film, Western, drama, adventu...",Won 7 Oscars. Another 43 wins & 37 nominations.,"[Adventure, Drama, Western]",Lt. John Dunbar is dubbed a hero after he acci...
84,,"[Q38111, Q211553, Q177311, Q8927, Q173399, Q20...",Q145,[Q25191],Inception,"[Q99, Q387047, Q17, Q90, Q1951, Q7275217, Q126...","[Q496523, Q471839, Q2484376, Q188473, Q319221]",tt1375666,Inception,1.0,...,movie/inception,Q1860,2010-07-08,m/inception,,Q25188,"[heist film, science fiction film, thriller fi...",Won 4 Oscars. Another 152 wins & 204 nominations.,"[Action, Adventure, Sci-Fi]","Dom Cobb is a skilled thief, the absolute best..."
92,,"[Q229313, Q445772, Q727988, Q3163137, Q1372392...",Q16,[Q6385039],Mama (2013 film),"[Q172, Q133116, Q13939]",[Q200092],tt2023587,Mama,1.0,...,movie/mama,Q1860,2013-01-17,m/mama_2013,,Q29446,[horror film],11 wins & 18 nominations.,"[Horror, Thriller]","The senior partner of an investment brokerage,..."
113,Q243556,"[Q34012, Q41163, Q95043, Q464714, Q171736, Q32...",Q30,[Q56094],The Godfather,"[Q18438, Q60, Q1408, Q1460]","[Q130232, Q959790, Q7444356, Q21010853, Q52162...",tt0068646,The Godfather,1.0,...,movie/the-godfather,Q1860,1972-03-15,m/godfather,Q3225260,Q47703,"[drama film, crime film, gangster film, drama,...",Won 3 Oscars. Another 24 wins & 28 nominations.,"[Crime, Drama]",When the aging head of a famous crime family d...
125,,"[Q483118, Q23547, Q108283, Q215072, Q270664, Q...",Q30,[Q483118],Argo (2012 film),"[Q406, Q65, Q43]","[Q622291, Q18620604]",tt1024648,Argo,1.0,...,movie/argo,Q1860,2012-01-01,m/argo_2012,,Q59653,"[political thriller, dramatization]",Won 3 Oscars. Another 94 wins & 152 nominations.,"[Biography, Drama, Thriller]","In 1979, the American embassy in Iran was inva..."
203,Q7857661,"[Q317343, Q57147, Q244674, Q343616, Q208649, Q...",Q145,[Q706475],12 Years a Slave (film),[Q34404],"[Q130232, Q645928, Q52162262]",tt2024544,12 Years a Slave,1.0,...,movie/12-years-a-slave,Q1860,2013-08-30,m/12_years_a_slave,,Q3023357,"[drama film, biographical film, film based on ...",Won 3 Oscars. Another 235 wins & 326 nominations.,"[Biography, Drama, History]",Based on an incredible true story of one man's...
483,,"[Q295803, Q200534, Q228865, Q200405, Q314133, ...",Q145,[Q191755],Only Lovers Left Alive,"[Q183, Q365]","[Q130232, Q1054574, Q2137852]",tt1714915,Only Lovers Left Alive,1.0,...,movie/only-lovers-left-alive,Q1860,2013-05-25,m/only_lovers_left_alive,,Q3352751,"[drama film, romance film, vampire film]",9 wins & 42 nominations.,"[Comedy, Drama, Fantasy]","Adam (Tom Hiddleston), an underground musician..."
