Write a problem statement & describe the goals of your study to be included in the final report
Pull the data from the IMDB API
Scrape related IMDB data
Join API & scraped data in local Postgres
Use natural language processing to understand the sentiments of users reviewing the movies
Mine & refine your data
Construct bagging and boosting ensemble models
Construct elastic net models
Perform gridsearch and validation on models
Present the results of your findings in a formal report to Netflix, including:
a problem statement,
summary statistics of the various factors (year, number of ratings, etc.),
your random forest model,
and your recommendations for next steps!

# Executive Summary 

## Problem Statement
The assignment is to examine which factors lead to certain ratings for movies in order to predict that types of movies and individual may like. Netflix has not yet focused on examining the factors that have led to movies having been rated the top movies of all time.

## Goal
   Using machine learning techniques, specifically tree-based ensemble techniques (random forests, bagging, boosting, etc.) identify key factors which contribute to successful movie ratings, and present them using graphs and narratives. 
   
   ### Deliverables
     * Formal Problem Statement (included in this report)
     * Summary Statistics
     * The machine learning model use, with supporting code used to generate the findings
     * Graphics to support the findings
     * Recommendations for next steps
    *
    

## Load Libraries 

In [1]:
import pandas as pd
import numpy as np
from imdbpie import Imdb #if libabry not found, pip install imdbpie from command line 

from IPython.display import Image


Pull the data from the IMDB API

In [5]:
imdb = Imdb()
imdb_top = imdb.top_250()
#imdb.search_for_title("The Dark Knight")
imdb_top
data = pd.DataFrame(imdb_top, columns=['can_rate', 'image', 'num_votes', 'rating', 'tconst', 'title', 'type', 'year'])
data.loc[data['type']=='feature']
data['rating'].max()
data['rating'].min()
data['tconst'].head()
data['year'].min()
from sqlalchemy import create_engine
engine = create_engine('postgresql://postgres:root@localhost:5432/test')
data.to_sql('movies', engine)


ProgrammingError: (psycopg2.ProgrammingError) can't adapt type 'dict' [SQL: 'INSERT INTO movies (index, can_rate, image, num_votes, rating, tconst, title, type, year) VALUES (%(index)s, %(can_rate)s, %(image)s, %(num_votes)s, %(rating)s, %(tconst)s, %(title)s, %(type)s, %(year)s)'] [parameters: ({'num_votes': 1757955, 'tconst': 'tt0111161', 'type': 'feature', 'can_rate': True, 'rating': 9.3, 'year': '1994', 'index': 0, 'image': {'url': 'https://images-na.ssl-images-amazon.com/images/M/MV5BODU4MjU4NjIwNl5BMl5BanBnXkFtZTgwMDU2MjEyMDE@._V1_.jpg', 'height': 1388, 'width': 933}, 'title': 'The Shawshank Redemption'}, {'num_votes': 1200556, 'tconst': 'tt0068646', 'type': 'feature', 'can_rate': True, 'rating': 9.2, 'year': '1972', 'index': 1, 'image': {'url': 'https://images-na.ssl-images-amazon.com/images/M/MV5BNTc0ZDk1YWItZDZiNi00NTdmLWE0MDctNTVhYTRhMDBmZjNjXkEyXkFqcGdeQXVyMjUzOTY1NTc@._V1_.jpg', 'height': 1000, 'width': 654}, 'title': 'The Godfather'}, {'num_votes': 824510, 'tconst': 'tt0071562', 'type': 'feature', 'can_rate': True, 'rating': 9.0, 'year': '1974', 'index': 2, 'image': {'url': 'https://images-na.ssl-images-amazon.com/images/M/MV5BOTE1MTBiYzYtMDI1OC00ZTUxLTg0ZWQtZjdjMzA0OTM1NGMwXkEyXkFqcGdeQXVyMjUzOTY1NTc@._V1_.jpg', 'height': 1000, 'width': 651}, 'title': 'The Godfather: Part II'}, {'num_votes': 1740984, 'tconst': 'tt0468569', 'type': 'feature', 'can_rate': True, 'rating': 9.0, 'year': '2008', 'index': 3, 'image': {'url': 'https://images-na.ssl-images-amazon.com/images/M/MV5BMTMxNTMwODM0NF5BMl5BanBnXkFtZTcwODAyMTk2Mw@@._V1_.jpg', 'height': 2048, 'width': 1383}, 'title': 'The Dark Knight'}, {'num_votes': 471759, 'tconst': 'tt0050083', 'type': 'feature', 'can_rate': True, 'rating': 8.9, 'year': '1957', 'index': 4, 'image': {'url': 'https://images-na.ssl-images-amazon.com/images/M/MV5BODQwOTc5MDM2N15BMl5BanBnXkFtZTcwODQxNTEzNA@@._V1_.jpg', 'height': 1023, 'width': 682}, 'title': '12 Angry Men'}, {'num_votes': 900862, 'tconst': 'tt0108052', 'type': 'feature', 'can_rate': True, 'rating': 8.9, 'year': '1993', 'index': 5, 'image': {'url': 'https://images-na.ssl-images-amazon.com/images/M/MV5BMzMwMTM4MDU2N15BMl5BanBnXkFtZTgwMzQ0MjMxMDE@._V1_.jpg', 'height': 679, 'width': 439}, 'title': "Schindler's List"}, {'num_votes': 1376620, 'tconst': 'tt0110912', 'type': 'feature', 'can_rate': True, 'rating': 8.9, 'year': '1994', 'index': 6, 'image': {'url': 'https://images-na.ssl-images-amazon.com/images/M/MV5BMTkxMTA5OTAzMl5BMl5BanBnXkFtZTgwNjA5MDc3NjE@._V1_.jpg', 'height': 1500, 'width': 1010}, 'title': 'Pulp Fiction'}, {'num_votes': 1262537, 'tconst': 'tt0167260', 'type': 'feature', 'can_rate': True, 'rating': 8.9, 'year': '2003', 'index': 7, 'image': {'url': 'https://images-na.ssl-images-amazon.com/images/M/MV5BMjE4MjA1NTAyMV5BMl5BanBnXkFtZTcwNzM1NDQyMQ@@._V1_.jpg', 'height': 519, 'width': 350}, 'title': 'The Lord of the Rings: The Return of the King'}  ... displaying 10 of 250 total bound parameter sets ...  {'num_votes': 510828, 'tconst': 'tt2084970', 'type': 'feature', 'can_rate': True, 'rating': 8.1, 'year': '2014', 'index': 248, 'image': {'url': 'https://images-na.ssl-images-amazon.com/images/M/MV5BNjI3NjY1Mjg3MV5BMl5BanBnXkFtZTgwMzk5MDQ3MjE@._V1_.jpg', 'height': 2048, 'width': 1393}, 'title': 'The Imitation Game'}, {'num_votes': 838736, 'tconst': 'tt0325980', 'type': 'feature', 'can_rate': True, 'rating': 8.0, 'year': '2003', 'index': 249, 'image': {'url': 'https://images-na.ssl-images-amazon.com/images/M/MV5BMjAyNDM4MTc2N15BMl5BanBnXkFtZTYwNDk0Mjc3._V1_.jpg', 'height': 475, 'width': 321}, 'title': 'Pirates of the Caribbean: The Curse of the Black Pearl'})]

Scrape related IMDB data
![title](https://i.imgur.com/pDq0n.png)
https://developers.themoviedb.org/3/people

In [12]:
top_actors = pd.read_csv("top_100_actors.csv")

top_actors.drop(['created', 'modified'],inplace=True,axis=1)
print(top_actors.columns)
top_actors.iloc[0]['description']

Index(['position', 'const', 'description', 'Name', 'Known for',
       'Birth date (month/day/year)'],
      dtype='object')


'Acting Abilities:     \n\nActing Skill - 5 Stars \nOverall Versatility - 5 Stars\nRole Transformation - 4.8 Stars\n\nAwards & Nominations:\n\nOscars: 3\nOscar Nominations: 12\nBAFTA Awards: 4\nBAFTA Nominations: 8\nGolden Globes: 6\nGolden Globe Nominations: 17\n\nGreatest Performances:\n\n"The Shining" - Level of Difficulty - 5 Stars\n"As Good as It Gets" - Level of Difficulty - 4.8 Stars\n"One Flew…….Cuckoo\'s Nest" - Level of Difficulty - 5 Stars'

Join API & scraped data in local Postgres

Use natural language processing to understand the sentiments of users reviewing the movies

Mine & refine your data

Construct bagging and boosting ensemble models

Construct elastic net models

Perform gridsearch and validation on models

Present the results of your findings in a formal report to Netflix, including:
  * a problem statement,
  * summary statistics of the various factors (year, number of ratings, etc.),
  * your random forest model,
  * and your recommendations for next steps!

In [167]:
imdb = Imdb({'anonymize': False,
             'locale': 'en_US',
             'exclude_episodes': False})

def movie_tests():
    print((movie.title))
#    print(('keywords', movie.tomatoes))
#    print(('rating votes', movie.rating.ratingvotes))
#    print(('FilmCountry', movie.FilmCountry))
    print(('type', movie.type))
    print(('tagline', movie.tagline))
    print(('rating', movie.rating))
    print(('certification', movie.certification))
    print(('genres', movie.genres))
    print(('runtime', movie.runtime))
    print(('writers summary', movie.writers_summary))
    print(('directors summary', movie.directors_summary))
    print(('creators', movie.creators))
    print(('cast summary', movie.cast_summary))
    print(('full credits', movie.credits))
    print(('cert', movie.certification))

#if __name__ == '__main__':
movie = imdb.get_title_by_id('tt0705926')
#movie_tests()
foo = imdb.search_for_title()


# x = 0
# for i in foo:
#     print(i['title'])
    
# print(x)

TypeError: search_for_title() missing 1 required positional argument: 'title'

In [140]:

def person_tests():
    print(('name',person.name))
    print(('name',person.name))
#    print(('firstname',person.firstname))
#    print(('gender',person.gender))
    #print(('directed',person.directed))
    #print(('acted',person.acted))
    #print(('filmography', person.filmography))
    #print(('type', person.type))
    #print(('tagline', person.tagline))
    #print(('rating', person.rating))
    #print(('certification', person.certification))
    #print(('genres', person.genres))
    #print(('runtime', person.runtime))
    #print(('writers summary', person.writers_summary))
    #print(('directors summary', person.directors_summary))
    #print(('creators', person.creators))
    #print(('cast summary', person.cast_summary))
    #print(('full credits', person.credits))
    #print(('cert', person.certification))
    
person = imdb.get_person_by_id("nm0000151")
person_tests()

('name', 'Morgan Freeman')


AttributeError: 'Person' object has no attribute 'firstname'