<b>Import dependencies

In [1]:
import requests
import json
from pprint import pprint
import pymongo
from pymongo import MongoClient
import csv
import pandas as pd

<h2>Getting Star Wars Data From OMDB

<b>OMDB API url

In [2]:
url = "http://www.omdbapi.com/?i="
api_key = "&apikey=9fc25409"

<b>OMDB movie ids for the first three Star Wars films

In [3]:
movie_list = ['tt0076759','tt0080684','tt0086190']

In [4]:
# confirm url is working
response = requests.get(url + movie_list[0] + api_key)
print(response.url)

http://www.omdbapi.com/?i=tt0076759&apikey=9fc25409


<b>List with OMDB movie information in dictionaries

In [5]:
sw_data = []

for movie in movie_list:
    omdb_url = requests.get(url + movie + api_key)
    data = omdb_url.json()
    sw_data.append(data)

pprint(sw_data)

[{'Actors': 'Mark Hamill, Harrison Ford, Carrie Fisher, Peter Cushing',
  'Awards': 'Won 6 Oscars. Another 50 wins & 28 nominations.',
  'BoxOffice': 'N/A',
  'Country': 'USA',
  'DVD': '21 Sep 2004',
  'Director': 'George Lucas',
  'Genre': 'Action, Adventure, Fantasy, Sci-Fi',
  'Language': 'English',
  'Metascore': '90',
  'Plot': 'Luke Skywalker joins forces with a Jedi Knight, a cocky pilot, a '
          "Wookiee and two droids to save the galaxy from the Empire's "
          'world-destroying battle station, while also attempting to rescue '
          'Princess Leia from the mysterious Darth Vader.',
  'Poster': 'https://m.media-amazon.com/images/M/MV5BNzVlY2MwMjktM2E4OS00Y2Y3LWE3ZjctYzhkZGM3YzA1ZWM2XkEyXkFqcGdeQXVyNzkwMjQ5NzM@._V1_SX300.jpg',
  'Production': '20th Century Fox',
  'Rated': 'PG',
  'Ratings': [{'Source': 'Internet Movie Database', 'Value': '8.6/10'},
              {'Source': 'Rotten Tomatoes', 'Value': '93%'},
              {'Source': 'Metacritic', 'Value': '90/1

<h2>Get sctipt from .txt file to dataframe

In [7]:
epiIV = pd.read_csv("SW_EpisodeIV.txt", escapechar='\\', skiprows=0, delimiter=" ", header=0, \
                names=['Key', 'Character', 'Dialog'], )
epiIV_df = pd.DataFrame(epiIV)
movie = sw_data[0]['Title']
epiIV_df['Movie'] = movie 
epiIV_df.head()
ordered_epiIV = epiIV_df[['Movie', 'Key', 'Character', 'Dialog']]
ordered_epiIV.head()

Unnamed: 0,Movie,Key,Character,Dialog
0,Star Wars: Episode IV - A New Hope,1,THREEPIO,Did you hear that? They've shut down the main...
1,Star Wars: Episode IV - A New Hope,2,THREEPIO,We're doomed!
2,Star Wars: Episode IV - A New Hope,3,THREEPIO,There'll be no escape for the Princess this time.
3,Star Wars: Episode IV - A New Hope,4,THREEPIO,What's that?
4,Star Wars: Episode IV - A New Hope,5,THREEPIO,I should have known better than to trust the l...


In [8]:
epiV = pd.read_csv("SW_EpisodeV.txt", escapechar='\\', skiprows=0, delimiter=" ", header=0, \
                names=['Key', 'Character', 'Dialog'], )
epiV_df = pd.DataFrame(epiV)
movie = sw_data[1]['Title']
epiV_df['Movie'] = movie 
ordered_epiV = epiV_df[['Movie', 'Key', 'Character', 'Dialog']]
ordered_epiV.head()

Unnamed: 0,Movie,Key,Character,Dialog
0,Star Wars: Episode V - The Empire Strikes Back,1,LUKE,"Echo Three to Echo Seven. Han, old buddy, do y..."
1,Star Wars: Episode V - The Empire Strikes Back,2,HAN,"Loud and clear, kid. What's up?"
2,Star Wars: Episode V - The Empire Strikes Back,3,LUKE,"Well, I finished my circle. I don't pick up an..."
3,Star Wars: Episode V - The Empire Strikes Back,4,HAN,There isn't enough life on this ice cube to fi...
4,Star Wars: Episode V - The Empire Strikes Back,5,LUKE,Right. I'll see you shortly. There's a meteori...


In [9]:
epiVI = pd.read_csv("SW_EpisodeVI.txt", escapechar='\\', skiprows=0, delimiter=" ", header=0, \
                names=['Key', 'Character', 'Dialog'], )
epiVI_df = pd.DataFrame(epiV)
movie = sw_data[2]['Title']
epiVI_df['Movie'] = movie 
ordered_epiVI = epiVI_df[['Movie', 'Key', 'Character', 'Dialog']]
ordered_epiVI.head()

Unnamed: 0,Movie,Key,Character,Dialog
0,Star Wars: Episode VI - Return of the Jedi,1,LUKE,"Echo Three to Echo Seven. Han, old buddy, do y..."
1,Star Wars: Episode VI - Return of the Jedi,2,HAN,"Loud and clear, kid. What's up?"
2,Star Wars: Episode VI - Return of the Jedi,3,LUKE,"Well, I finished my circle. I don't pick up an..."
3,Star Wars: Episode VI - Return of the Jedi,4,HAN,There isn't enough life on this ice cube to fi...
4,Star Wars: Episode VI - Return of the Jedi,5,LUKE,Right. I'll see you shortly. There's a meteori...


In [10]:
frames = [ordered_epiIV, ordered_epiV, ordered_epiVI]
combined_movies = pd.concat(frames)
combined_movies

Unnamed: 0,Movie,Key,Character,Dialog
0,Star Wars: Episode IV - A New Hope,1,THREEPIO,Did you hear that? They've shut down the main...
1,Star Wars: Episode IV - A New Hope,2,THREEPIO,We're doomed!
2,Star Wars: Episode IV - A New Hope,3,THREEPIO,There'll be no escape for the Princess this time.
3,Star Wars: Episode IV - A New Hope,4,THREEPIO,What's that?
4,Star Wars: Episode IV - A New Hope,5,THREEPIO,I should have known better than to trust the l...
5,Star Wars: Episode IV - A New Hope,6,LUKE,Hurry up! Come with me! What are you waiting...
6,Star Wars: Episode IV - A New Hope,7,THREEPIO,"Artoo! Artoo-Detoo, where are you?"
7,Star Wars: Episode IV - A New Hope,8,THREEPIO,At last! Where have you been?
8,Star Wars: Episode IV - A New Hope,9,THREEPIO,They're heading in this direction. What are we...
9,Star Wars: Episode IV - A New Hope,10,THREEPIO,"Wait a minute, where are you going?"


In [11]:
script_list = combined_movies.to_dict('records')

<h2>Connection to Mongodb

In [12]:
client = MongoClient('mongodb://localhost:27017/')

In [13]:
client

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)

In [14]:
db = client.star_wars

In [15]:
db.omdb_info.drop()
db.sw_script.drop()

In [16]:
collection_omdb = db.omdb_info

In [17]:
result = collection_omdb.insert_many(sw_data)

In [18]:
collection_scripts = db.sw_script

In [19]:
result = collection_scripts.insert_many(script_list)

In [20]:
db.list_collection_names()

['omdb_info', 'sw_script']