## Getting data
This notebook will read in multiple files that contain star wars movie dialogue and create tables based on the characters which will be stored on mongo db.

*** 
### Import packages 

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from pymongo import MongoClient
import requests
import pandas as pd

*** 
### Read in tables
read the text files as tables then organize them.

In [3]:
IV_df = pd.read_table('star-wars-movie-scripts/SW_EpisodeIV.txt',
               delim_whitespace=True, header=0, escapechar='\\')
IV_df['movie'] = 'IV'

In [4]:
V_df = pd.read_table('star-wars-movie-scripts/SW_EpisodeV.txt',
               delim_whitespace=True, header=0, escapechar='\\')
V_df['movie'] = 'V'

In [5]:
VI_df = pd.read_table('star-wars-movie-scripts/SW_EpisodeVI.txt',
               delim_whitespace=True, header=0, escapechar='\\')
VI_df['movie'] = 'VI'

In [6]:
I_df = pd.read_table('star-wars-movie-scripts/SW_EpisodeI.txt', header=0, sep =' : ', escapechar='\\')
I_df['movie'] = 'I'

In [7]:
II_df = pd.read_table('star-wars-movie-scripts/SW_EpisodeII.txt', header=0, sep =' : ', escapechar='\\')
II_df['movie'] = 'II'

In [8]:
III_df = pd.read_table('star-wars-movie-scripts/SW_EpisodeIII.txt', header=0, sep =': ', escapechar='\\')
III_df['movie'] = 'III'

In [9]:
pdList = [IV_df, V_df, VI_df, I_df, II_df, III_df]
dialogue_df = pd.concat(pdList)

In [10]:
dialogue_df['character'] = dialogue_df['character'].map(lambda x: x.replace('BEN','OBI-WAN').replace('DARTH VADER','VADER').replace('THREEPIO','C-3PO').strip())

In [11]:
counts = dialogue_df['character'].value_counts().to_dict()
counts

{'ANAKIN': 692,
 'OBI-WAN': 589,
 'LUKE': 494,
 'HAN': 459,
 'C-3PO': 358,
 'PADME': 356,
 'LEIA': 227,
 'QUI-GON': 211,
 'YODA': 168,
 'PALPATINE': 153,
 'VADER': 145,
 'JAR JAR': 106,
 'LANDO': 101,
 'MACE WINDU': 90,
 'AMIDALA': 48,
 'NUTE': 48,
 'BAIL ORGANA': 47,
 'EMPEROR': 44,
 'WATTO': 42,
 'CAPT. PANAKA': 39,
 'RED LEADER': 38,
 'DARTH SIDIOUS': 37,
 'SHMI': 36,
 'COUNT DOOKU': 35,
 'BIGGS': 34,
 'WEDGE': 33,
 'OWEN': 32,
 'PIETT': 31,
 'JABBA': 29,
 'TARKIN': 28,
 'GENERAL GRIEVOUS': 27,
 'CREATURE': 25,
 'TROOPER': 19,
 'LAMA SU': 19,
 'BOSS NASS': 17,
 'JOCASTA NU': 15,
 'RUNE': 15,
 'SIO BIBBLE': 15,
 'JANGO FETT': 15,
 'GOLD LEADER': 14,
 'ACKBAR': 14,
 'RIEEKAN': 13,
 'RIC OLIE': 13,
 'OFFICER': 13,
 'MAS AMEDDA': 12,
 'PILOT': 11,
 'DEXTER JETTSTER': 11,
 'TAUN WE': 11,
 'BOBA FETT': 11,
 'CAPTAIN TYPHO': 10,
 'CLONE COMMANDER CODY': 10,
 'COMMANDER': 10,
 'MON MOTHMA': 9,
 'QUEEN JAMILLIA': 9,
 'CONTROLLER': 8,
 'KITSTER': 8,
 'VALORUM': 8,
 'INTERCOM VOICE': 8,
 'RED 

In [11]:
dialogue_df

Unnamed: 0,character,dialogue,movie
1,C-3PO,Did you hear that? They've shut down the main...,IV
2,C-3PO,We're doomed!,IV
3,C-3PO,There'll be no escape for the Princess this time.,IV
4,C-3PO,What's that?,IV
5,C-3PO,I should have known better than to trust the l...,IV
...,...,...,...
1140,YODA,"How to join the Force, he will train you. Your...",III
1141,BAIL ORGANA,Captain Antilles.,III
1142,CAPTAIN ANTILLES,"Yes, Your Highness.",III
1143,BAIL ORGANA,I'm placing these droids in your care. Treat t...,III


In [12]:
client = MongoClient()

In [13]:
db = client.starwars
scripts = db.scripts

In [15]:
dialogue_df.reset_index(level=0, inplace=True, drop=True)
scripts.insert_many(dialogue_df.to_dict(orient='records'))

<pymongo.results.InsertManyResult at 0x102241b280>

In [14]:
movies = db.movies
dialogue_df.reset_index(level=0, inplace=True, drop=True)
movies.insert_many(dialogue_df.to_dict(orient='records'))

<pymongo.results.InsertManyResult at 0x102240caf0>

In [16]:
for db in client.list_databases():
    print(db)

{'name': 'admin', 'sizeOnDisk': 40960.0, 'empty': False}
{'name': 'books', 'sizeOnDisk': 348160.0, 'empty': False}
{'name': 'config', 'sizeOnDisk': 110592.0, 'empty': False}
{'name': 'events', 'sizeOnDisk': 73728.0, 'empty': False}
{'name': 'local', 'sizeOnDisk': 40960.0, 'empty': False}
{'name': 'outings', 'sizeOnDisk': 40960.0, 'empty': False}
{'name': 'starwars', 'sizeOnDisk': 8192.0, 'empty': False}


In [17]:
scripts.find_one()

{'_id': ObjectId('5e4dad07828c421324ab5f35'),
 'character': 'C-3PO',
 'dialogue': "Did you hear that?  They've shut down the main reactor.  We'll be destroyed for sure.  This is madness!"}

In [18]:
characters_list = ['Anakin', 'Luke' 'Obi_Wan', 'Han', 'Padme','Yoda','Vader','Leia','C_3PO']

In [20]:
text = scripts.find({'character': 'ANAKIN'},{'_id':0, 'dialogue':1})
db = client.starwars
anakin = db.anakin
anakin.insert_many(text)

<pymongo.results.InsertManyResult at 0x105bb8410>

In [21]:
anakin.find_one()

{'_id': ObjectId('5e4dad30828c421324ab751c'),
 'dialogue': 'Now...go, my son. Leave me.'}

In [22]:
text = scripts.find({'character': 'LUKE'},{'_id':0, 'dialogue':1})
luke = db.luke
luke.insert_many(text)

<pymongo.results.InsertManyResult at 0x1022bcc780>

In [23]:
text = scripts.find({'character': 'OBI-WAN'},{'_id':0, 'dialogue':1})
obi_wan = db.obi_wan
obi_wan.insert_many(text)

<pymongo.results.InsertManyResult at 0x10222daaa0>

In [24]:
text = scripts.find({'character': 'HAN'},{'_id':0, 'dialogue':1})
han = db.han
han.insert_many(text)

<pymongo.results.InsertManyResult at 0x1022bc7780>

In [25]:
text = scripts.find({'character': 'PADME'},{'_id':0, 'dialogue':1})
padme = db.padme
padme.insert_many(text)

<pymongo.results.InsertManyResult at 0x10220dfb90>

In [26]:
text = scripts.find({'character': 'YODA'},{'_id':0, 'dialogue':1})
yoda = db.yoda
yoda.insert_many(text)

<pymongo.results.InsertManyResult at 0x1021ab6d20>

In [27]:
text = scripts.find({'character': 'LEIA'},{'_id':0, 'dialogue':1})
leia = db.leia
leia.insert_many(text)

<pymongo.results.InsertManyResult at 0x1022b66780>

In [28]:
text = scripts.find({'character': 'C-3PO'},{'_id':0, 'dialogue':1})
c_3po = db.c_3po
c_3po.insert_many(text)

<pymongo.results.InsertManyResult at 0x1022b571e0>

In [30]:
text = scripts.find({'character': 'VADER'},{'_id':0, 'dialogue':1})
vader = db.vader
vader.insert_many(text)

<pymongo.results.InsertManyResult at 0x105bb7fa0>

In [15]:
db.collection_names()

['obi_wan',
 'vader',
 'luke',
 'c_3po',
 'han',
 'padme',
 'movies',
 'scripts',
 'yoda',
 'anakin',
 'leia']