# Database creation and management routine testing

This notebook mostly contains scratch work for the `database.py` module. The code below is intended for testing and experimentation only. See the documentation in `database.py` for how to work with the database to add and retrieve data.

## Universal imports

In [3]:
import json as j      # JSON handling
import re             # Regular expressions
import sqlite3 as s   # SQLite3 database

import bs4 as bs      # HTML handling
import imdb as i      # IMDb API handling
import numpy as np    # Math utilities
import pandas as pd   # Data handling
import requests as r  # HTTP handling

## Global variable declarations

In [4]:
# Database file name. Test configuration for now - switch to the
# commented-out production name when production-ready.
DB = "test.db"
#DB = 'films.db'

# Path template for script files; the placeholder is filled with a file name.
SCRIPTS = "./scripts/{}"

In [81]:

# Parse the HTML stored under data['script'] with the lxml parser.
# NOTE(review): `data` is not defined by any visible cell before this one, so
# this relies on earlier kernel state - load it explicitly so that
# Restart & Run All works.
script = bs.BeautifulSoup(data['script'], "lxml")

In [136]:
# Peek at chunk 11 of the nested <pre> element after splitting its markup on "<b>".
str(script.pre.pre).split(sep="<b>")[11]

"        INT. HOSPITAL ROOM - TIGHT ON RIPLEY - GATEWAY STATION    3 \n</b>\n        She's lying in a bed, looking wan, as a female MED-TECH\n        raises the backrest.  She is surrounded by arcane white\n        MEDICAL EQUIPMENT.  The Med-Tech exudes practiced\n        cheeriness.\n\n"

In [145]:
# Same "<b>" split, applied to the last <pre> element found; displays every chunk.
str(script.find_all("pre")[-1]).split(sep='<b>')

['<pre>\n\n\n\n\n',
 '                               "ALIENS"\n</b>\n\n                                  by\n\n\n                             James Cameron\n\n\n\n\n\n\n\n',
 '                                                   FIRST DRAFT\n</b>                                                   May 28, 1985\n\n',
 '-----------------------------------------------------------------------------\n</b>\n',
 '                                ALIENS\n</b>\n',
 '        FADE IN\n</b>\n',
 '        SOMETIME IN THE FUTURE - SPACE                            1\n</b>\n        Silent and endless.  The stars shine like the love of\n        God...cold and remote.  Against them drifts a tiny chip\n        of technology.\n\n        CLOSER SHOT  It is the NARCISSUS, lifeboat of the\n        ill-fated star-freighter Nostromo.  Without interior\n        or running lights it seems devoid of life.  The PING\n        of a RANGING RADAR grows louder, closer.  A shadow\n        engulfs the Narcissus.  Searchlight

In [146]:
# Measure the run of leading spaces on one "<b>"-delimited chunk (index 12)
# of the nested <pre> element. `re` comes from the notebook's imports cell.
spaces = re.compile('^[ ]+')
um = spaces.match(str(script.pre.pre).split(sep="<b>")[12])
# match() returns None when the chunk does not start with a space; report 0
# in that case (the same value the Script class records) instead of failing
# on None.end().
um.end() if um is not None else 0

35

In [190]:
class Script(object):
    """Break a script HTML page into "<b>"-delimited parts.

    The last ``<pre>`` element of the page is converted to a string and
    split on ``<b>``. For every resulting part the constructor records how
    many leading spaces it has and the stripped pieces obtained by splitting
    the part on ``</b>``.

    Attributes set by the constructor:
        data: String form of the last ``<pre>`` element.
        parts: List of strings from splitting ``data`` on ``<b>``.
        spaces: Leading-space count per part (0 when there is none).
        spacings: Sorted distinct values of ``spaces``.
        part_lines: Per part, the stripped pieces from splitting on ``</b>``.
        df: DataFrame with the columns ``parts``, ``spaces`` and ``lines``,
            one row per part.
    """

    def __init__(self, text):
        last_pre = bs.BeautifulSoup(text, "lxml").find_all("pre")[-1]
        self.data = str(last_pre)
        self.parts = self.data.split(sep='<b>')

        # Leading-space count of each part; no leading space counts as 0.
        indent_re = re.compile('^[ ]+')
        self.spaces = []
        for part in self.parts:
            found = indent_re.match(part)
            self.spaces.append(found.end() if found is not None else 0)

        # Cut every part at "</b>" and strip the surrounding whitespace
        # from each piece.
        self.part_lines = [
            [piece.strip() for piece in part.split(sep="</b>")]
            for part in self.parts
        ]

        # One row per part, columns in the order parts / spaces / lines.
        self.df = pd.DataFrame().assign(
            parts=pd.Series(self.parts).values,
            spaces=pd.Series(self.spaces).values,
            lines=pd.Series(self.part_lines).values,
        )
        self.spacings = sorted(self.df['spaces'].unique())

In [191]:
# Rebinds `script`: from here on it is a Script instance, not the
# BeautifulSoup object assigned earlier, so the earlier cells that call
# script.pre / script.find_all need the soup re-created before they can rerun.
# NOTE(review): `data` must already exist in the kernel - confirm where it is loaded.
script = Script(data['script'])

In [210]:
# Stripped "</b>"-separated pieces recorded for part 30.
script.df['lines'][30]

['DOCTOR',
 "Hold her...Get me an airway, stat!\n                    And fifteen cc's of...Jesus!\n\n        AN EXPLOSION OF BLOOD beneath the sheet covering her\n        chest!  Ripley stares at the SHAPE RISING UNDER THE\n        SHEET.  Tearing itself out of her.\n\n        HER P.O.V. as the sheet rises.  A GLIMPSE OF the"]

In [211]:
# Raw "<b>"-delimited parts held by the Script instance (displays all of them).
script.parts

['<pre>\n\n\n\n\n',
 '                               "ALIENS"\n</b>\n\n                                  by\n\n\n                             James Cameron\n\n\n\n\n\n\n\n',
 '                                                   FIRST DRAFT\n</b>                                                   May 28, 1985\n\n',
 '-----------------------------------------------------------------------------\n</b>\n',
 '                                ALIENS\n</b>\n',
 '        FADE IN\n</b>\n',
 '        SOMETIME IN THE FUTURE - SPACE                            1\n</b>\n        Silent and endless.  The stars shine like the love of\n        God...cold and remote.  Against them drifts a tiny chip\n        of technology.\n\n        CLOSER SHOT  It is the NARCISSUS, lifeboat of the\n        ill-fated star-freighter Nostromo.  Without interior\n        or running lights it seems devoid of life.  The PING\n        of a RANGING RADAR grows louder, closer.  A shadow\n        engulfs the Narcissus.  Searchlight

In [214]:
# Split part 8 on blank lines to look at its paragraph-level pieces.
script.parts[8].split(sep="\n\n")

['                                   LEADER\n</b>                           (filtered)\n                    Internal pressure positive.  Assume\n                    nominal hull integrity.  Hypersleep\n                    capsules, style circa late twenties...',
 '        His gloved hand wipes at on opaque layer of dust on the\n        canopy.',
 '        ANGLE INSIDE CAPSULE  as light stabs in where the dust is\n        wiped away, illuminating a WOMAN, her face in peaceful\n        repose.',
 "        WARRANT OFFICER RIPLEY, sole survivor of the Nostromo.\n        Nestled next to her is JONES, the ship's wayward cat.",
 '']

In [None]:
# Line-by-line view of the stringified <pre> element.
script.data.split(sep="\n")

In [None]:
# Number of "<b>"-delimited parts in the script.
len(script.parts)

In [218]:
# Text of part 8 that follows its first "</b>" (up to any later "</b>"),
# broken into individual lines.
script.parts[8].split(sep="</b>")[1].split(sep="\n")

['                           (filtered)',
 '                    Internal pressure positive.  Assume',
 '                    nominal hull integrity.  Hypersleep',
 '                    capsules, style circa late twenties...',
 '',
 '        His gloved hand wipes at on opaque layer of dust on the',
 '        canopy.',
 '',
 '        ANGLE INSIDE CAPSULE  as light stabs in where the dust is',
 '        wiped away, illuminating a WOMAN, her face in peaceful',
 '        repose.',
 '',
 '        WARRANT OFFICER RIPLEY, sole survivor of the Nostromo.',
 "        Nestled next to her is JONES, the ship's wayward cat.",
 '',
 '']

In [220]:
# Part 8: the text before the first "</b>" is kept as the header, the piece
# right after it is broken into individual lines.
pieces = script.parts[8].split(sep="</b>")
header = pieces[0]
content = pieces[1].split(sep="\n")

# For every line record (line number, stripped text, leading spaces, raw length);
# a line that does not start with a space gets 0 leading spaces.
spaces = re.compile('^[ ]+')
line_info = []
for num, raw_line in enumerate(content):
    found = spaces.match(raw_line)
    indent = found.end() if found is not None else 0
    line_info.append((num, raw_line.strip(), indent, len(raw_line)))
line_info

[(0, '(filtered)', 27, 37),
 (1, 'Internal pressure positive.  Assume', 20, 55),
 (2, 'nominal hull integrity.  Hypersleep', 20, 55),
 (3, 'capsules, style circa late twenties...', 20, 58),
 (4, '', 0, 0),
 (5, 'His gloved hand wipes at on opaque layer of dust on the', 8, 63),
 (6, 'canopy.', 8, 15),
 (7, '', 0, 0),
 (8, 'ANGLE INSIDE CAPSULE  as light stabs in where the dust is', 8, 65),
 (9, 'wiped away, illuminating a WOMAN, her face in peaceful', 8, 62),
 (10, 'repose.', 8, 15),
 (11, '', 0, 0),
 (12, 'WARRANT OFFICER RIPLEY, sole survivor of the Nostromo.', 8, 62),
 (13, "Nestled next to her is JONES, the ship's wayward cat.", 8, 61),
 (14, '', 0, 0),
 (15, '', 0, 0)]

In [233]:
# Merge consecutive entries of `line_info` into paragraph groups.
#
# Each `line_info` entry is (line number, stripped text, leading spaces,
# raw length). A group is a run of text lines that share one indent; it ends
# at a blank line or at a line with a different indent. `line_group` maps a
# 1-based group number to (line numbers, indent, joined text).
line_group = {}
group = 0          # number of the last group stored in line_group
spacing = None     # indent of the open group; None when no group may be extended
gap = 0            # indent recorded for the open group
lines = []         # line numbers collected for the open group
buffer = []        # stripped text collected for the open group
connect = " "      # separator used when joining a group's text

for num, text, indent, _raw_len in line_info:
    if not text:
        # Blank (or whitespace-only) line: paragraph break. Testing the text
        # instead of indent == 0 keeps real text that starts in column 0
        # from being discarded.
        spacing = None
        continue
    if indent == spacing:
        lines.append(num)
        buffer.append(text)
        continue
    # A new group starts here. Store the previous one first, if there is one,
    # so leading blank lines or empty input never produce an empty group.
    if buffer:
        group += 1
        line_group[group] = (lines, gap, connect.join(buffer))
    spacing = gap = indent
    lines = [num]
    buffer = [text]

# Store the group that was still open when the input ran out.
if buffer:
    group += 1
    line_group[group] = (lines, gap, connect.join(buffer))
line_group

{1: ([0], 27, '(filtered)'),
 2: ([1, 2, 3],
  20,
  'Internal pressure positive.  Assume nominal hull integrity.  Hypersleep capsules, style circa late twenties...'),
 3: ([5, 6],
  8,
  'His gloved hand wipes at on opaque layer of dust on the canopy.'),
 4: ([8, 9, 10],
  8,
  'ANGLE INSIDE CAPSULE  as light stabs in where the dust is wiped away, illuminating a WOMAN, her face in peaceful repose.'),
 5: ([12, 13],
  8,
  "WARRANT OFFICER RIPLEY, sole survivor of the Nostromo. Nestled next to her is JONES, the ship's wayward cat.")}

In [80]:
def parse_info(file, clean=False):
    """Parse the ``info_raw`` HTML of a script JSON file into a dict.

    Args:
        file: File name substituted into the ``SCRIPTS`` path template.
        clean: If true, normalise the ``opinion``, ``rating`` and
            ``avg_usr_rtg`` entries: an "unavailable" placeholder becomes
            ``None`` and a value ending in ``(<score> out of 10)`` is
            reduced to ``<score>``. Defaults to ``False``, which leaves the
            three entries as the raw HTML fragments.

    Returns:
        dict holding ``opinion``, ``rating`` and ``avg_usr_rtg`` plus one
        key per ``<b>`` label met in the remaining fragments. A label with
        text after it maps to that text; a label with nothing after it maps
        to a list filled from the fragments that follow.
    """
    with open(SCRIPTS.format(file)) as f:
        data = j.load(f)

    d = bs.BeautifulSoup(data['info_raw'], "lxml")
    tds = d.find_all("td")

    # Work on the third <td>, cut into fragments at every <br/>.
    raw = str(tds[2]).split(sep="<br/>")
    # Fragments 1, 4 and 6 are read directly below; labelled fields are taken
    # from index 8 up to, but excluding, the last two fragments.
    parse = raw[8:-2]

    parsed = {
        'opinion':raw[1],
        'rating':raw[4],
        'avg_usr_rtg':raw[6],
    }

    # Placeholder texts that stand for "no value".
    unavailable = ('\xa0\xa0None available', '\xa0\xa0Not available')

    def checker(entry):
        """Normalise ``parsed[entry]`` in place and return the new value."""
        value = parsed[entry]
        if value in unavailable:
            parsed[entry] = None
        elif "> (" in value:
            # Keep what follows "> (" minus the trailing 11 characters,
            # i.e. " out of 10)" in a value such as "... (9.80 out of 10)".
            parsed[entry] = value.split(sep="> (")[1][:-11]
        # Any other format is left untouched rather than guessed at.
        return parsed[entry]

    if clean:
        for entry in ('opinion', 'rating', 'avg_usr_rtg'):
            checker(entry)

    for x in parse:
        if x.startswith('\n'):
            # Header fragment: "<b>label</b>", optionally followed by a value.
            pieces = x.split(sep="</b>")
            key = pieces[0].split(sep="<b>")[1]
            if pieces[1] != '':
                # Drop the first three characters that sit between the label
                # and its value.
                parsed[key] = pieces[1][3:]
            else:
                parsed[key] = []
                write = parsed[key]
        elif x == '':
            pass
        else:
            # Item fragment: keep the text after '">' minus its last four
            # characters and add it to the most recently opened list.
            # NOTE(review): `write` is unbound if an item precedes every list
            # header, and still points at the previous list after a header
            # that carried its own value - confirm the input never does this.
            write.append(x.split(sep='">')[1][:-4])

    return parsed

{'Genres': ['Action', 'Horror', 'Sci-Fi', 'Thriller'],
 'Script Date': 'May 1985',
 'Writers': ['James Cameron'],
 'avg_usr_rtg': '\xa0\xa0<img src="/images/rating/10-stars.gif"/> (9.80 out of 10)',
 'opinion': '\xa0\xa0None available',
 'rating': '\xa0\xa0Not available'}