# Loading Database
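This notebook loads the cleaned data files into a database: either a local SQLite file (https://docs.python.org/3/library/sqlite3.html), populated in one step by `loadAllDatabase`, or a MySQL database, populated table by table in the sections below.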

Navigate to:  [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

## Resources

In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

In [26]:
# Initialize the SQLite3 database, stored locally in the file "bigfish.db"
# (sqlite3.connect creates the file if it does not exist yet).
# Note: despite its name, engineSqlite is a plain sqlite3 connection, not a
# SQLAlchemy engine.

import sqlite3                 
engineSqlite = sqlite3.connect("bigfish.db")    

In [2]:
# Initialize the MySQL engine (SQLAlchemy + PyMySQL).
# The password is read from the MYSQL_PASSWORD environment variable, or asked
# for interactively when that variable is unset, so that real credentials are
# never saved inside the notebook.
# Replace DATABASENAME with the target schema before running.

import os
from getpass import getpass
from urllib.parse import quote_plus

mysqlPassword = os.environ.get("MYSQL_PASSWORD")
if mysqlPassword is None:
    mysqlPassword = getpass("MySQL password for root@localhost: ")

# quote_plus keeps characters such as "@" or "/" in the password from
# breaking the database URL.
engine = create_engine(
    "mysql+pymysql://root:{}@localhost:3306/DATABASENAME".format(quote_plus(mysqlPassword)),
    echo=False,
)

In [3]:
# File names of the input TSV files, given as relative paths (resolved against
# the kernel's working directory). One variable per table; the per-table
# sections below read them individually.
acquisitions = "acquisitions_clean.tsv"
assignee = "trimAssignee_clean.tsv"
cpc_current = "trimCPC_current_clean.tsv"
group = "group_clean.tsv"
subgroup = "subgroup_clean.tsv"
subsection = "subsection_clean.tsv"
patent = "trimPatent_clean.tsv"
location = "trimLocation_clean.tsv"
location_assignee = "trimLocation_assignee_clean.tsv"
us_term_of_grant = "trimUS_term_of_grant_clean.tsv"

# All input files in one list. loadAllDatabase pairs these entries with its
# table names by position, so the order here must match its `tables` list.
dataFiles = [acquisitions, assignee, cpc_current, group, subgroup, subsection, 
         patent, location, location_assignee, us_term_of_grant]

## Functions

In [4]:
def read(url):
    """Load a delimited text file into a pandas DataFrame.

    Files whose name ends in ``.csv`` (case-insensitive) are parsed as
    comma-separated; everything else is parsed as tab-separated. The first
    row is used as the header. Malformed rows are dropped with a warning
    instead of aborting the load.

    Parameters
    ----------
    url : str or path-like
        Path or URL of the file to read.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents.
    """
    # Match the whole extension rather than peeking at url[-3], which raised
    # IndexError on short names and did not recognise upper-case ".CSV".
    sep = "," if str(url).lower().endswith(".csv") else "\t"
    try:
        # pandas >= 1.3 spelling; "warn" matches the old
        # error_bad_lines=False behavior (skip the row, report it).
        return pd.read_csv(url, sep=sep, header=0, on_bad_lines="warn")
    except TypeError:
        # Older pandas does not accept on_bad_lines; use the legacy flag,
        # which was removed in pandas 2.0.
        return pd.read_csv(url, sep=sep, header=0, error_bad_lines=False)

In [28]:
def loadAllDatabase(engineType):
    """Read every file in ``dataFiles`` and write it to its database table.

    Each file is loaded with ``read`` and written with ``DataFrame.to_sql``.
    An existing table of the same name is replaced, and the DataFrame index
    is not written.

    Parameters
    ----------
    engineType : database connection or engine
        Passed to ``DataFrame.to_sql`` as ``con``.

    Raises
    ------
    IndexError
        If ``dataFiles`` has fewer entries than there are tables.
    """
    # Table names, listed in the same order as the module-level dataFiles.
    tables = ["acquisitions", "assignee", "cpc_current", "group", "subgroup", "subsection", 
              "patent", "location", "location_assignee", "us_term_of_grant"]

    # Fail before touching the database instead of part-way through the load.
    if len(dataFiles) < len(tables):
        raise IndexError(
            "dataFiles has {} entries but {} tables are expected".format(
                len(dataFiles), len(tables)
            )
        )

    # Process one file at a time so each frame can be freed before the next
    # read; the previous version kept every frame in a list until return.
    for table, path in zip(tables, dataFiles):
        read(path).to_sql(table, index=False, if_exists = "replace", con = engineType)
    
# Populate the local SQLite database with every table in one pass.
loadAllDatabase(engineSqlite)

  exec(code_obj, self.user_global_ns, self.user_ns)


***
# Tables

## acquisitions
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [5]:
# Load the acquisitions file and preview the first rows.
dfAcquisitions = read(acquisitions)
dfAcquisitions.head()

Unnamed: 0,AcquisitionYear,ChildCompany,ParentCompany
0,2015,bebop,Google
1,2015,Fly Labs,Google
2,2015,Clearleap,IBM
3,2015,Metanautix,Microsoft
4,2015,"Talko, Inc.",Microsoft


In [6]:
# Write the frame to the "acquisitions" table via the MySQL engine, replacing
# any existing table; the DataFrame index is not stored.
dfAcquisitions.to_sql("acquisitions", index=False, if_exists = "replace", con = engine)

  result = self._query(query)


## ~~application~~
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [16]:
# NOTE(review): `application` is not defined in any visible cell (the
# Resources cell has no entry for it), so this cell presumably raises
# NameError on a fresh kernel. Define the file name or remove this cell.
dfApplication = read(application)
dfApplication.head()

Unnamed: 0,id,patent_id,country,date
0,02/002761,D345393,US,1992-12-21
1,02/007691,5164715,US,1990-04-10
2,02/010248,5177974,US,1988-06-23
3,02/020141,5379515,US,1994-02-16
4,02/027172,5264790,US,1991-07-01


In [17]:
# dfApplication.to_sql("application", index=False, if_exists = "replace", con = engine)

## assignee
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [7]:
# Load the assignee file and preview the first rows.
dfAssignee = read(assignee)
dfAssignee.head()

Unnamed: 0,patent_id,assignee_id,organization,org
0,6000832,237c2b0099548ddbfa5a37f07e0687ab,Microsoft Corporation,Microsoft
1,7458030,237c2b0099548ddbfa5a37f07e0687ab,Microsoft Corporation,Microsoft
2,D766984,237c2b0099548ddbfa5a37f07e0687ab,Microsoft Corporation,Microsoft
3,D702184,237c2b0099548ddbfa5a37f07e0687ab,Microsoft Corporation,Microsoft
4,8244819,237c2b0099548ddbfa5a37f07e0687ab,Microsoft Corporation,Microsoft


In [8]:
# Write the frame to the "assignee" table via the MySQL engine, replacing any
# existing table; the DataFrame index is not stored.
dfAssignee.to_sql("assignee", index=False, if_exists = "replace", con = engine)

## cpc_current
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [9]:
# Load the cpc_current file and preview the first rows.
dfCPC_current = read(cpc_current)
dfCPC_current.head()

Unnamed: 0,patent_id,section_id,subsection_id,group_id,subgroup_id
0,3930729,G,G01,G01B,G01B9/02097
1,3930729,G,G01,G01B,G01B2290/25
2,3930857,G,G03,G03F,G03F1/54
3,3930857,G,G03,G03F,G03F1/50
4,3930857,G,G03,G03F,G03F7/022


In [10]:
# Write the frame to the "cpc_current" table via the MySQL engine, replacing
# any existing table; the DataFrame index is not stored.
dfCPC_current.to_sql("cpc_current", index=False, if_exists = "replace", con = engine)

## group
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [11]:
# Load the group file and preview the first rows.
dfGroup = read(group)
dfGroup.head()

Unnamed: 0,id,title
0,A01B,SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS...
1,A01C,PLANTING; SOWING; FERTILISING
2,A01D,HARVESTING; MOWING
3,A01F,PROCESSING OF HARVESTED PRODUCE; HAY OR STRAW ...
4,A01G,"HORTICULTURE; CULTIVATION OF VEGETABLES, FLOWE..."


In [12]:
# Write the frame to the "group" table via the MySQL engine, replacing any
# existing table; the DataFrame index is not stored.
dfGroup.to_sql("group", index=False, if_exists = "replace", con = engine)

## subgroup
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [13]:
# Load the subgroup file and preview the first rows.
dfSubgroup = read(subgroup)
dfSubgroup.head()

Unnamed: 0,id,title
0,A01B1/00,Hand tools
1,A01B1/02,Hand tools -Spades; Shovels
2,A01B1/022,Hand tools -Spades; Shovels -Collapsible; exte...
3,A01B1/024,Hand tools -Spades; Shovels -Foot protectors a...
4,A01B1/026,Hand tools -Spades; Shovels -with auxiliary ha...


In [14]:
# Write the frame to the "subgroup" table via the MySQL engine, replacing any
# existing table; the DataFrame index is not stored.
dfSubgroup.to_sql("subgroup", index=False, if_exists = "replace", con = engine)

## subsection
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [15]:
# Load the subsection file and preview the first rows.
dfSubsection = read(subsection)
dfSubsection.head()

Unnamed: 0,id,title
0,A01,AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI...
1,A21,BAKING; EDIBLE DOUGHS
2,A22,BUTCHERING; MEAT TREATMENT; PROCESSING POULTRY...
3,A23,"FOODS OR FOODSTUFFS; THEIR TREATMENT, NOT COVE..."
4,A24,TOBACCO; CIGARS; CIGARETTES; SMOKERS' REQUISITES


In [16]:
# Write the frame to the "subsection" table via the MySQL engine, replacing
# any existing table; the DataFrame index is not stored.
dfSubsection.to_sql("subsection", index=False, if_exists = "replace", con = engine)

## patent
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [17]:
# Load the patent file and preview the first rows.
# NOTE(review): the saved output of this cell contains a warning fragment, so
# this read apparently emitted a warning; re-run and confirm what it reports.
dfPatent = read(patent)
dfPatent.head()

  if self.run_code(code, result):


Unnamed: 0,date,title,num_claims,patent_id,year
0,1976-01-06,Interferometer apparatus incorporating a spher...,39.0,3930729,1976
1,1976-01-06,Resist process,7.0,3930857,1976
2,1976-01-06,Silicon polishing solution preparation,16.0,3930870,1976
3,1976-01-06,Electron beam positive resists containing acet...,5.0,3931435,1976
4,1976-01-06,Overlapped signal transition counter,7.0,3931531,1976


In [18]:
# Write the frame to the "patent" table via the MySQL engine, replacing any
# existing table; the DataFrame index is not stored.
dfPatent.to_sql("patent", index=False, if_exists = "replace", con = engine)

## location
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [19]:
# Load the location file and preview the first rows.
dfLocation = read(location)
dfLocation.head()

Unnamed: 0,id,city,latitude,longitude
0,00ajij86hkk9,Renton,47.4545,-122.223
1,03gsxpxq1ltp,White Plains,41.034,-73.7629
2,067ckw1czast,Secaucus,40.7895,-74.0565
3,0grbym9qhe5d,Poway,32.9628,-117.036
4,1udrm9haouwb,El Segundo,33.9192,-118.416


In [20]:
# Write the frame to the "location" table via the MySQL engine, replacing any
# existing table; the DataFrame index is not stored.
dfLocation.to_sql("location", index=False, if_exists = "replace", con = engine)

## location_assignee
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [21]:
# Load the location_assignee file and preview the first rows.
dfLocation_assignee = read(location_assignee)
dfLocation_assignee.head()

Unnamed: 0,location_id,assignee_id
0,qxpuc04niivb,b12e2d8b345facb1f8bf63ce20573dd4
1,6qcwqs98tfh0,b12e2d8b345facb1f8bf63ce20573dd4
2,e1aw0l0xbcd6,b12e2d8b345facb1f8bf63ce20573dd4
3,23pta3jjpriv,237c2b0099548ddbfa5a37f07e0687ab
4,656n7m0unkh8,237c2b0099548ddbfa5a37f07e0687ab


In [22]:
# Write the frame to the "location_assignee" table via the MySQL engine,
# replacing any existing table; the DataFrame index is not stored.
dfLocation_assignee.to_sql("location_assignee", index=False, if_exists = "replace", con = engine)

## us_term_of_grant
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [23]:
# Load the us_term_of_grant file and preview the first rows.
dfGrant = read(us_term_of_grant)
dfGrant.head()

Unnamed: 0,patent_id,disclaimer_date
0,7797754,0000-00-00
1,8490014,0000-00-00
2,8918651,0000-00-00
3,7661088,0000-00-00
4,D535663,0000-00-00


In [24]:
# Write the frame to the "us_term_of_grant" table via the MySQL engine,
# replacing any existing table; the DataFrame index is not stored.
dfGrant.to_sql("us_term_of_grant", index=False, if_exists = "replace", con = engine)