# Loading Database
This notebook loads the cleaned, tab-separated data files into a local SQLite database (`bigfish.db`) using pandas and Python's built-in [`sqlite3`](https://docs.python.org/3/library/sqlite3.html) module.

Navigate to:  [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

## Resources

In [2]:
import pandas as pd

In [3]:
# Initialize the SQLite3 database (stored locally).
# sqlite3.connect opens "bigfish.db" relative to the current working directory.

import sqlite3
# NOTE: despite its name, `engine` is a plain sqlite3.Connection (not an
# SQLAlchemy engine); it is the object passed as `con=` to every to_sql call.
engine = sqlite3.connect("bigfish.db")

In [4]:
# Paths of the cleaned, tab-separated input files (one per database table).

# Company acquisitions
acquisitions = "acquisitions_clean.tsv"

# Patent, assignee and grant-term data
assignee = "trimAssignee_clean.tsv"
patent = "trimPatent_clean.tsv"
us_term_of_grant = "trimUS_term_of_grant_clean.tsv"

# CPC classification data
cpc_current = "trimCPC_current_clean.tsv"
group = "group_clean.tsv"
subgroup = "subgroup_clean.tsv"
subsection = "subsection_clean.tsv"

# Location data
location = "trimLocation_clean.tsv"
location_assignee = "trimLocation_assignee_clean.tsv"

# Order matters: loadAllSQLite pairs these paths with its table names by position.
dataFiles = [
    acquisitions,
    assignee,
    cpc_current,
    group,
    subgroup,
    subsection,
    patent,
    location,
    location_assignee,
    us_term_of_grant,
]

## Functions

In [5]:
def read(url):
    """Read a delimited text file into a pandas DataFrame.

    Files whose name ends in ".csv" (case-insensitive) are parsed as
    comma-separated; every other file is parsed as tab-separated. The first
    line is used as the header. Malformed rows (wrong number of fields) are
    skipped with a warning instead of aborting the whole load.

    Parameters
    ----------
    url : str or path-like
        Location of the file to read.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    import inspect

    # Decide the delimiter from the real extension rather than from a single
    # character at a fixed offset, which fails on short names and on
    # extensions such as ".cfg".
    sep = "," if str(url).lower().endswith(".csv") else "\t"

    # `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0.
    # `on_bad_lines="warn"` is its equivalent (skip the row, emit a warning);
    # fall back to the old keyword only on pandas versions that predate it.
    if "on_bad_lines" in inspect.signature(pd.read_csv).parameters:
        bad_line_kwargs = {"on_bad_lines": "warn"}
    else:
        bad_line_kwargs = {"error_bad_lines": False}

    # read_csv already returns a DataFrame, so no extra wrapping is needed.
    return pd.read_csv(url, sep=sep, header=0, **bad_line_kwargs)

In [8]:
def loadAllSQLite():
    """Load every file listed in ``dataFiles`` into its SQLite table.

    Table names and file paths are paired by position, so ``dataFiles`` must
    list its paths in the same order as ``tables`` below. Each file is parsed
    with ``read`` and written through the module-level ``engine`` connection.
    An existing table with the same name is replaced, and the DataFrame index
    is not written.

    Raises
    ------
    ValueError
        If ``dataFiles`` and the table list have different lengths; checked
        before any table is written so the database is not left half-loaded.
    """
    tables = ["acquisitions", "assignee", "cpc_current", "group", "subgroup", "subsection",
              "patent", "location", "location_assignee", "us_term_of_grant"]

    if len(dataFiles) != len(tables):
        raise ValueError(
            "dataFiles has %d entries but %d tables are expected"
            % (len(dataFiles), len(tables))
        )

    # Process one table at a time so that only a single DataFrame is held in
    # memory at once; nothing is returned, so the frames need not be kept.
    for table, path in zip(tables, dataFiles):
        read(path).to_sql(table, index=False, if_exists="replace", con=engine)


loadAllSQLite()

  exec(code_obj, self.user_global_ns, self.user_ns)


***
# Tables

## acquisitions
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [14]:
# Read the file named by `acquisitions` into a DataFrame and preview its first rows.
dfAcquisitions = read(acquisitions)
dfAcquisitions.head()

Unnamed: 0,AcquisitionYear,ChildCompany,ParentCompany
0,2015,bebop,Google
1,2015,Fly Labs,Google
2,2015,Clearleap,IBM
3,2015,Metanautix,Microsoft
4,2015,"Talko, Inc.",Microsoft


In [15]:
# Write the frame to the "acquisitions" table, replacing any existing table
# of that name; the DataFrame index is not stored.
dfAcquisitions.to_sql(
    "acquisitions",
    con=engine,
    if_exists="replace",
    index=False,
)

## ~~application~~
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [16]:
# NOTE(review): `application` is not assigned in the Resources cell, so on a
# fresh kernel this cell raises NameError unless the path is defined somewhere
# not visible here. The saved output presumably comes from a session where it
# was still defined — TODO restore the path variable or remove this section.
dfApplication = read(application)
dfApplication.head()

Unnamed: 0,id,patent_id,country,date
0,02/002761,D345393,US,1992-12-21
1,02/007691,5164715,US,1990-04-10
2,02/010248,5177974,US,1988-06-23
3,02/020141,5379515,US,1994-02-16
4,02/027172,5264790,US,1991-07-01


In [17]:
# Disabled on purpose, it appears: the section heading is struck through and
# "application" is not among the tables written by loadAllSQLite — TODO confirm
# and delete this cell if the table is permanently retired.
# dfApplication.to_sql("application", index=False, if_exists = "replace", con = engine)

## assignee
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [10]:
# Read the file named by `assignee` into a DataFrame and preview its first rows.
dfAssignee = read(assignee)
dfAssignee.head()

Unnamed: 0,patent_id,assignee_id,organization
0,6000832,237c2b0099548ddbfa5a37f07e0687ab,Microsoft Corporation
1,7458030,237c2b0099548ddbfa5a37f07e0687ab,Microsoft Corporation
2,D766984,237c2b0099548ddbfa5a37f07e0687ab,Microsoft Corporation
3,D702184,237c2b0099548ddbfa5a37f07e0687ab,Microsoft Corporation
4,8244819,237c2b0099548ddbfa5a37f07e0687ab,Microsoft Corporation


In [11]:
# Write the frame to the "assignee" table, replacing any existing table
# of that name; the DataFrame index is not stored.
dfAssignee.to_sql(
    "assignee",
    con=engine,
    if_exists="replace",
    index=False,
)

## cpc_current
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [18]:
# Read the file named by `cpc_current` into a DataFrame and preview its first rows.
dfCPC_current = read(cpc_current)
dfCPC_current.head()

Unnamed: 0,patent_id,section_id,subsection_id,group_id,subgroup_id
0,3930729,G,G01,G01B,G01B9/02097
1,3930729,G,G01,G01B,G01B2290/25
2,3930857,G,G03,G03F,G03F1/54
3,3930857,G,G03,G03F,G03F1/50
4,3930857,G,G03,G03F,G03F7/022


In [19]:
# Write the frame to the "cpc_current" table, replacing any existing table
# of that name; the DataFrame index is not stored.
dfCPC_current.to_sql(
    "cpc_current",
    con=engine,
    if_exists="replace",
    index=False,
)

## group
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [20]:
# Read the file named by `group` into a DataFrame and preview its first rows.
dfGroup = read(group)
dfGroup.head()

Unnamed: 0,id,title
0,A01B,SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS...
1,A01C,PLANTING; SOWING; FERTILISING
2,A01D,HARVESTING; MOWING
3,A01F,PROCESSING OF HARVESTED PRODUCE; HAY OR STRAW ...
4,A01G,"HORTICULTURE; CULTIVATION OF VEGETABLES, FLOWE..."


In [21]:
# Write the frame to the "group" table, replacing any existing table
# of that name; the DataFrame index is not stored.
dfGroup.to_sql(
    "group",
    con=engine,
    if_exists="replace",
    index=False,
)

## subgroup
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [22]:
# Read the file named by `subgroup` into a DataFrame and preview its first rows.
dfSubgroup = read(subgroup)
dfSubgroup.head()

Unnamed: 0,id,title
0,A01B1/00,Hand tools
1,A01B1/02,Hand tools -Spades; Shovels
2,A01B1/022,Hand tools -Spades; Shovels -Collapsible; exte...
3,A01B1/024,Hand tools -Spades; Shovels -Foot protectors a...
4,A01B1/026,Hand tools -Spades; Shovels -with auxiliary ha...


In [23]:
# Write the frame to the "subgroup" table, replacing any existing table
# of that name; the DataFrame index is not stored.
dfSubgroup.to_sql(
    "subgroup",
    con=engine,
    if_exists="replace",
    index=False,
)

## subsection
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [24]:
# Read the file named by `subsection` into a DataFrame and preview its first rows.
dfSubsection = read(subsection)
dfSubsection.head()

Unnamed: 0,id,title
0,A01,AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI...
1,A21,BAKING; EDIBLE DOUGHS
2,A22,BUTCHERING; MEAT TREATMENT; PROCESSING POULTRY...
3,A23,"FOODS OR FOODSTUFFS; THEIR TREATMENT, NOT COVE..."
4,A24,TOBACCO; CIGARS; CIGARETTES; SMOKERS' REQUISITES


In [25]:
# Write the frame to the "subsection" table, replacing any existing table
# of that name; the DataFrame index is not stored.
dfSubsection.to_sql(
    "subsection",
    con=engine,
    if_exists="replace",
    index=False,
)

## patent
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [5]:
# Read the file named by `patent` into a DataFrame and preview its first rows.
# NOTE(review): the saved output shows the tail of a warning emitted by this
# cell — presumably a pandas DtypeWarning about mixed-type columns; confirm
# and consider passing explicit dtypes when reading this file.
dfPatent = read(patent)
dfPatent.head()

  if self.run_code(code, result):


Unnamed: 0,id,number,date,abstract,title,num_claims,patent_id
0,3930729,3930729,1976-01-06,An interferometer arrangement of either the t...,Interferometer apparatus incorporating a spher...,39.0,3930729
1,3930857,3930857,1976-01-06,"A resist mask, whose configuration is changed...",Resist process,7.0,3930857
2,3930870,3930870,1976-01-06,An improved process for preparing a polishing...,Silicon polishing solution preparation,16.0,3930870
3,3931435,3931435,1976-01-06,Very sensitive electron beam positive resists...,Electron beam positive resists containing acet...,5.0,3931435
4,3931531,3931531,1976-01-06,A counter circuit counts all transitions of t...,Overlapped signal transition counter,7.0,3931531


In [24]:
# Write the frame to the "patent" table, replacing any existing table
# of that name; the DataFrame index is not stored.
dfPatent.to_sql(
    "patent",
    con=engine,
    if_exists="replace",
    index=False,
)

## location
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [25]:
# Read the file named by `location` into a DataFrame and preview its first rows.
dfLocation = read(location)
dfLocation.head()

Unnamed: 0,id,city,latitude,longitude
0,000yomlxfffl,Spruce Pine,35.9153,-82.0647
1,000zaps28vbi,Bridge City Westwego,29.9179,-90.1663
2,002ui5ctlmse,Dahlonega,34.5261,-83.9844
3,0043o8y61p3z,Rancho Bernardo,33.0186,-117.06
4,00a4lw6chrti,Levering,45.6358,-84.787


In [26]:
# Write the frame to the "location" table, replacing any existing table
# of that name; the DataFrame index is not stored.
dfLocation.to_sql(
    "location",
    con=engine,
    if_exists="replace",
    index=False,
)

## location_assignee
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [27]:
# Read the file named by `location_assignee` into a DataFrame and preview its first rows.
dfLocation_assignee = read(location_assignee)
dfLocation_assignee.head()

Unnamed: 0,location_id,assignee_id
0,wy09e6twn0s1,eaa92f175be7bfb71011f17eafb1e71f
1,406gqa22ukdm,e572ad43a89039b0d72acc4ce970a33f
2,lulvs12ykwd7,8ce825a978eebf26ad2c13de6e370bb3
3,tdk0ut5vx9ki,6c00cb129070696ef109f6264da00318
4,rfmxwk4iedfc,dabf354c29a6ebba31f54b9ed042241d


In [28]:
# Write the frame to the "location_assignee" table, replacing any existing
# table of that name; the DataFrame index is not stored.
dfLocation_assignee.to_sql(
    "location_assignee",
    con=engine,
    if_exists="replace",
    index=False,
)

## us_term_of_grant
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [patent](#patent)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [29]:
# Read the file named by `us_term_of_grant` into a DataFrame and preview its first rows.
dfGrant = read(us_term_of_grant)
dfGrant.head()

Unnamed: 0,patent_id,disclaimer_date
0,D657425,0000-00-00
1,D699845,0000-00-00
2,D525308,0000-00-00
3,9193114,0000-00-00
4,D532925,0000-00-00


In [30]:
# Write the frame to the "us_term_of_grant" table, replacing any existing
# table of that name; the DataFrame index is not stored.
dfGrant.to_sql(
    "us_term_of_grant",
    con=engine,
    if_exists="replace",
    index=False,
)