# Loading Database
This notebook loads the cleaned CSV/TSV data files into a local SQLite database (`bigfish.db`) using Python's built-in [sqlite3](https://docs.python.org/3/library/sqlite3.html) module, one table per file.

Navigate to:  [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

## Resources

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import sqlite3                  # SQL Lite -- local database
# import cx_Oracle as cx        # <-- wasn't sure if we'll be using this

In [2]:
# Open a connection to the local SQLite database file; sqlite3 creates the
# file if it does not exist yet.  The to_sql() cells write through `engine`.
engine = sqlite3.connect(database="bigfish.db")

In [11]:
# Cleaned input files, one per database table.  The extension matters:
# read() parses the ".csv" files as comma-separated and the ".tsv" files as
# tab-separated.
acquisitions = "acquisitions_clean.csv"
application = "application_clean.csv"
assignee = "assignee_clean.csv"
cpc_current = "cpc_current_clean.csv"
group = "group_clean.tsv"
subgroup = "subgroup_clean.tsv"
subsection = "subsection_clean.tsv"
location = "location_clean.tsv"
location_assignee = "location_assignee_clean.csv"
us_term_of_grant = "us_term_of_grant_clean.csv"

# Every input file, in the order the tables are loaded below.  The list is
# built from the names above (rather than repeating the literals) so it cannot
# drift out of sync with them; it includes the application file, which is
# loaded like all the others.
files = [
    acquisitions,
    application,
    assignee,
    cpc_current,
    group,
    subgroup,
    subsection,
    location,
    location_assignee,
    us_term_of_grant,
]

## Functions

In [4]:
def read(url):
    """Load a delimited text file into a pandas DataFrame.

    The delimiter is chosen from the file extension: names ending in
    ".csv" (in any letter case) are parsed as comma-separated, everything
    else is parsed as tab-separated.  The first line of the file is used
    as the header row.

    Parameters
    ----------
    url : str or path-like
        Path or URL of the file to read; passed through to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    # Test the whole extension, case-insensitively, instead of looking at a
    # single character three positions from the end: that check misread
    # ".CSV" files as tab-separated and raised IndexError on very short names.
    sep = "," if str(url).lower().endswith(".csv") else "\t"
    # read_csv already returns a DataFrame, so no re-wrapping is needed.
    return pd.read_csv(url, sep=sep, header=0)

***
# Tables

## acquisitions
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [10]:
# Load the cleaned acquisitions file and preview its first five rows.
dfAcquisitions = read(acquisitions)
dfAcquisitions.head(5)

Unnamed: 0,AcquisitionYear,ChildCompany,ParentCompany
0,2015,bebop,Google
1,2015,Fly Labs,Google
2,2015,Clearleap,IBM
3,2015,Metanautix,Microsoft
4,2015,"Talko, Inc.",Microsoft


In [6]:
# Push the dataframe to the database, replacing any existing "acquisitions" table.
# index=False: the frame already carries a row number in its "Unnamed: 0"
# column, so also writing the frame index as an extra "index" column would
# only duplicate it.
dfAcquisitions.to_sql("acquisitions", con=engine, if_exists="replace", index=False)

# No manual INSERT loop is needed: to_sql() creates the table and inserts
# every row of the frame.


## application
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [12]:
# Load the cleaned application file and preview its first five rows.
dfApplication = read(application)
dfApplication.head(5)

Unnamed: 0,patent_id,country,date
0,D345393,US,1992-12-21
1,5164715,US,1990-04-10
2,5177974,US,1988-06-23
3,5379515,US,1994-02-16
4,5264790,US,1991-07-01


In [13]:
# Push the dataframe to the database, replacing any existing "application" table.
# index=False: the frame already carries a row number in its "Unnamed: 0"
# column, so also writing the frame index as an extra "index" column would
# only duplicate it.
dfApplication.to_sql("application", con=engine, if_exists="replace", index=False)

# No manual INSERT loop is needed: to_sql() creates the table and inserts
# every row of the frame.

## assignee
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [14]:
# Load the cleaned assignee file and preview its first five rows.
dfAssignee = read(assignee)
dfAssignee.head(5)

Unnamed: 0.1,Unnamed: 0,patent_id,assignee_id,organization
0,0,5856666,eaa92f175be7bfb71011f17eafb1e71f,U.S. Philips Corporation
1,1,5204210,e572ad43a89039b0d72acc4ce970a33f,Xerox Corporation
2,2,5302149,8ce825a978eebf26ad2c13de6e370bb3,Commonwealth Scientific & Industrial Research ...
3,3,9104354,6c00cb129070696ef109f6264da00318,Canon Kabushiki Kaisha
4,4,6584517,dabf354c29a6ebba31f54b9ed042241d,Cypress Semiconductor Corp.


In [15]:
# Push the dataframe to the database, replacing any existing "assignee" table.
# index=False: the frame already carries row numbers in its "Unnamed: 0.1" and
# "Unnamed: 0" columns, so also writing the frame index as an extra "index"
# column would only add a third copy.
dfAssignee.to_sql("assignee", con=engine, if_exists="replace", index=False)

# No manual INSERT loop is needed: to_sql() creates the table and inserts
# every row of the frame.


  dtype=dtype)


## cpc_current
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [16]:
# Load the cleaned cpc_current file and preview its first five rows.
dfCPC_current = read(cpc_current)
dfCPC_current.head(5)

Unnamed: 0,patent_id,section_id,subsection_id,group_id,subgroup_id
0,3930272,A,A47,A47D,A47D7/02
1,3930272,Y,Y10,Y10T,Y10T403/32451
2,3930273,A,A61,A61G,A61G7/0507
3,3930273,A,A61,A61G,A61G7/0509
4,3930274,B,B63,B63B,B63B7/085


In [17]:
# Push the dataframe to the database, replacing any existing "cpc_current" table.
# index=False: the frame already carries a row number in its "Unnamed: 0"
# column, so also writing the frame index as an extra "index" column would
# only duplicate it.
dfCPC_current.to_sql("cpc_current", con=engine, if_exists="replace", index=False)

# No manual INSERT loop is needed: to_sql() creates the table and inserts
# every row of the frame.


## group
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [18]:
# Load the cleaned group file (tab-separated) and preview its first five rows.
dfGroup = read(group)
dfGroup.head(5)

Unnamed: 0,id,title
0,A01B,SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS...
1,A01C,PLANTING; SOWING; FERTILISING
2,A01D,HARVESTING; MOWING
3,A01F,PROCESSING OF HARVESTED PRODUCE; HAY OR STRAW ...
4,A01G,"HORTICULTURE; CULTIVATION OF VEGETABLES, FLOWE..."


In [19]:
# Push the dataframe to the database, replacing any existing "group" table.
# NOTE: GROUP is an SQL keyword, so hand-written queries against this table
# must quote the name, e.g. SELECT * FROM "group".
# index=False: the frame already carries a row number in its "Unnamed: 0"
# column, so also writing the frame index as an extra "index" column would
# only duplicate it.
dfGroup.to_sql("group", con=engine, if_exists="replace", index=False)

# No manual INSERT loop is needed: to_sql() creates the table and inserts
# every row of the frame.



## subgroup
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [22]:
# Load the cleaned subgroup file (tab-separated) and preview its first five rows.
dfSubgroup = read(subgroup)
dfSubgroup.head(5)

Unnamed: 0,id,title
0,A01B1/00,Hand tools
1,A01B1/02,Hand tools -Spades; Shovels
2,A01B1/022,Hand tools -Spades; Shovels -Collapsible; exte...
3,A01B1/024,Hand tools -Spades; Shovels -Foot protectors a...
4,A01B1/026,Hand tools -Spades; Shovels -with auxiliary ha...


In [23]:
# Push the dataframe to the database, replacing any existing "subgroup" table.
# index=False: the frame already carries a row number in its "Unnamed: 0"
# column, so also writing the frame index as an extra "index" column would
# only duplicate it.
dfSubgroup.to_sql("subgroup", con=engine, if_exists="replace", index=False)

# No manual INSERT loop is needed: to_sql() creates the table and inserts
# every row of the frame.


## subsection
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [24]:
# Load the cleaned subsection file (tab-separated) and preview its first five rows.
dfSubsection = read(subsection)
dfSubsection.head(5)

Unnamed: 0,id,title
0,A01,AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI...
1,A21,BAKING; EDIBLE DOUGHS
2,A22,BUTCHERING; MEAT TREATMENT; PROCESSING POULTRY...
3,A23,"FOODS OR FOODSTUFFS; THEIR TREATMENT, NOT COVE..."
4,A24,TOBACCO; CIGARS; CIGARETTES; SMOKERS' REQUISITES


In [25]:
# Push the dataframe to the database, replacing any existing "subsection" table.
# index=False: the frame already carries a row number in its "Unnamed: 0"
# column, so also writing the frame index as an extra "index" column would
# only duplicate it.
dfSubsection.to_sql("subsection", con=engine, if_exists="replace", index=False)

# No manual INSERT loop is needed: to_sql() creates the table and inserts
# every row of the frame.
    

## location
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [26]:
# Load the cleaned location file (tab-separated) and preview its first five rows.
dfLocation = read(location)
dfLocation.head(5)

Unnamed: 0,id,city,latitude,longitude
0,000yomlxfffl,Spruce Pine,35.9153,-82.0647
1,000zaps28vbi,Bridge City Westwego,29.9179,-90.1663
2,002ui5ctlmse,Dahlonega,34.5261,-83.9844
3,0043o8y61p3z,Rancho Bernardo,33.0186,-117.06
4,00a4lw6chrti,Levering,45.6358,-84.787


In [27]:
# Push the dataframe to the database, replacing any existing "location" table.
# index=False: the frame already carries a row number in its "Unnamed: 0"
# column, so also writing the frame index as an extra "index" column would
# only duplicate it.
dfLocation.to_sql("location", con=engine, if_exists="replace", index=False)

# No manual INSERT loop is needed: to_sql() creates the table and inserts
# every row of the frame.
    

## location_assignee
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [28]:
# Load the cleaned location_assignee file and preview its first five rows.
dfLocation_assignee = read(location_assignee)
dfLocation_assignee.head(5)

Unnamed: 0,location_id,assignee_id
0,wy09e6twn0s1,eaa92f175be7bfb71011f17eafb1e71f
1,406gqa22ukdm,e572ad43a89039b0d72acc4ce970a33f
2,lulvs12ykwd7,8ce825a978eebf26ad2c13de6e370bb3
3,tdk0ut5vx9ki,6c00cb129070696ef109f6264da00318
4,rfmxwk4iedfc,dabf354c29a6ebba31f54b9ed042241d


In [29]:
# Push the dataframe to the database, replacing any existing
# "location_assignee" table.
# index=False: the frame already carries a row number in its "Unnamed: 0"
# column, so also writing the frame index as an extra "index" column would
# only duplicate it.
dfLocation_assignee.to_sql("location_assignee", con=engine, if_exists="replace", index=False)

# No manual INSERT loop is needed: to_sql() creates the table and inserts
# every row of the frame.
    

## us_term_of_grant
[resources](#Resources)
 | [functions](#Functions)
 | [acquisitions](#acquisitions)
 | [application](#application)
 | [assignee](#assignee) 
 | [cpc_current](#cpc_current)
 | [group](#group)
 | [subgroup](#subgroup)
 | [subsection](#subsection)
 | [location](#location)
 | [location_assignee](#location_assignee)
 | [us_term_of_grant](#us_term_of_grant)

In [30]:
# Load the cleaned us_term_of_grant file and preview its first five rows.
dfGrant = read(us_term_of_grant)
dfGrant.head(5)

Unnamed: 0,patent_id,disclaimer_date
0,D657425,0000-00-00
1,D699845,0000-00-00
2,D525308,0000-00-00
3,9193114,0000-00-00
4,D532925,0000-00-00


In [31]:
# Push the dataframe to the database, replacing any existing
# "us_term_of_grant" table.
# index=False: the frame already carries a row number in its "Unnamed: 0"
# column, so also writing the frame index as an extra "index" column would
# only duplicate it.
dfGrant.to_sql("us_term_of_grant", con=engine, if_exists="replace", index=False)

# No manual INSERT loop is needed: to_sql() creates the table and inserts
# every row of the frame.
    