# SQLite3 in Python

SQLite is a lightweight database engine, and `sqlite3` is the Python standard-library package for manipulating datasets stored in it using the Structured Query Language (SQL).

In this notebook we introduce the concept of a SQL database and show how to interact with these databases in Python.

The fundamental concepts in SQL are the following:
* a database is a collection of datasets stored as tables
* a table is a dataset consisting of a number of rows and columns, where each column has a name and a type
* a cursor is a Python object that allows us to interact with the tables in a database in several ways
  * INSERT allows us to add new rows
  * UPDATE allows us to modify the content of a row
  * DELETE allows us to remove rows
  * SELECT allows us to return rows of data from the tables in the database

Here is a link to all of the [SQLite statements](https://www.sqlite.org/lang.html) (and their grammar)

As usual we first import the libraries we will need.


In [1]:
import sqlite3
import csv
'done'

'done'

# Creating a table to store the data
SQL requires that all data be stored in tables where each column has a name and a type.

The SQLite types are TEXT, NUMERIC, INTEGER, REAL, BLOB 

Here we define a function to create the table we are going to use to store the PA01 dataset.

The PA01 dataset, courses.csv, has the following form as a CSV file:
```
subj	num	suf	format	term	code	inst	title	sec	enr
NEJS	188	A	LEC	Fall 2004	1043	Levy, Avigdor	RISE/DECLINE:OTTOMAN EMP	1	32
CHEM	121	A	LEC	Fall 2004	1043	Foxman, Bruce	INORGANIC CHEM I, LECS.	1	18
CHEM	130	A	LEC	Fall 2004	1043	Yu, Jinquan	ADV ORG. CHEM: STRUCTURE	1	14
```
We create the table below to store this data.

In [2]:
def create_data_table():
    ''' Create the table that stores the Brandeis course data.

    Connects to 'courses.db' in the current working directory and creates a
    table named data with one column per field of the course CSV file.

    Raises:
        sqlite3.OperationalError: if the CREATE TABLE statement fails, for
            example because a table named data already exists.
    '''
    con = sqlite3.connect('courses.db')
    try:
        cur = con.cursor()
        cur.execute('''CREATE TABLE data 
                     (subj text, num int, suf text, format text, term text, code int, inst text, title text, sec text, enr int)''')
        con.commit()
    finally:
        # Close the connection even when the statement raises, so a failed
        # call does not leave the database file open.
        con.close()

def remove_data_table():
    ''' Remove the data table and all of its rows from the database.

    Connects to 'courses.db' in the current working directory and drops the
    table named data.

    Raises:
        sqlite3.OperationalError: if the DROP TABLE statement fails, for
            example because no table named data exists.
    '''
    con = sqlite3.connect('courses.db')
    try:
        cur = con.cursor()
        cur.execute('''DROP TABLE data ''')
        con.commit()
    finally:
        # Close the connection even when the statement raises.
        con.close()

# loading the data
To load data into a database we can use the cursor's `execute` command (one row at a time) or its `executemany` command (all rows at once)
with an SQL "INSERT" statement. Each row in rows is a sequence of 10 values (matching the 10 question marks in the query).




In [22]:
def load_data():
    ''' Insert every line of data/courses.csv into the data table.

    Rows are inserted one at a time with cursor.execute. Each CSV row must
    have exactly 10 fields, one per question mark in the INSERT statement.
    No line is skipped, so a header line in the file is stored as an
    ordinary row.

    Raises:
        OSError: if data/courses.csv cannot be opened.
        sqlite3.Error: if an INSERT fails (e.g. missing table, wrong number
            of fields); nothing is committed in that case.
    '''
    con = sqlite3.connect('courses.db')
    try:
        cur = con.cursor()
        # newline='' lets the csv module handle line endings itself, and the
        # with-block guarantees the file handle is closed.
        with open('data/courses.csv', newline='') as course_file:
            for row in csv.reader(course_file):
                cur.execute("INSERT INTO data VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", row)
        con.commit()
    finally:
        # Close the connection on success and on failure alike.
        con.close()

def load_data2():
    ''' Insert every line of data/courses.csv into the data table in one call.

    Same effect as inserting the rows one by one, but the csv reader is
    handed directly to cursor.executemany. Each CSV row must have exactly 10
    fields, one per question mark in the INSERT statement. No line is
    skipped, so a header line in the file is stored as an ordinary row.

    Raises:
        OSError: if data/courses.csv cannot be opened.
        sqlite3.Error: if an INSERT fails (e.g. missing table, wrong number
            of fields); nothing is committed in that case.
    '''
    con = sqlite3.connect('courses.db')
    try:
        cur = con.cursor()
        # newline='' lets the csv module handle line endings itself, and the
        # with-block guarantees the file handle is closed.
        with open('data/courses.csv', newline='') as course_file:
            rows = csv.reader(course_file)
            cur.executemany("INSERT INTO data VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", rows)
        con.commit()
    finally:
        # Close the connection on success and on failure alike.
        con.close()
    
def get_all_data():
    ''' Return every row of the data table as a list of tuples.

    Returns:
        A list with one tuple per row; an empty list when the table has no
        rows.

    Raises:
        sqlite3.OperationalError: if the SELECT fails, for example because
            the data table does not exist.
    '''
    con = sqlite3.connect('courses.db')
    try:
        cur = con.cursor()
        # fetchall() materialises the rows before the connection is closed.
        # A SELECT changes nothing, so no commit is needed.
        return cur.execute("SELECT * FROM data").fetchall()
    finally:
        # Close the connection even when the query raises.
        con.close()

def remove_all_data():
    ''' Delete every row from the data table, keeping the table itself.

    Raises:
        sqlite3.OperationalError: if the DELETE fails, for example because
            the data table does not exist.
    '''
    con = sqlite3.connect('courses.db')
    try:
        cur = con.cursor()
        cur.execute("DELETE FROM data")
        con.commit()
    finally:
        # Close the connection even when the statement raises.
        con.close()


In [None]:
# List the names of all tables in courses.db by querying SQLite's
# sqlite_master catalog table, printing one 1-tuple per table.
con= sqlite3.connect('courses.db')
cur = con.cursor()
results = cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
for row in results:
    print(row)
con.commit()
con.close()


# Creating a SQLite shell

In [36]:
def shell():
    ''' Run a minimal interactive SQL prompt against courses.db.

    Repeatedly reads one SQL statement from standard input, executes it on a
    fresh connection, prints every row it returns, and commits. A statement
    that SQLite rejects is reported on standard output and the prompt
    continues. Entering quit ends the loop and prints bye.

    Statements are executed exactly as typed, so this is meant for trusted,
    interactive use only.
    '''
    command = input(">> ")
    while command != 'quit':
        con = sqlite3.connect('courses.db')
        try:
            cur = con.cursor()
            for row in cur.execute(command):
                print(row)
            con.commit()
        except (sqlite3.Error, sqlite3.Warning) as err:
            # A typo should not end the session: report it and keep prompting.
            # Closing without commit discards any partial change.
            print('error:', err)
        finally:
            # Close the per-command connection on success and failure alike.
            con.close()

        command = input(">> ")
    print('bye')
    
    

        
    

In [37]:
# Start the interactive SQL prompt; enter quit to leave it.
shell()

>>  select * from data limit 5


('subj', 'num', 'suf', 'format', 'term', 'code', 'inst', 'title', 'sec', 'enr')
('NEJS', 188, 'A', 'LEC', 'Fall 2004', 1043, 'Levy, Avigdor', 'RISE/DECLINE:OTTOMAN EMP', '1', 32)
('CHEM', 121, 'A', 'LEC', 'Fall 2004', 1043, 'Foxman, Bruce', 'INORGANIC CHEM I, LECS.', '1', 18)
('CHEM', 130, 'A', 'LEC', 'Fall 2004', 1043, 'Yu, Jinquan', 'ADV ORG. CHEM: STRUCTURE', '1', 14)
('COMP', 1, 'A', 'LEC', 'Fall 2004', 1043, 'Ruesch, Gordon', 'COMPOSITION', '3', 11)


>>  select subj,count(*) from data group by subj


('AAAS', 215)
('AAAS/ENG', 3)
('AAAS/FA', 3)
('AAAS/HIS', 2)
('AAAS/WGS', 9)
('AAPI', 3)
('AAPI/HIS', 4)
('AAPI/WGS', 3)
('AAS/AAPI', 1)
('AMST', 349)
('AMST/ANT', 4)
('AMST/ENG', 3)
('AMST/MUS', 11)
('AMST/SOC', 1)
('ANTH', 698)
('ANTH/ENG', 1)
('ANTH/NEJ', 1)
('ANTH/WGS', 1)
('ARBC', 148)
('BCBP', 58)
('BCHM', 162)
('BCSC', 5)
('BIBC', 8)
('BIOL', 1336)
('BIOP', 7)
('BIOT', 27)
('BIPH', 8)
('BISC', 74)
('BUS', 1332)
('BUS/ECON', 5)
('BUS/FIN', 9)
('CA', 5)
('CAST', 15)
('CBIO', 15)
('CHEM', 1219)
('CHIN', 358)
('CHSC', 31)
('CLAS', 145)
('CLAS/ENG', 1)
('CLAS/FA', 1)
('CLAS/NEJ', 1)
('CLAS/THA', 1)
('COEX', 41)
('COMH', 10)
('COML', 76)
('COML/ENG', 11)
('COML/HOI', 1)
('COML/HUM', 1)
('COML/REC', 1)
('COML/THA', 1)
('COMP', 226)
('COSI', 589)
('CP', 6)
('EAS', 4)
('EBIO', 3)
('ECON', 1361)
('ECON/FA', 5)
('ECON/FIN', 30)
('ECON/HIS', 3)
('ECS', 23)
('ECS/ENG', 2)
('ED', 808)
('EL', 239)
('ENG', 978)
('ENG/HIST', 2)
('ENVS', 130)
('ENVS/THA', 1)
('ESL', 131)
('FA', 1084)
('FA/NEJS', 

>>  select subj,count(*) as n from data group by subj order by n desc limit 10 


('HS', 3201)
('MUS', 1636)
('THA', 1438)
('ECON', 1361)
('BIOL', 1336)
('BUS', 1332)
('MATH', 1275)
('CHEM', 1219)
('FA', 1084)
('ENG', 978)


>>  quit


bye


In [27]:
# Start from an empty table: drop the existing data table, then recreate it.
# remove_data_table() runs DROP TABLE, so the table must already exist.
remove_data_table()
create_data_table()

In [29]:

# Insert the rows of data/courses.csv into the data table.
load_data()


In [30]:
# Fetch every row into a Python list and show how many rows were loaded.
data = get_all_data()
len(data)

35207

In [31]:
# Look at one sample row (index 1000) of the fetched data.
print(data[1000])

('EL', 94, 'A', 'LAB', 'Fall 2009', 1093, 'Morris, James', 'EXPERIENTIAL LEARNING PRACTICM', '8', 1)


In [24]:
# Delete every row, then re-fetch to confirm the table is now empty.
remove_all_data()
data = get_all_data()
len(data)




0

In [None]:
# Collect the distinct values of the code column into the list `codes`.
# con and cur are deliberately left open here: the following cells reuse them.
con= sqlite3.connect('courses.db')
cur = con.cursor()
results = cur.execute("SELECT distinct code from data ")
codes = [row[0] for row in results]
# NOTE(review): dropping the first element presumably removes the value that
# came from the CSV header line (load_data inserts it as an ordinary row).
# This relies on that value being returned first -- TODO confirm.
codes = codes[1:]
codes

In [None]:

# Reopen the connection (closing the one left open by an earlier cell) and
# print the number of rows stored for each value of the code column.
con.close()
con= sqlite3.connect('courses.db')
cur = con.cursor()
#cur.execute("DELETE FROM data WHERE code NOTNULL")
z = cur.execute("SELECT code,COUNT(*) from data group by code")
for row in z:
    # Print the fetched (code, count) row, not the cursor object itself.
    print(row)

In [None]:
# For every code collected earlier, print the COSI rows with that code,
# ordered by course number, followed by a separator line.
# Uses `codes` and the open cursor `cur` defined in earlier cells.
for code in codes:
    results = cur.execute("SELECT * from data where subj=(?) and code=(?) order by code, num",('COSI', code ))
    print("Year ",code)
    for row in results:
        print(row)
    print('------',end="\n\n")


In [None]:
# Print number, suffix, title and instructor of the COSI rows whose code is 1051.
# Uses the open cursor `cur` from an earlier cell.
results = cur.execute("SELECT num, suf, title, inst from data where subj=(?) and code=(?)",('COSI', 1051))
for row in results:
    print(row)

In [None]:
# Print the summed enrollment of COSI rows per instructor, largest total first.
# Uses the open cursor `cur` from an earlier cell.
results = cur.execute("SELECT inst,sum(enr) as n from data where subj=(?) group by inst order by n desc",('COSI', ))
for row in results:
    print(row)

In [None]:
# Print the summed enrollment of COSI rows per code, ordered by code.
# Uses the open cursor `cur` from an earlier cell.
# NOTE(review): term is selected but not part of GROUP BY; this presumably
# assumes every code corresponds to exactly one term -- confirm.
results = cur.execute("SELECT term,code,sum(enr) as n from data where subj=(?) group by code order by code",('COSI', ))
for row in results:
    print(row)

In [None]:
# Finish the session: commit and close the connection `con` opened in an earlier cell.
con.commit()
con.close()