# SQLite

In [1]:
import sqlite3
import pandas as pd
import pdcast as pdc

In [2]:
# Open the database read-only through a SQLite URI. A plain path would make
# sqlite3 silently create an empty database if the file were missing, and the
# problem would only surface later as "no such table"; mode=ro fails right
# here instead and also guarantees this notebook cannot modify the data.
conn = sqlite3.connect("file:../data/sql-murder-mystery.db?mode=ro", uri=True)

## Fetching via Cursor

In [3]:
# Create a cursor on the open connection; the queries below are executed through it
cursor = conn.cursor()

In [4]:
# execute() hands back the cursor itself, so the fetch can be chained onto it;
# every row of the person table ends up in `rows` as a list of tuples
rows = cursor.execute("SELECT * FROM person").fetchall()
rows

[(10000, 'Christoper Peteuil', 993845, 624, 'Bankhall Ave', '747714076'),
 (10007, 'Kourtney Calderwood', 861794, 2791, 'Gustavus Blvd', '477972044'),
 (10010, 'Muoi Cary', 385336, 741, 'Northwestern Dr', '828638512'),
 (10016, 'Era Moselle', 431897, 1987, 'Wood Glade St', '614621061'),
 (10025, 'Trena Hornby', 550890, 276, 'Daws Hill Way', '223877684'),
 (10027, 'Antione Godbolt', 439509, 2431, 'Zelham Dr', '491650087'),
 (10034, 'Kyra Buen', 920494, 1873, 'Sleigh Dr', '332497972'),
 (10039, 'Francesco Agundez', 278151, 736, 'Buswell Dr', '861079251'),
 (10095, 'Leslie Thate', 729987, 2772, 'Camellia Park Circle', '127944356'),
 (10122, 'Alva Conkel', 779002, 116, 'Diversey Circle', '148521773'),
 (10126, 'Denver Barness', 791807, 1232, 'Via Escuela Rd', '923716908'),
 (10127, 'Yessenia Fossen', 449378, 3087, 'Ash St', '314282107'),
 (10141, 'Brittney Garfield', 627292, 2303, 'E Glen Park Ave', '914555313'),
 (10145, 'Adolfo Milbury', 556561, 2261, 'S Burr Blvd', '883502067'),
 (10152

In [5]:
# emit each fetched record on its own line
for person_row in rows:
    print(person_row)

(10000, 'Christoper Peteuil', 993845, 624, 'Bankhall Ave', '747714076')
(10007, 'Kourtney Calderwood', 861794, 2791, 'Gustavus Blvd', '477972044')
(10010, 'Muoi Cary', 385336, 741, 'Northwestern Dr', '828638512')
(10016, 'Era Moselle', 431897, 1987, 'Wood Glade St', '614621061')
(10025, 'Trena Hornby', 550890, 276, 'Daws Hill Way', '223877684')
(10027, 'Antione Godbolt', 439509, 2431, 'Zelham Dr', '491650087')
(10034, 'Kyra Buen', 920494, 1873, 'Sleigh Dr', '332497972')
(10039, 'Francesco Agundez', 278151, 736, 'Buswell Dr', '861079251')
(10095, 'Leslie Thate', 729987, 2772, 'Camellia Park Circle', '127944356')
(10122, 'Alva Conkel', 779002, 116, 'Diversey Circle', '148521773')
(10126, 'Denver Barness', 791807, 1232, 'Via Escuela Rd', '923716908')
(10127, 'Yessenia Fossen', 449378, 3087, 'Ash St', '314282107')
(10141, 'Brittney Garfield', 627292, 2303, 'E Glen Park Ave', '914555313')
(10145, 'Adolfo Milbury', 556561, 2261, 'S Burr Blvd', '883502067')
(10152, 'Shanita Grigaliunas', 2060

## Fetching via Pandas

In [6]:
# Join every person to their interview and load the result straight into a DataFrame
df = pd.read_sql_query(
    sql="SELECT * FROM person JOIN interview ON person.id = interview.person_id",
    con=conn,
)
df.head()

Unnamed: 0,id,name,license_id,address_number,address_street_name,ssn,person_id,transcript
0,28508,Raylene Goldsby,531984,1696,S Holland St,817820356,28508,‘I deny it!’ said the March Hare.\n
1,63713,Josh Cunnane,990581,3445,Evers Way,334859137,63713,\n
2,86208,Samual Sojourner,902792,2703,S Carlinda Way,504537535,86208,"way, and the whole party swam to the shore.\n"
3,35267,Johana Lugardo,648853,830,Dulles Toll St,163403073,35267,"lessons in here? Why, there’s hardly room for ..."
4,33856,Bernie Shelmon,928638,316,Marl Pat Rd,789619111,33856,\n


## Downcasting

In [7]:
# Inspect column dtypes and memory usage before any downcasting
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4991 entries, 0 to 4990
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   id                   4991 non-null   int64 
 1   name                 4991 non-null   object
 2   license_id           4991 non-null   int64 
 3   address_number       4991 non-null   int64 
 4   address_street_name  4991 non-null   object
 5   ssn                  4991 non-null   object
 6   person_id            4991 non-null   int64 
 7   transcript           4991 non-null   object
dtypes: int64(4), object(4)
memory usage: 312.1+ KB


In [8]:
# Hand-picked narrower unsigned dtypes for the integer columns;
# the object (string) columns are left as they are
df_manual_downcast = df.astype(
    {
        "id": "uint32",
        "person_id": "uint32",
        "license_id": "uint32",
        "address_number": "uint16",
    }
)
df_manual_downcast.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4991 entries, 0 to 4990
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   id                   4991 non-null   uint32
 1   name                 4991 non-null   object
 2   license_id           4991 non-null   uint32
 3   address_number       4991 non-null   uint16
 4   address_street_name  4991 non-null   object
 5   ssn                  4991 non-null   object
 6   person_id            4991 non-null   uint32
 7   transcript           4991 non-null   object
dtypes: object(4), uint16(1), uint32(3)
memory usage: 224.3+ KB


In [9]:
# Let pdcast choose the dtypes, then inspect the resulting dtypes and memory usage
df_downcast = pdc.downcast(df)
df_downcast.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4991 entries, 0 to 4990
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype   
---  ------               --------------  -----   
 0   id                   4991 non-null   uint32  
 1   name                 4991 non-null   object  
 2   license_id           4991 non-null   uint32  
 3   address_number       4991 non-null   uint16  
 4   address_street_name  4991 non-null   object  
 5   ssn                  4991 non-null   object  
 6   person_id            4991 non-null   uint32  
 7   transcript           4991 non-null   category
dtypes: category(1), object(3), uint16(1), uint32(3)
memory usage: 274.6+ KB


## Export Data

In [31]:
# Write the joined frame to CSV; index=False omits the default RangeIndex,
# which carries no data and would otherwise be saved as an unnamed first column
df.to_csv("../data/person.csv", index=False)

## Close DB Connection

In [15]:
# Release the SQLite connection now that all queries have run
conn.close()