# Creating a DB in sqlite3


## SQL in sqlite3

In [1]:
%%capture
%%bash
# Refresh the package index, then install the sqlite3 CLI and the
# wamerican-insane word list package. %%capture hides the apt output.
apt-get update
apt-get install --yes sqlite3 wamerican-insane


In [2]:
# Print the sqlite3 CLI usage text, then the installed version.
!sqlite3 --help
!sqlite3 --version

Usage: sqlite3 [OPTIONS] FILENAME [SQL]
FILENAME is the name of an SQLite database. A new database is created
if the file does not previously exist.
OPTIONS include:
   -A ARGS...           run ".archive ARGS" and exit
   -append              append the database to the end of the file
   -ascii               set output mode to 'ascii'
   -bail                stop after hitting an error
   -batch               force batch I/O
   -box                 set output mode to 'box'
   -column              set output mode to 'column'
   -cmd COMMAND         run "COMMAND" before reading stdin
   -csv                 set output mode to 'csv'
   -deserialize         open the database using sqlite3_deserialize()
   -echo                print commands before execution
   -init FILENAME       read/process named file
   -[no]header          turn headers on or off
   -help                show this message
   -html                set output mode to HTML
   -interactive         force interactive I/O
   -j

In [3]:
# List the word-list files available under /usr/share/dict/.
!ls -l /usr/share/dict/


total 6768
-rw-r--r-- 1 root root 6922426 Jan 20  2022 american-english-insane
-rw-r--r-- 1 root root     199 Dec  2  2021 README.select-wordlist
lrwxrwxrwx 1 root root      30 Oct 16 19:43 words -> /etc/dictionaries-common/words


In [4]:
# Count the lines in the word-list file.
!wc -l /usr/share/dict/american-english-insane


663473 /usr/share/dict/american-english-insane


In [5]:
%%bash
# Count the lines that consist of exactly five characters in the range a-z.
# grep -E is used instead of the obsolescent egrep wrapper, which prints a
# warning on recent GNU grep releases.
grep -E '^[a-z]{5}$' /usr/share/dict/american-english-insane | wc -l

17516


In [6]:
%%bash
# Build /tmp/words.5.csv: three preamble lines, one header row, then numbered
# rows of six randomly chosen five-letter words each.
#
# The word list is named explicitly (rather than via the /usr/share/dict/words
# symlink) so this cell reads the same file that the earlier cells counted.
# grep -E replaces the obsolescent egrep wrapper.
# NOTE: shuf is not seeded, so the selected words differ on every run.
WORDLIST=/usr/share/dict/american-english-insane
{
  # Preamble that the import step has to skip.
  echo "blah blah blah"
  echo
  echo
  # Header row: words1,words2,...,words7
  echo words{1..7} | tr ' ' ,
  # 420 random words -> 6 per line -> prefix a line number -> strip the
  # padding spaces cat -n adds -> turn the tab after the number into a comma.
  grep -E '^[a-z]{5}$' "$WORDLIST" |
    shuf -n 420 |
    paste -d, - - - - - - |
    cat -n |
    tr -d ' ' |
    tr '\t' ,
} > /tmp/words.5.csv


In [7]:
# Show the first few entries; cat -nvet numbers each line and makes line ends ($),
# tabs and other non-printing characters visible, so stray whitespace would show up
!head /tmp/words.5.csv | cat -nvet


     1	blah blah blah$
     2	$
     3	$
     4	words1,words2,words3,words4,words5,words6,words7$
     5	1,clies,droud,crunt,licet,skort,yeats$
     6	2,wiver,scaud,looms,calve,skeer,rebop$
     7	3,infix,exsec,shule,baroi,hakas,impar$
     8	4,dotes,tript,chold,fetwa,ulmic,porry$
     9	5,fauve,serry,sneed,glits,pylar,suant$
    10	6,sarda,lilas,memes,binna,nails,didst$


In [8]:
# Count the lines, words and bytes in the CSV (wc prints them in that order)
!wc /tmp/words.5.csv


  74   74 2790 /tmp/words.5.csv


In [9]:
# Remove any existing database so the notebook can be re-run from a clean state
!rm -rf words.sql3


In [10]:
%%script sqlite3 --column --header words.sql3
-- Import CSV into SQLite3 (table "words").
-- The %%script cell magic must be the first line of the cell, so notes are
-- written as SQL comments below it.
-- The grep pipeline emits the first line starting with "words" (the header
-- row) and everything after it, which drops the preamble lines of the file.
.mode csv
.import '| grep -A 100000000 ^words /tmp/words.5.csv' words


In [11]:
# List the working directory to confirm that words.sql3 has been created
!ls -l


total 12
drwxr-xr-x 1 root root 4096 Oct 14 13:23 sample_data
-rw-r--r-- 1 root root 8192 Oct 16 19:48 words.sql3


In [12]:
%%script sqlite3 --column --header words.sql3
-- Query the table: preview the first 10 imported rows.
-- (The cell magic has to be the first line of the cell; the statement is
-- terminated explicitly instead of relying on end-of-input.)
SELECT *
FROM words
LIMIT 10;


words1  words2  words3  words4  words5  words6  words7
------  ------  ------  ------  ------  ------  ------
1       clies   droud   crunt   licet   skort   yeats 
2       wiver   scaud   looms   calve   skeer   rebop 
3       infix   exsec   shule   baroi   hakas   impar 
4       dotes   tript   chold   fetwa   ulmic   porry 
5       fauve   serry   sneed   glits   pylar   suant 
6       sarda   lilas   memes   binna   nails   didst 
7       gavia   bahur   kiaat   cryer   butyr   kuzus 
8       fused   ugric   rucky   drear   carot   nonas 
9       rasps   rhyme   gulpy   layer   decyl   gaols 
10      abied   guijo   tangi   yaray   csect   gamer 


In [13]:
%%script sqlite3 --column --header words.sql3
-- View schema of the freshly imported database.
-- (The cell magic has to be the first line of the cell.)
.schema


CREATE TABLE IF NOT EXISTS "words"(
  "words1" TEXT,
  "words2" TEXT,
  "words3" TEXT,
  "words4" TEXT,
  "words5" TEXT,
  "words6" TEXT,
  "words7" TEXT
);


In [14]:
%%script sqlite3 --column --header words.sql3
-- Create a table with correct data types, including a primary key
-- (unique, indexed column) on the numeric first column.
-- (The cell magic has to be the first line of the cell.)
CREATE TABLE words_real(
  "words1" INTEGER PRIMARY KEY ASC,
  "words2" TEXT,
  "words3" TEXT,
  "words4" TEXT,
  "words5" TEXT,
  "words6" TEXT,
  "words7" TEXT
);


In [15]:
%%script sqlite3 --column --header words.sql3
-- View schema: both "words" and "words_real" should now be listed.
-- (The cell magic has to be the first line of the cell.)
.schema


CREATE TABLE IF NOT EXISTS "words"(
  "words1" TEXT,
  "words2" TEXT,
  "words3" TEXT,
  "words4" TEXT,
  "words5" TEXT,
  "words6" TEXT,
  "words7" TEXT
);
CREATE TABLE words_real(
  "words1" INTEGER PRIMARY KEY ASC,
  "words2" TEXT,
  "words3" TEXT,
  "words4" TEXT,
  "words5" TEXT,
  "words6" TEXT,
  "words7" TEXT
);


In [16]:
%%script sqlite3 --column --header words.sql3
-- Select data into the correctly typed table, i.e. "copy" the data.
-- Columns are listed explicitly so the copy does not depend on both tables
-- declaring their columns in the same order.
-- (The cell magic has to be the first line of the cell.)
INSERT INTO words_real (words1, words2, words3, words4, words5, words6, words7)
SELECT words1, words2, words3, words4, words5, words6, words7
FROM words;


In [17]:
%%script sqlite3 --column --header words.sql3
-- Show the schema and the first 10 entries of the typed table.
-- (The cell magic has to be the first line of the cell.)
.schema
SELECT * FROM words_real LIMIT 10;


CREATE TABLE IF NOT EXISTS "words"(
  "words1" TEXT,
  "words2" TEXT,
  "words3" TEXT,
  "words4" TEXT,
  "words5" TEXT,
  "words6" TEXT,
  "words7" TEXT
);
CREATE TABLE words_real(
  "words1" INTEGER PRIMARY KEY ASC,
  "words2" TEXT,
  "words3" TEXT,
  "words4" TEXT,
  "words5" TEXT,
  "words6" TEXT,
  "words7" TEXT
);
words1  words2  words3  words4  words5  words6  words7
------  ------  ------  ------  ------  ------  ------
1       clies   droud   crunt   licet   skort   yeats 
2       wiver   scaud   looms   calve   skeer   rebop 
3       infix   exsec   shule   baroi   hakas   impar 
4       dotes   tript   chold   fetwa   ulmic   porry 
5       fauve   serry   sneed   glits   pylar   suant 
6       sarda   lilas   memes   binna   nails   didst 
7       gavia   bahur   kiaat   cryer   butyr   kuzus 
8       fused   ugric   rucky   drear   carot   nonas 
9       rasps   rhyme   gulpy   layer   decyl   gaols 
10      abied   guijo   tangi   yaray   csect   gamer 


In [18]:
%%script sqlite3 --column --header words.sql3
-- Create an index on the last column; IF NOT EXISTS makes the cell safe to re-run.
-- (The cell magic has to be the first line of the cell.)
create index if not exists words7 on words_real (words7) ;


In [19]:
%%script sqlite3 --column --header words.sql3
-- Show the schema, noting the index on words_real.
-- (The cell magic has to be the first line of the cell.)
.schema


CREATE TABLE IF NOT EXISTS "words"(
  "words1" TEXT,
  "words2" TEXT,
  "words3" TEXT,
  "words4" TEXT,
  "words5" TEXT,
  "words6" TEXT,
  "words7" TEXT
);
CREATE TABLE words_real(
  "words1" INTEGER PRIMARY KEY ASC,
  "words2" TEXT,
  "words3" TEXT,
  "words4" TEXT,
  "words5" TEXT,
  "words6" TEXT,
  "words7" TEXT
);
CREATE INDEX words7 on words_real (words7) ;


In [21]:
%%script sqlite3 --column --header words.sql3
-- Create a derived column by concatenating the last two word columns.
-- The separator is a single-quoted string literal: double quotes denote
-- identifiers in SQL, and sqlite3 CLI builds that disable the double-quoted
-- string fallback would reject "+" as an unknown column. "both" stays
-- double-quoted because it is an identifier (the column alias).
-- (The cell magic has to be the first line of the cell.)
SELECT *, words6 || '+' || words7 AS "both"
FROM words_real
LIMIT 10;


words1  words2  words3  words4  words5  words6  words7  both       
------  ------  ------  ------  ------  ------  ------  -----------
1       clies   droud   crunt   licet   skort   yeats   skort+yeats
2       wiver   scaud   looms   calve   skeer   rebop   skeer+rebop
3       infix   exsec   shule   baroi   hakas   impar   hakas+impar
4       dotes   tript   chold   fetwa   ulmic   porry   ulmic+porry
5       fauve   serry   sneed   glits   pylar   suant   pylar+suant
6       sarda   lilas   memes   binna   nails   didst   nails+didst
7       gavia   bahur   kiaat   cryer   butyr   kuzus   butyr+kuzus
8       fused   ugric   rucky   drear   carot   nonas   carot+nonas
9       rasps   rhyme   gulpy   layer   decyl   gaols   decyl+gaols
10      abied   guijo   tangi   yaray   csect   gamer   csect+gamer


## Using pandas and sqlite3 Python modules

In [22]:
import sqlite3 as db
import pandas as pd


In [23]:
# Open the SQLite database file and read the raw imported table into a data frame
db_con = db.connect("words.sql3")
words = pd.read_sql_query(sql="select * from words", con=db_con)
words


Unnamed: 0,words1,words2,words3,words4,words5,words6,words7
0,1,clies,droud,crunt,licet,skort,yeats
1,2,wiver,scaud,looms,calve,skeer,rebop
2,3,infix,exsec,shule,baroi,hakas,impar
3,4,dotes,tript,chold,fetwa,ulmic,porry
4,5,fauve,serry,sneed,glits,pylar,suant
...,...,...,...,...,...,...,...
65,66,solar,tunis,sstor,utchy,trone,mesas
66,67,drunk,larin,eases,strub,bushy,varia
67,68,kazak,scobs,brage,pulus,ngana,muist
68,69,alamo,ragas,apism,waits,birde,krems


In [24]:
# Summarise the data frame: row count, column names, non-null counts and dtypes
words.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70 entries, 0 to 69
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   words1  70 non-null     object
 1   words2  70 non-null     object
 2   words3  70 non-null     object
 3   words4  70 non-null     object
 4   words5  70 non-null     object
 5   words6  70 non-null     object
 6   words7  70 non-null     object
dtypes: object(7)
memory usage: 4.0+ KB


In [25]:
# Read the typed table through the same connection into its own data frame
words_real = pd.read_sql_query(sql="select * from words_real", con=db_con)
words_real


Unnamed: 0,words1,words2,words3,words4,words5,words6,words7
0,1,clies,droud,crunt,licet,skort,yeats
1,2,wiver,scaud,looms,calve,skeer,rebop
2,3,infix,exsec,shule,baroi,hakas,impar
3,4,dotes,tript,chold,fetwa,ulmic,porry
4,5,fauve,serry,sneed,glits,pylar,suant
...,...,...,...,...,...,...,...
65,66,solar,tunis,sstor,utchy,trone,mesas
66,67,drunk,larin,eases,strub,bushy,varia
67,68,kazak,scobs,brage,pulus,ngana,muist
68,69,alamo,ragas,apism,waits,birde,krems


In [26]:
# Summarise the data frame read from words_real; compare the dtype of words1
# with the frame read from the untyped "words" table
words_real.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70 entries, 0 to 69
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   words1  70 non-null     int64 
 1   words2  70 non-null     object
 2   words3  70 non-null     object
 3   words4  70 non-null     object
 4   words5  70 non-null     object
 5   words6  70 non-null     object
 6   words7  70 non-null     object
dtypes: int64(1), object(6)
memory usage: 4.0+ KB


In [27]:
# Keep only the first 10 rows as a separate data frame and display it
words_real2 = words_real.head(10)
words_real2


Unnamed: 0,words1,words2,words3,words4,words5,words6,words7
0,1,clies,droud,crunt,licet,skort,yeats
1,2,wiver,scaud,looms,calve,skeer,rebop
2,3,infix,exsec,shule,baroi,hakas,impar
3,4,dotes,tript,chold,fetwa,ulmic,porry
4,5,fauve,serry,sneed,glits,pylar,suant
5,6,sarda,lilas,memes,binna,nails,didst
6,7,gavia,bahur,kiaat,cryer,butyr,kuzus
7,8,fused,ugric,rucky,drear,carot,nonas
8,9,rasps,rhyme,gulpy,layer,decyl,gaols
9,10,abied,guijo,tangi,yaray,csect,gamer


In [28]:
# Save the 10-row data frame words_real2 (not the full words_real) to a new table
# in the existing database, replacing that table if it already exists.
# The value displayed below the cell is the return value of to_sql.
words_real2.to_sql("words_real2", db_con, if_exists="replace")


10

In [29]:
%%script sqlite3 --column --header words.sql3
-- Show the schema of the entire database, including the table written from pandas.
-- (The cell magic has to be the first line of the cell.)
.schema


CREATE TABLE IF NOT EXISTS "words"(
  "words1" TEXT,
  "words2" TEXT,
  "words3" TEXT,
  "words4" TEXT,
  "words5" TEXT,
  "words6" TEXT,
  "words7" TEXT
);
CREATE TABLE words_real(
  "words1" INTEGER PRIMARY KEY ASC,
  "words2" TEXT,
  "words3" TEXT,
  "words4" TEXT,
  "words5" TEXT,
  "words6" TEXT,
  "words7" TEXT
);
CREATE INDEX words7 on words_real (words7) ;
CREATE TABLE IF NOT EXISTS "words_real2" (
"index" INTEGER,
  "words1" INTEGER,
  "words2" TEXT,
  "words3" TEXT,
  "words4" TEXT,
  "words5" TEXT,
  "words6" TEXT,
  "words7" TEXT
);
CREATE INDEX "ix_words_real2_index"ON "words_real2" ("index");
