In [1]:
import sqlite3
from sqlite3 import Error
import os

# GET THE CURRENT WORKING DIRECTORY
# (the directory the kernel was started in; the database path below is built from it)
path = os.getcwd()

# DEFINE FUNCTIONS TO MAKE THE CONNECTION
def create_connection(db_file):
    """ open a connection to the SQLite database stored in db_file
    :param db_file: database file
    :return: Connection object, or None when sqlite3 reports an error
    """
    try:
        # Success path: hand the freshly opened connection straight back.
        return sqlite3.connect(db_file)
    except Error as e:
        # Failure path: report the sqlite3 error and fall through to None.
        print(e)

    return None

def create_table(conn, create_table_sql):
    """ create a table from the create_table_sql statement
    :param conn: Connection object, or None if no connection could be opened
    :param create_table_sql: a CREATE TABLE statement
    :return: None; sqlite3 errors are printed rather than raised
    """
    # A missing connection is reported the same way as any other failure
    # here (printed), instead of surfacing as an AttributeError on None.
    if conn is None:
        print("Error! cannot create the database connection.")
        return

    try:
        c = conn.cursor()
        try:
            c.execute(create_table_sql)
        finally:
            # Release the cursor whether or not the statement succeeded.
            c.close()
    except Error as e:
        print(e)

In [2]:
# create a database connection
# (os.path.join builds the file path with the platform's own separator
# instead of a hard-coded "/")
conn = create_connection(os.path.join(path, "exercise.db"))

## Using Python to create fake data and add it to the exercise database

In [4]:
# %pip install faker   (run once if the package is not installed)
from faker import Faker
import pandas as pd

# Seed Faker's shared random generator before creating the instance so that
# a fresh Restart & Run All draws from the same random sequence.
Faker.seed(42)

# create fake profiles
fake = Faker()
profileData = [fake.simple_profile() for _ in range(100)]
profiles = pd.DataFrame(profileData)

# inspect the new dataframe
profiles.sample(2)

Unnamed: 0,username,name,sex,address,mail,birthdate
0,palmermonica,Samantha Gordon,F,"7692 Nicholas Overpass\nNew Stephanie, VT 28378",ebrown@hotmail.com,1933-09-20
1,rtate,Katrina Cherry,F,"585 Brandi Shoals\nNew Linda, LA 68284",ukennedy@gmail.com,1937-08-14
2,victoria47,Jimmy Moore,M,968 Daniel Throughway Suite 046\nNorth Douglas...,alexa93@yahoo.com,1943-02-17
3,eric20,Brian Payne,M,USNS Chung\nFPO AE 94939,megan11@gmail.com,2001-05-08
4,scottlogan,Margaret Duncan,F,"58641 Wilson Circles\nSchneiderfort, AR 60028",salassandra@gmail.com,1938-06-30


In [9]:
# create another dataframe with the salaries for each user

import random

# A dedicated, seeded generator makes the salaries reproducible on re-run
# without touching the global `random` state.
salary_rng = random.Random(42)

# sample() draws without replacement, so each username gets a distinct
# salary taken from range(100000, 500000).
salaries = pd.DataFrame({"username": profiles.username,
                         "salary": salary_rng.sample(range(100000, 500000), len(profiles.username))})

# inspect the new dataframe
salaries.sample(2)

Unnamed: 0,username,salary
34,michaelneal,120785
55,timothyjohnson,491725


### Add the data to the exercise database

In [13]:
# Store the profiles dataframe in the database: make sure the table exists,
# commit, then write the rows (if_exists='replace' overwrites any existing
# 'profiles' table with the dataframe's contents).
conn.execute(
    'CREATE TABLE IF NOT EXISTS profiles (username, name, sex, address, mail, birthdate)'
)
conn.commit()
profiles.to_sql(name='profiles', con=conn, if_exists='replace', index=False)

# Read the table back through pandas, using the same connection.
profiles_df = pd.read_sql_query(sql="SELECT * from profiles", con=conn)

# Show two random rows of what the database returned.
profiles_df.sample(n=2)

Unnamed: 0,username,name,sex,address,mail,birthdate
38,cbutler,Ashlee Bass,F,"842 Norman Streets\nNorth Theresaside, WY 90583",shoffman@gmail.com,1976-10-18
81,lnelson,Amanda Novak,F,"0520 Charles Street Apt. 711\nPughport, PW 53627",diazjoseph@hotmail.com,1946-08-19


In [14]:
# Store the salaries dataframe in the database: make sure the table exists,
# commit, then write the rows (if_exists='replace' overwrites any existing
# 'salaries' table with the dataframe's contents).
conn.execute(
    'CREATE TABLE IF NOT EXISTS salaries (username, salary)'
)
conn.commit()
salaries.to_sql(name='salaries', con=conn, if_exists='replace', index=False)

# Read the table back through pandas, using the same connection.
salaries_df = pd.read_sql_query(sql="SELECT * from salaries", con=conn)

# Show two random rows of what the database returned.
salaries_df.sample(n=2)

Unnamed: 0,username,salary
7,daviddavis,220698
46,shiggins,163418


## Using Pandas SQL to query the database

In [24]:
# Inner join of the two tables on username: only usernames present in both
# `salaries` and `profiles` appear in the result.

inner_join_query = '''
                   SELECT p.username,
                          s.salary,
                          p.sex,
                          p.birthdate
                   FROM salaries s 
                   INNER JOIN profiles p ON p.username == s.username
                   '''

# Run the query through pandas and display three random rows of the result.
pd.read_sql_query(sql=inner_join_query, con=conn).sample(n=3)

Unnamed: 0,username,salary,sex,birthdate
59,barbara08,346053,F,1972-05-29
17,xpena,130638,F,1939-04-01
22,caitlyn57,299871,M,1916-10-04


In [25]:
# Left join from `salaries` to `profiles`, then keep only the rows whose
# profile has sex 'M'.
# NOTE(review): the WHERE clause filters on a column of the right-hand table,
# so salary rows without a matching profile (p.sex would be NULL) are dropped
# and the result matches an inner join with the same filter. Confirm whether
# that is the intent.

left_join_query = '''
                   SELECT p.username,
                          s.salary,
                          p.sex,
                          p.birthdate
                   FROM salaries s 
                   LEFT JOIN profiles p ON p.username == s.username
                   WHERE p.sex = 'M'
                   '''

# Run the query through pandas and display three random rows of the result.
pd.read_sql_query(sql=left_join_query, con=conn).sample(n=3)

Unnamed: 0,username,salary,sex,birthdate
38,donaldsonalex,287061,M,2011-03-30
5,jesseschmitt,131520,M,1912-11-09
46,larrybanks,279688,M,1959-04-13
