In [24]:
import pandas as pd
import sqlite3



df_small = pd.read_csv('people-100.csv')
df_large = pd.read_csv('people-500000.csv')

conn = sqlite3.connect("people_case.db")

df_small.to_sql("people_small",conn, if_exists="replace",index=False)
df_large.to_sql("people_large",conn, if_exists="replace",index=False)


conn.close

<function Connection.close()>

## OUTPUT OF ABOVE Commands

1. people_small and people_large tables are being created



<img src="./Sqlite Mystery Trail/Creating_SQlite_tables.png">

In [25]:
import time
from tabulate import tabulate

def run_query(query,cur):
    start_time = time.perf_counter()
    cur.execute(query)
    end_time = time.perf_counter()
    elapsed_time_ms = (end_time - start_time)*1000
    
    result = cur.fetchall()
    headers = [desc[0] for desc in cur.description]

    print(f"\n Query executed in {elapsed_time_ms:.2f} ms")
    
    if result:
        print("\n" + tabulate(result, headers=headers, tablefmt="pretty"))
    else:
        print("\n No rows returned.")
    

In [26]:
conn = sqlite3.connect("people_case.db")
cur = conn.cursor()

count_people_small_query = """SELECT count(*) as Total_count_large FROM people_small;"""
count_peopel_large_query = """SELECT count(*) as Total_count_large FROM people_large;"""

run_query(count_people_small_query,cur)
run_query(count_peopel_large_query,cur)


 Query executed in 0.25 ms

+-------------------+
| Total_count_large |
+-------------------+
|        100        |
+-------------------+

 Query executed in 20.72 ms

+-------------------+
| Total_count_large |
+-------------------+
|      500000       |
+-------------------+


In [27]:
people_age_small_db = """
    SELECT count(*) FROM people_small                    
    WHERE
   (STRFTIME('%Y','now') - STRFTIME('%Y',"Date of birth")) - 
   (STRFTIME('%m-%d','now') < STRFTIME('%m-%d',"Date of birth")) > 50;
"""

people_age_large_db = """
    SELECT count(*) FROM people_large
    WHERE
   (STRFTIME('%Y','now') - STRFTIME('%Y',"Date of birth")) - 
   (STRFTIME('%m-%d','now') < STRFTIME('%m-%d',"Date of birth")) > 50;
"""

run_query(people_age_small_db,cur)
run_query(people_age_large_db,cur)



 Query executed in 0.80 ms

+----------+
| count(*) |
+----------+
|    58    |
+----------+

 Query executed in 398.65 ms

+----------+
| count(*) |
+----------+
|  293620  |
+----------+


In [28]:
get_avg_job_title_small="""
                    SELECT DISTINCT("job title"), Avg(Salary) 
                    AS "Average Salary" 
                    FROM people_small 
                    GROUP BY "job title" 
                    """

get_avg_job_title_large="""
                    SELECT DISTINCT("job title"), Avg(Salary) 
                    AS "Average Salary" 
                    FROM people_large
                    GROUP BY "job title" 
                    """
# ERROR : As Salary column is not present in the data link given
# run_query(get_avg_job_title_small,cur)
# run_query(get_avg_job_title_small,cur)

In [29]:
top_five_small="""
                SELECT * FROM people_small ORDER BY Salary LIMIT 5;
               """
top_five_large="""
                SELECT * FROM people_large ORDER BY Salary LIMIT 5;
               """
               
# Error: As Salary column in not present in the data link given

In [30]:
# Optional Advance: Find how many people work in each city.
# Changing the city with job title as city is not present in the dataset
count_people_per_city_small=""" 
                            SELECT DISTINCT("Job Title"), COUNT(*) FROM people_small 
                            GROUP BY "Job Title"; 
                            """
                            
run_query(count_people_per_city_small,cur)


 Query executed in 0.40 ms

+-------------------------------------------------------------+----------+
|                          Job Title                          | COUNT(*) |
+-------------------------------------------------------------+----------+
|              Accountant, chartered management               |    2     |
|                    Accounting technician                    |    1     |
|                Advertising account executive                |    1     |
|                   Agricultural consultant                   |    1     |
|                         Air broker                          |    1     |
|                        Archaeologist                        |    1     |
|                     Art gallery manager                     |    1     |
|                   Audiological scientist                    |    1     |
|                          Barrister                          |    1     |
|                    Biochemist, clinical                     |    3   

In [31]:
count_people_per_city_large=""" 
                            SELECT DISTINCT("Job Title"), COUNT(*) FROM people_large 
                            GROUP BY "Job Title"; 
                            """
run_query(count_people_per_city_large,cur)


 Query executed in 177.74 ms

+-------------------------------------------------------------+----------+
|                          Job Title                          | COUNT(*) |
+-------------------------------------------------------------+----------+
|                     Academic librarian                      |   761    |
|                    Accommodation manager                    |   837    |
|                    Accountant, chartered                    |   817    |
|               Accountant, chartered certified               |   762    |
|              Accountant, chartered management               |   758    |
|            Accountant, chartered public finance             |   758    |
|                    Accounting technician                    |   760    |
|                            Actor                            |   792    |
|                           Actuary                           |   799    |
|                        Acupuncturist                        |   825