# Chapter 1

### Pandas SQL

```
from pandasql import sqldf
import pandas as pd

# Helper for easier query execution. Defined with ``def`` rather than by
# assigning a lambda so the function has a real name in tracebacks.
def execute(q):
    """Run the SQL string ``q`` with pandasql and return its result.

    ``globals()`` is passed as the environment, so table names used in the
    query refer to the DataFrames defined at module level (``df1``, ``df2``).
    """
    return sqldf(q, globals())


# Load your CSV files into DataFrames
df1 = pd.read_csv("file1.csv")
df2 = pd.read_csv("file2.csv")

# Execute query with a join and store the result
query = """
    SELECT *
    FROM df1
    JOIN df2 ON df1.common_column = df2.common_column
"""
result_df = execute(query)

# Show results
result_df.head()
```

### Pandas sqlite

```
import sqlite3
import pandas as pd

def execute(query, database_path='dataset/database.sqlite'):
    """Run ``query`` against a SQLite database and return the rows as a DataFrame.

    Args:
        query: SQL text handed unchanged to ``sqlite3``.
        database_path: Path of the SQLite database file to open.

    Returns:
        A ``pandas.DataFrame`` with one row per fetched row and one column per
        result column, named after the cursor description. A statement that
        produces no result set yields an empty DataFrame.

    Note:
        This function never calls ``commit()`` on the connection.
    """
    connection = sqlite3.connect(database_path)
    try:
        # Execute once and take both the rows and the column names from the
        # same cursor; running the statement a second time just to read
        # ``description`` doubles the work and repeats any side effects.
        cursor = connection.execute(query)
        rows = cursor.fetchall()
        # ``description`` is None when the statement returns no result set.
        column_names = [column[0] for column in cursor.description or ()]
    finally:
        # Release the connection even when the query raises.
        connection.close()
    return pd.DataFrame(rows, columns=column_names)
def get_table_names(database_path='dataset/database.sqlite'):
    """Return the names of all tables stored in a SQLite database.

    Args:
        database_path: Path of the SQLite database file to open.

    Returns:
        A list with the ``name`` of every ``sqlite_master`` entry whose type
        is ``'table'``.
    """
    query = "SELECT name FROM sqlite_master WHERE type='table';"
    connection = sqlite3.connect(database_path)
    try:
        # The list is fully built before ``finally`` closes the connection.
        return [row[0] for row in connection.execute(query)]
    finally:
        # Close the connection even if the catalog query raises.
        connection.close()

# Get and print all table names in the database
tables = get_table_names()
print("Tables in the database:", tables)

from pandasql import sqldf


# Helper for easier query execution on DataFrames. Defined with ``def``
# rather than by assigning a lambda so it has a real name in tracebacks.
def execute_df(q):
    """Run the SQL string ``q`` with pandasql and return its result.

    ``globals()`` is passed as the environment, so table names used in the
    query refer to the DataFrames defined at module level.
    """
    return sqldf(q, globals())

```

### CASE

```
-- Per season: fraction of team 8455's decided home matches that were wins
SELECT
    season,
    ROUND(AVG(
        CASE         -- Start CASE
            WHEN hometeam_id = 8455 AND home_goal > away_goal THEN 1   -- home win
            WHEN hometeam_id = 8455 AND home_goal < away_goal THEN 0   -- home loss
            ELSE NULL                                                  -- draws and away matches; AVG skips NULLs
        END          -- End CASE
    ), 2) AS pct_homewins   -- last selected column: no trailing comma before FROM
FROM table_name
WHERE hometeam_id = 8455 OR awayteam_id = 8455  -- ADVISED: filter to this team's matches; without it every unrelated row also reaches the CASE and yields NULL
GROUP BY season;
```

# Chapter 2

### Sub-query

```
-- Simple query with two tables (comma join; the WHERE clause supplies the join condition)
SELECT left_table.colx, right_table.coly
FROM left_table, right_table
WHERE left_table.common_col = right_table.common_col;   -- terminate the statement so the next one parses separately

-- Subquery inside FROM (turning a query into a virtual table using an alias, evolving from the previous query)
SELECT left_table.colx, right_table.coly
FROM left_table,
    (SELECT coly, colz, common_col          -- subquery start
    FROM another_table) AS right_table      -- subquery end; the alias is how the outer query refers to it
WHERE left_table.common_col = right_table.common_col
ORDER BY left_table.colx;                   -- sort by a column that the template's tables actually expose


-- Subquery inside WHERE (used for a semi join / anti join)
-- Semi join as written: keeps left rows whose some_col appears in the subquery result.
-- Anti join: write NOT IN instead of IN. Beware: NOT IN matches no rows at all
-- if the subquery returns any NULL.
SELECT left_table.*
FROM left_table
WHERE left_table.some_col IN
        (                           -- subquery start
            SELECT another_col
            FROM right_table
        );                          -- subquery end

-- Subquery inside SELECT (REMEMBER: each subquery must produce a single value;
-- an aggregate without GROUP BY, as used here, always does)
SELECT
    outer_table.id,
    (   -- subquery 1: number of inner rows with the same id
        SELECT COUNT(*)
        FROM inner_table
        WHERE outer_table.id = inner_table.id
    ) AS new_col_1,
    (   -- subquery 2: average of some_column over those rows
        SELECT AVG(some_column)
        FROM inner_table
        WHERE outer_table.id = inner_table.id
    ) AS new_col_2,
    (   -- subquery 3: maximum of another_column over those rows
        SELECT MAX(another_column)
        FROM inner_table
        WHERE outer_table.id = inner_table.id
    ) AS new_col_3
FROM outer_table;

-- Using the WITH keyword (creating multiple virtual tables)
WITH
result_table1 AS (
    SELECT col1, col2 FROM t1),             -- subquery 1
result_table2 AS (
    SELECT colx, coly FROM t2)              -- subquery 2
SELECT * FROM result_table1, result_table2
WHERE result_table1.col1 <= result_table2.colx;   -- only columns selected inside a CTE can be referenced here
```

# Chapter 3

### Correlated Query

```
-- DISCOURAGED on large tables: the inner query is conceptually re-evaluated once for each row of outer_table

-- Example 1: keep outer rows whose salary is above the average salary
-- found in inner_table for the same department_id
SELECT
    id,
    name,
    salary
FROM outer_table
WHERE salary > (
    SELECT AVG(salary)
    FROM inner_table
    WHERE outer_table.department_id = inner_table.department_id   -- correlation with the current outer row
);

-- Example 2: fetch one value per outer row with a correlated scalar subquery
SELECT
    outer_table.id,
    outer_table.name,
    outer_table.department,
    (
        SELECT inner_table.salary
        FROM inner_table
        WHERE outer_table.id = inner_table.employee_id   -- correlation with the current outer row
    ) AS employee_salary
FROM outer_table;
```