# Exploratory Data Analysis

In [4]:
import sqlite3
import pandas as pd
from pathlib import Path

from tqdm.notebook import tqdm
from typing import Union

proj_path = Path().absolute().parent
data_path = proj_path / 'data' 

# SQLite3 Database

```
airmeasure
-------------------------------------
sid           INTEGER PRIMARY KEY
measure_code  INTEGER NOT NULL UNIQUE
district      TEXT 
measure_name  TEXT
address       TEXT
measure_point TEXT
```

```
airquality
-------------------------------------
airid         INTEGER PRIMARY KEY
measure_code  INTEGER
datetime      TEXT
SO2           REAL
CO            REAL
O3            REAL
NO2           REAL
PM10          REAL
PM25          REAL
```

```
holiday
-------------------------------------
hid   INTEGER PRIMARY KEY
date  TEXT
day   TEXT
name  TEXT
type  TEXT
```

```
weather
-------------------------------------
wid INTEGER PRIMARY KEY
measure_code            INTEGER
measure_name            TEXT
datetime                TEXT
temperature             REAL
temperature_flag        REAL
precipitation           REAL
precipitation_flag      REAL
wind_speed              REAL
wind_speed_flag         REAL
wind_direction          REAL
wind_direction_flag     REAL
humidity                REAL
humidity_flag           REAL
vapor_pressure          REAL
dew_point_temperature   REAL
local_pressure          REAL
local_pressure_flag     REAL
sea_level_pressure      REAL
sea_level_pressure_flag REAL
sunshine                REAL
sunshine_flag           REAL
solar_radiation         REAL
solar_radiation_flag    REAL
snow                    REAL
snow_3hour              REAL
cloud                   REAL
mid_level_cloud         REAL
cloud_type              TEXT
lowest_cloud            REAL
visibility              INTEGER
ground_status_code      REAL
weather_status_code     REAL
ground_temperature      REAL
ground_temperature_flag REAL
5cm_soil_temperature    REAL
10cm_soil_temperature   REAL
20cm_soil_temperature   REAL
30cm_soil_temperature   REAL
```



In [14]:
class DBEngine:
    """SQLite3 DB Engine Wrapper"""
    def __init__(self, db_path: Union[str, Path]):
        self.conn = sqlite3.connect(db_path)

    def sql(self, query: str):
        cur = self.conn.cursor()
        res = cur.execute(query)
        res = res.fetchall()
        cur.close()
        return res

    def quit(self):
        self.conn.close()

db = DBEngine(db_path=data_path / "airpollution.db")

# EDA

## Airquality

```
airmeasure
-------------------------------------
sid           INTEGER PRIMARY KEY
measure_code  INTEGER NOT NULL UNIQUE
district      TEXT 
measure_name  TEXT
address       TEXT
measure_point TEXT
```

```
airquality
-------------------------------------
airid         INTEGER PRIMARY KEY
measure_code  INTEGER
datetime      TEXT
SO2           REAL
CO            REAL
O3            REAL
NO2           REAL
PM10          REAL
PM25          REAL
```

In [15]:
# What is the date range?
query = """
SELECT datetime
FROM airquality
LIMIT 5
"""
res = db.sql(query)
print(res)

[]


In [16]:
cur = db.conn.cursor()
cur.execute(query)

<sqlite3.Cursor at 0x1739782e880>

In [17]:
cur.fetchall()

[]

In [None]:
query = """

"""
db.sql()