# Data integration notebook

Use this notebook to update the database with your new data.

## PostgreSQL connection check

Let's first check that your database connection works.

Adapt the credentials to your configuration and run this cell to make sure you can connect to your database.

In [None]:
import psycopg2

# Connection settings -- adapt these to your own PostgreSQL configuration.
POSTGRES_HOST = "localhost"
POSTGRES_PORT = "5432"
POSTGRES_DB = "hack_zurich"
POSTGRES_USER = "hack_zurich"
POSTGRES_PASSWORD = "hack_zurich"

# Folder containing the CSV files read by the import cells further down.
INPUT_DATA_FOLDER = "input_data"

# Gather the keyword arguments first so the connect call stays on one line.
connection_settings = {
    "dbname": POSTGRES_DB,
    "user": POSTGRES_USER,
    "password": POSTGRES_PASSWORD,
    "host": POSTGRES_HOST,
    "port": POSTGRES_PORT,
}
db_connection = psycopg2.connect(**connection_settings)

# With autocommit enabled every statement is committed as soon as it runs,
# so no explicit commit() call is needed in the cells below.
db_connection.set_session(autocommit=True)

# This cursor is shared by all the following cells.
db_cursor = db_connection.cursor()

# Smoke test: if connecting or querying fails, an exception is raised and the
# success message below is never printed.
db_cursor.execute("SELECT * FROM accessibility_bus LIMIT 50")

print("Connection to PostgreSQL works")

# Data update

Run this cell to update your data (add new rows).

## Purge database and import new data

This cell will purge the database. **Make sure to backup your database before running this cell.**

In [None]:
import csv

# Empty every table that the import below repopulates, in the same order as
# before. CASCADE also truncates the tables that reference the named table
# through a foreign key.
for truncate_statement in (
    'TRUNCATE indicator_values2',
    'TRUNCATE indicators CASCADE',
    'TRUNCATE spatialunit CASCADE',
    'TRUNCATE commune_type CASCADE',
):
    db_cursor.execute(truncate_statement)

# Load EN_T_COMMUNE_TYPE.csv into the commune_type table, one INSERT per row.
# newline="" lets the csv module handle line endings itself, so quoted fields
# that contain line breaks are parsed correctly.
with open(f"{INPUT_DATA_FOLDER}/EN_T_COMMUNE_TYPE.csv", "r", newline="") as csv_file:
    reader = csv.reader(csv_file)

    next(reader)  # Skip the header row

    # Each data row must have exactly two columns; any other width raises
    # ValueError when the row is unpacked.
    for commune_type_id, description in reader:
        db_cursor.execute("""
            INSERT INTO commune_type 
            VALUES (
                %(commune_type_id)s,
                %(description)s
        )""", {
            'commune_type_id': commune_type_id,
            'description': description
        })

# Load EN_T_SPATIALUNIT.csv into the spatialunit table, one INSERT per row.
# newline="" lets the csv module handle line endings itself, so quoted fields
# that contain line breaks are parsed correctly.
with open(f"{INPUT_DATA_FOLDER}/EN_T_SPATIALUNIT.csv", "r", newline="") as csv_file:
    reader = csv.reader(csv_file)

    next(reader)  # Skip the header row

    # Each data row must have exactly sixteen columns; any other width raises
    # ValueError when the row is unpacked.
    for (spatialunit_id, type_id, name, bfs_nr, name_combined, district_id,
         region_id, commune_type_id, zip_code, tel, fax, homepage, email,
         adresse, height, area) in reader:
        db_cursor.execute("""
            INSERT INTO spatialunit 
            VALUES (
                %(spatialunit_id)s,
                %(type_id)s,
                %(name)s,
                %(bfs_nr)s,
                %(name_combined)s,
                %(district_id)s,
                %(region_id)s,
                %(commune_type_id)s,
                %(zip)s,
                %(tel)s,
                %(fax)s,
                %(homepage)s,
                %(email)s,
                %(adresse)s,
                %(height)s,
                %(area)s
        )""", {
            'spatialunit_id': int(spatialunit_id),
            # `value or None` turns an empty CSV field into SQL NULL; the
            # csv reader only yields strings, so '' is the only falsy value.
            'type_id': type_id or None,
            'name': name,
            'bfs_nr': bfs_nr or None,
            'name_combined': name_combined,
            'district_id': district_id or None,
            'region_id': region_id or None,
            'commune_type_id': commune_type_id or None,
            # The remaining text fields are stored as-is, empty strings included.
            'zip': zip_code,
            'tel': tel,
            'fax': fax,
            'homepage': homepage,
            'email': email,
            'adresse': adresse,
            'height': height or None,
            'area': area or None,
        })

# Load EN_INDICATORS.csv into the indicators table, one INSERT per row.
# newline="" lets the csv module handle line endings itself, so quoted fields
# that contain line breaks are parsed correctly.
with open(f"{INPUT_DATA_FOLDER}/EN_INDICATORS.csv", "r", newline="") as csv_file:
    reader = csv.reader(csv_file)

    next(reader)  # Skip the header row

    # Each data row must have exactly nine columns; any other width raises
    # ValueError when the row is unpacked.
    for (indicator_id, name, description, source, unit_short, unit_long,
         current_date, min_year, max_year) in reader:
        db_cursor.execute("""
            INSERT INTO indicators
            VALUES (
                %(indicator_id)s,
                %(name)s,
                %(description)s,
                %(source)s,
                %(unit_short)s,
                %(unit_long)s,
                %(current_date)s,
                %(min_year)s,
                %(max_year)s
        )""", {
            'indicator_id': indicator_id,
            'name': name,
            'description': description,
            # `value or None` turns an empty CSV field into SQL NULL.
            'source': source or None,
            'unit_short': unit_short or None,
            'unit_long': unit_long or None,
            'current_date': current_date or None,
            'min_year': min_year or None,
            'max_year': max_year or None
        })
        
# Load EN_INDICATOR_VALUES.csv into the indicator_values2 table, one INSERT
# per row. newline="" lets the csv module handle line endings itself, so
# quoted fields that contain line breaks are parsed correctly.
with open(f"{INPUT_DATA_FOLDER}/EN_INDICATOR_VALUES.csv", "r", newline="") as csv_file:
    reader = csv.reader(csv_file)

    next(reader)  # Skip the header row

    # Each data row must have exactly six columns; any other width raises
    # ValueError when the row is unpacked.
    # NOTE(review): unlike the imports above, empty fields are passed through
    # as '' here rather than converted to NULL -- confirm this is intended.
    for indicator_id, spatialunit_id, year, value, value_addition, cat in reader:
        db_cursor.execute("""
            INSERT INTO indicator_values2
            VALUES (
                %(indicator_id)s,
                %(spatialunit_id)s,
                %(year)s,
                %(value)s,
                %(value_addition)s,
                %(cat)s
        )""", {
            'indicator_id': indicator_id,
            'spatialunit_id': spatialunit_id,
            'year': year,
            'value': value,
            'value_addition': value_addition,
            'cat': cat
        })

print("Data has been imported into the PostgreSQL database.")

## Views creation

Run the cell below to generate and execute the SQL code necessary to create the views.

In [None]:
import csv

from psycopg2 import sql

# Template for the per-indicator view. The placeholders are filled with
# psycopg2.sql objects so that the description is escaped as a SQL literal
# (an apostrophe in it no longer breaks the statement) and the column alias
# is escaped as a quoted identifier.
create_view_template = sql.SQL("""
CREATE VIEW {view_name} AS
SELECT v.value as {view_column_value}, v.year, v.spatialunit_id
FROM indicators i inner join indicator_values2 v on i.indicator_id = v.indicator_id
WHERE trim(i.name) = {short_description};
""")

# Create one view per data row of INDICATORS_VIEWS.csv.
# newline="" lets the csv module handle line endings itself, so quoted fields
# that contain line breaks are parsed correctly.
with open(f"{INPUT_DATA_FOLDER}/INDICATORS_VIEWS.csv", "r", newline="") as views_csv:
    reader = csv.reader(views_csv)

    next(reader)  # Skip the header row

    # Each data row must have exactly five columns; the indicator id and the
    # question type are read but not used when building the view.
    for _indicator_id, short_description, _question_type, view_name, view_column_value in reader:
        # TODO: maybe use 'CREATE OR REPLACE VIEW' instead ?
        db_cursor.execute(create_view_template.format(
            # The view name is inserted verbatim (unquoted), exactly as
            # before, so PostgreSQL's usual case folding and any schema
            # qualification keep working. It must come from a trusted file.
            view_name=sql.SQL(view_name),
            view_column_value=sql.Identifier(view_column_value),
            short_description=sql.Literal(short_description),
        ))