# Library

In [1]:
import os

from io import StringIO
import datetime
import pandas as pd

import psycopg2

from pprint import pprint 

# version info
print('', 'Pandas version: ', pd.__version__, '\n', 'Psycopg 2 version:',
      psycopg2.__version__)

 Pandas version:  1.0.3 
 Psycopg 2 version: 2.8.5 (dt dec pq3 ext lo64)


# Connecting to PostgreSQL 

In [6]:
# Connection params
dbname = "postgres"
user = "postgres"
password = "****"
host = "localhost"
port = "5432"

In [3]:
# Basic interaction:

# 1. Create a new database session
connection = psycopg2.connect(dbname=dbname,
                            user=user,
                            password=password,
                            host=host,
                            port=port)

# 2. Open a cursor to perform database commands and queries
cursor = connection.cursor()

# 3. Run some SQL command:
cursor.execute("SELECT current_date;")

# 3.1 Fetchone (returns a single tuple)
date = cursor.fetchone()
print("Current date: ", date[0].isoformat())

# 4. Close the cursor
cursor.close()

# 5. Close the connection
connection.close()

Current date:  2020-05-28


In [4]:
# Let's try to connect and print some info
try:
    conn = psycopg2.connect(dbname=dbname,
                            user=user,
                            password=password,
                            host=host,
                            port=port)
    cursor = conn.cursor()

    # Print some info
    cursor.execute("SELECT current_user;")
    user = cursor.fetchone()

    cursor.execute("SELECT current_database();")
    db = cursor.fetchone()

    cursor.execute("SELECT version();")
    version = cursor.fetchone()

    print('', "Current user: ", user[0], "\n", "Current database: ", db[0], "\n",
          "Version info: ", '-'.join(version[0].split()[:4]), "\n")


except psycopg2.Error as error:
    print("Warning!", error)

finally:
        if conn is not None:
            cursor.close()
            conn.close()

 Current user:  postgres 
 Current database:  postgres 
 Version info:  PostgreSQL-12.1-on-x86_64-apple-darwin, 



# Homework 1

## Create a new Database

In [7]:
# CREATE DATABASE
try:
    conn = psycopg2.connect(dbname=dbname,
                            user=user,
                            password=password,
                            host=host,
                            port=port)

    # Set autocommit
    conn.autocommit = True
    cur = conn.cursor()

    # Create a new database
    cur.execute("CREATE DATABASE smm695;")
    print(cur.statusmessage)


except psycopg2.Error as error:
    print("Warning!", error)

finally:
        if conn is not None:
            cursor.close()
            conn.close()

CREATE DATABASE


## CREATE SCHEMA and TABLES

In [8]:
# Set the new db
dbname = 'smm695'

In [9]:
# Connect to smm695 and perform some tasks
try:
    conn = psycopg2.connect(dbname=dbname,
                            user=user,
                            password=password,
                            host=host,
                            port=port)
    cur = conn.cursor()

    # Create a schema
    cur.execute("CREATE SCHEMA homework1;")

    print(cur.statusmessage, '\n')

    # Create a table storing employees data
    cur.execute("""CREATE TABLE homework1.employees (
    id serial, 
    first_name  varchar(20),
    last_name varchar(20),
    email varchar(80),
    salary numeric);""")

    print(cur.statusmessage, '\n')

    # Create a table storing people data

    second_table_sql = """CREATE TABLE homework1.person (
    id serial,
    name varchar(10),
    country varchar(20),
    postal_code text,
    dob date);"""

    cur.execute(second_table_sql)

    print(cur.statusmessage)

    # Commit changes
    conn.commit()

except psycopg2.Error as error:
    print("Warning!", error)
    
finally:
        if conn is not None:
            cursor.close()
            conn.close()

CREATE SCHEMA 

CREATE TABLE 

CREATE TABLE


## Insert values: loop

In [10]:
# data to insert
employees = """
first_name, last_name, email,                 salary
Dave,       Alstom,    davealstom@google.com, 50000
Hunter,     Reese,     reese1998@hotmail.nl,  37000
Kerys,      Mcclure,   mcclure@gmail.com,     28000
"""
# loading the database
db = pd.read_csv(StringIO(employees),
                 skiprows=1,
                 skipinitialspace=True,
                 engine='python')

# obtaining tuples
values = list(zip(*map(db.get, db)))

# array of tuples 
values

[('Dave', 'Alstom', 'davealstom@google.com', 50000),
 ('Hunter', 'Reese', 'reese1998@hotmail.nl', 37000),
 ('Kerys', 'Mcclure', 'mcclure@gmail.com', 28000)]

In [11]:
# Connect to smm695 and perform some tasks
try:
    conn = psycopg2.connect(dbname=dbname,
                            user=user,
                            password=password,
                            host=host,
                            port=port)
    cur = conn.cursor()
    
    # SQL INSERT
    insert_sql = """INSERT INTO homework1.employees (first_name, last_name, email, salary)
    VALUES (%s, %s, %s, %s)"""
    
    # Insert data
    for value in values:
        cur.execute(insert_sql, value)
        print(cur.statusmessage)

    # Commit changes
    conn.commit()

except  psycopg2.Error as error:
    print("Warning!", error)

finally:
        if conn is not None:
            cursor.close()
            conn.close()

INSERT 0 1
INSERT 0 1
INSERT 0 1


## Inserting values: executemany

In [12]:
values = [
('Martina',  'Italy', '04929',datetime.date(1995,6,19)),
('Lenny', 'United Kingdom',  'E2 9AD',  datetime.date(1980,5,13)),
('Taha', 'China', '100023',   datetime.date(1994,9,12))]

In [13]:
# Connect to smm695 and perform some tasks
try:
    conn = psycopg2.connect(dbname=dbname,
                            user=user,
                            password=password,
                            host=host,
                            port=port)
    cur = conn.cursor()
    
    # SQL INSERT
    insert_sql = """INSERT INTO homework1.person (name, country, postal_code, dob)
    VALUES (%s, %s, %s, %s);"""
    
    # Insert data
    cur.executemany(insert_sql, values)

    # Commit changes
    conn.commit()


except psycopg2.Error as error:
    print("Warning!", error)

finally:
        if conn is not None:
            cursor.close()
            conn.close()

# Homework 2

## Create Schema and Table

In [14]:
# Connect to smm695 and perform some tasks
try:
    conn = psycopg2.connect(dbname=dbname,
                            user=user,
                            password=password,
                            host=host,
                            port=port)
    cur = conn.cursor()

    # Create a schema
    cur.execute("CREATE SCHEMA homework2;")

    print(cur.statusmessage, '\n')

    # Create a table storing employees data
    cur.execute("""CREATE TABLE homework2.employee (
    id serial PRIMARY KEY,
    name varchar(20) NOT NULL,
    department_id int,
    phone_number varchar(20) UNIQUE,
    salary numeric NOT NULL,
    CHECK (department_id = 1 OR department_id = 2));""")

    print(cur.statusmessage, '\n')

    # Commit changes
    conn.commit()


except psycopg2.Error as error:
    print("Warning!", error)

finally:
        if conn is not None:
            cursor.close()
            conn.close()

CREATE SCHEMA 

CREATE TABLE 



## Insert values: dictionary

In [15]:
employees = """
name,     department_id, phone_number, salary
John,     2,            690.623.6568,  20000
Leo,      1,            690.623.6708,  80000
Diana,    1,            690.623.0007,  110000
Paula,    2,            690.623.6500,  35000
Simon,    2,            690.623.9834,  45000
Jennifer, 2,                        ,  18000
"""

# loading the database
db = pd.read_csv(StringIO(employees),
                 skiprows=1,
                 skipinitialspace=True,
                 engine='python')

# data to dictionary
array_dict = db.to_dict('records')

pprint(array_dict)

[{'department_id': 2,
  'name': 'John',
  'phone_number': '690.623.6568',
  'salary': 20000},
 {'department_id': 1,
  'name': 'Leo',
  'phone_number': '690.623.6708',
  'salary': 80000},
 {'department_id': 1,
  'name': 'Diana',
  'phone_number': '690.623.0007',
  'salary': 110000},
 {'department_id': 2,
  'name': 'Paula',
  'phone_number': '690.623.6500',
  'salary': 35000},
 {'department_id': 2,
  'name': 'Simon',
  'phone_number': '690.623.9834',
  'salary': 45000},
 {'department_id': 2, 'name': 'Jennifer', 'phone_number': nan, 'salary': 18000}]


In [16]:
# Connect to smm695 and perform some tasks
try:
    conn = psycopg2.connect(dbname=dbname,
                            user=user,
                            password=password,
                            host=host,
                            port=port)
    cur = conn.cursor()
    
    # SQL INSERT
    insert_sql = """INSERT INTO homework2.employee (name, department_id, phone_number, salary)
    VALUES (%(name)s, %(department_id)s, %(phone_number)s, %(salary)s)"""
    
    # Insert data:
    cur.executemany(insert_sql, array_dict)

    # Commit changes
    conn.commit()


except  psycopg2.Error as error:
    print("Warning!", error)

finally:
        if conn is not None:
            cursor.close()
            conn.close()

## Tasks 5 to 7

In [17]:
#Connect to smm695 and perform some tasks
try:
    conn = psycopg2.connect(dbname=dbname,
                            user=user,
                            password=password,
                            host=host,
                            port=port)
    cur = conn.cursor()

    # 5. Calculate the average salary by department
    cur.execute("""
    SELECT department_id, ROUND(AVG(salary),2) AS avg_salary  FROM homework2.employee 
    GROUP BY department_id ORDER BY department_id;
    """)
    five = cur.fetchall()

    # 6. Calculate the maximum salary by department
    cur.execute("""
    SELECT department_id, MAX(salary) AS max_salary  FROM homework2.employee 
    GROUP BY department_id ORDER BY department_id;
    """)
    six = cur.fetchall()
    
    # 7. Count the number of observations that contain the letter 'o' in the 'name' field
    cur.execute("""
    SELECT COUNT(*) AS count_o  FROM homework2.employee 
    WHERE name LIKE '%o%';
    """)
    seven = cur.fetchone()
    
    # Show results
    print('', 'Average salary by department: ', five, '\n',
         'Maximum salary by department: ', six, '\n',
         'Observations that contain the letter o: ', seven[0])
    

except psycopg2.Error as error:
    print("Warning!", error)

finally:
        if conn is not None:
            cursor.close()
            conn.close()

 Average salary by department:  [(1, Decimal('95000.00')), (2, Decimal('29500.00'))] 
 Maximum salary by department:  [(1, Decimal('110000')), (2, Decimal('45000'))] 
 Observations that contain the letter o:  3


## Import csv

In [19]:
SRV = '/YOUR/PATH'
PATH = 'TO/THE'
FOLDER = 'FOLDER'

In [19]:
dbname = 'mydb'

# Connect to smm695 and perform some tasks
try:
    conn = psycopg2.connect(dbname=dbname,
                            user=user,
                            password=password,
                            host=host,
                            port=port)
    cur = conn.cursor()

    # Create car table
    cur.execute("""
    CREATE TABLE people.car (
    id bigserial PRIMARY KEY,
    car_make varchar(50),
    car_model varchar(50),
    car_year int,
    price numeric);
    """)
    print(cur.statusmessage, '\n')

    # Create location table
    cur.execute("""
    CREATE TABLE people.location (
    id bigserial PRIMARY KEY,
    country varchar(50),
    city varchar(50),
    street_name varchar(50),
    street_number int,
    postal_code varchar(50));
    """)
    print(cur.statusmessage, '\n')

    # Copy car.csv
    car = open(os.path.join(SRV, PATH, FOLDER, 'car.csv'), 'r') #file-like object
    copy_car = """
           COPY people.car(car_make, car_model, car_year, price) FROM stdin WITH CSV HEADER
           DELIMITER as ','
           """
    cur.copy_expert(copy_car, file=car)
    
    cur.execute("""SELECT * FROM people.car;""")
    car_fetch = cur.fetchmany(2)
    print( 'Car:', [car_fetch[i] for i in range(0, len(car_fetch), 1)], '\n')

    # Copy location.csv
    location = open(os.path.join(SRV, PATH, FOLDER, 'location.csv'), 'r') #file-like object
    copy_location = """
           COPY people.location(country, city, street_name, street_number, postal_code) FROM stdin WITH CSV HEADER
           DELIMITER as ','
           """
    cur.copy_expert(copy_location, file=location)
    
    cur.execute("""SELECT * FROM people.location;""")
    location_fetch = cur.fetchmany(2)
    print( 'Location:', [location_fetch[i] for i in range(0, len(location_fetch), 1)])

    # Commit changes
    conn.commit()


except psycopg2.Error as error:
    print("Warning!", error)

finally:
        if conn is not None:
            cursor.close()
            conn.close()

CREATE TABLE 

CREATE TABLE 

Car: [(1, 'Kia', 'Sportage', 2002, Decimal('95327')), (2, 'Ford', 'ZX2', 2001, Decimal('59149'))] 

Location: [(1, 'Poland', '≈Åodygowice', 'Porter', 0, '34-325'), (2, 'China', 'Gaozhuang', 'Spohn', 3, None)]


## Task 9 and 10

In [20]:
#Connect to smm695 and perform some tasks
try:
    conn = psycopg2.connect(dbname=dbname,
                            user=user,
                            password=password,
                            host=host,
                            port=port)
    cur = conn.cursor()

    # 9. Count the number of female observations with `dob' between 1990 and 1992
    cur.execute("""
    SELECT COUNT(*) FROM people.person  
    WHERE gender = 'Female' AND EXTRACT(YEAR FROM dob) BETWEEN 1990 AND 1992;
    """)
    nine = cur.fetchall()

    # 10. Select the two car models with the highest price for the last ten years
    cur.execute("""
    SELECT car_model, MAX(price) FROM people.car 
    WHERE car_year BETWEEN 2010 AND 2020 
    GROUP BY car_model 
    ORDER BY MAX(price) DESC LIMIT 2;
    """)
    ten = cur.fetchall()

    # Show results
    print(
        '',
        'Count: ',
        nine[0][0],
        '\n',
        'Car models: ',
        '\n',
        '1.',
        ten[0][0],
        '$',
        ten[0][1],
        '\n',
        '2.',
        ten[1][0],
        '$',
        ten[1][1],
    )


except psycopg2.Error as error:
    print("Warning!", error)

finally:
        if conn is not None:
            cursor.close()
            conn.close()

 Count:  112 
 Car models:  
 1. Caliber $ 999718 
 2. MKT $ 995884
