In [1]:
import psycopg2
import pandas as pd 

In [2]:
def create_connection(database, password, host="localhost", port="5432", user="postgres"):
    """
    Description: Open a PostgreSQL connection through psycopg2.
    @ input params:
        database: name of the database to connect to
        password: password for `user`
        host, port, user: connection settings, forwarded unchanged to
            psycopg2.connect (defaults: "localhost", "5432", "postgres")
    @ output params:
        the open connection object, or None when connecting fails
        (the error is printed instead of being raised)
    """
    settings = {
        "host": host,
        "port": port,
        "database": database,
        "user": user,
        "password": password,
    }

    try:
        print('Connecting to the postgreSQL database ...')
        handle = psycopg2.connect(**settings)
    except (psycopg2.DatabaseError, Exception) as exc:
        # Best-effort: report the failure and hand back None to the caller.
        print(exc)
        handle = None
    else:
        # The connection is deliberately left open; the caller owns it.
        if handle is not None:
            print('Database connection created.')

    return handle

In [3]:
def load_data(connection, sql_query, values=None):
    """
    Description: Run a SQL query on a PostgreSQL connection and return the
    result set as a DataFrame.

    The connection is always committed and closed before this function
    returns, so a new connection has to be opened for every call.

    @ input params:
        connection: postgresql connection object
        sql_query: SQL query to run
        values: optional query parameters, passed as the second argument
            to cursor.execute
    @ output params:
        a DataFrame with one column per result column; when the query
        matches no rows the frame is empty but still carries the column
        names. Returns None when the query fails (the error is printed).
    """
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(sql_query, values)
            rows = cursor.fetchall()
            column_names = [column[0] for column in cursor.description]
        finally:
            # Release the cursor even when execute/fetchall raised.
            cursor.close()
        # Passing the names to the constructor (rather than assigning
        # df.columns afterwards) keeps the header when `rows` is empty;
        # assigning N names to a 0-column frame raises a length mismatch.
        return pd.DataFrame(rows, columns=column_names)
    except Exception as error:
        # Best-effort: report the failure and return None to the caller.
        print(error)
        return None
    finally:
        if connection is not None:
            connection.commit()
            connection.close()
            print('Database connection terminated.')

```sql  
--## DISTINCT vs GROUP BY
SELECT first_name, COUNT(*)
FROM actor
GROUP BY first_name;
```

Two pandas methods reproduce this query:  
using `groupby` and  
using `value_counts`

In [None]:
# Pull the whole `actor` table into a DataFrame.
schema = "public"
table_name = "actor"
sql_select = f"SELECT * FROM {schema}.{table_name}"  # -> SELECT * FROM public.actor
# NOTE(review): database name and password are hard-coded — consider reading them from the environment.
conn = create_connection(database="users", password="postgres")  # only opens the connection
psg_df = load_data(conn, sql_select)

#### 01. using `.groupby()` 

#### 02. using `.value_counts()`

```sql 
--##
SELECT
	customer_id,
	Total_spend,
	CASE WHEN Total_spend <= 30 THEN 'Bronze'
		 WHEN Total_spend <= 60 THEN 'Silver'
		 WHEN Total_spend <= 90 THEN 'Gold'
		 ELSE  'Diamond'
	END AS Customer_grading
FROM (
	SELECT customer_id, SUM(amount) AS Total_spend
	FROM payment
	GROUP BY customer_id
	ORDER BY 2 ) sub ;
```

In [None]:
# Pull the table this section works on. The customer-grading query above
# aggregates `amount` per `customer_id` from `payment`, so that table is
# loaded here (the cell previously re-loaded `actor`, which has neither column).
schema = "public"
table_name = "payment"
sql_select = "SELECT * FROM {}.{}".format(schema, table_name)  # -> SELECT * FROM public.payment
# NOTE(review): database name and password are hard-coded — consider reading them from the environment.
conn = create_connection("users", "postgres")  # only opens the connection
psg_df = load_data(connection=conn, sql_query=sql_select)

#### 01. using `.map()` 

#### 02. using `.apply()` and a `lambda` function