# Data Wrangling - Create POD Return History from NAV History
### POD Allocation | Project 3

Goal: Read the POD NAV history, convert each NAV series to returns, and save the result as a new table of return history.

Steps

- Set up the connection and cursor to the database container
- Create the relational databases
- Read the NAV table, convert NAV to returns, and load the results into the return-history table

In [1]:
import math
import numpy as np
import pandas as pd

import psycopg2
from psycopg2.extras import execute_values

In [2]:
#
# function to run a select query and return rows in a pandas dataframe
# pandas puts all numeric values from postgres to float
# if it will fit in an integer, change it to integer
#

def my_select_query_pandas(query, rollback_before_flag, rollback_after_flag):
    """Run a select query and return its rows in a pandas DataFrame.

    The query is executed on the module-level ``connection``.  Every
    ``float64`` column whose non-missing values are all whole numbers that
    fit in a signed 64-bit integer is converted to pandas' nullable ``Int64``
    dtype, so missing values survive as ``<NA>``.  A ``float64`` column with
    no non-missing values at all is converted as well.  Columns containing
    fractions, infinities or values outside the int64 range stay ``float64``.

    Args:
        query: SQL text handed to ``pd.read_sql_query``.
        rollback_before_flag: when truthy, roll the connection back before
            the query runs.
        rollback_after_flag: when truthy, roll the connection back after
            the query has run.

    Returns:
        The query result as a DataFrame, with integer-valued float columns
        converted to ``Int64``.
    """

    if rollback_before_flag:
        connection.rollback()

    df = pd.read_sql_query(query, connection)

    if rollback_after_flag:
        connection.rollback()

    # Bounds of int64 expressed as floats; 2.0 ** 63 itself does not fit,
    # hence the strict upper comparison below.
    int64_lower = -(2.0 ** 63)
    int64_upper = 2.0 ** 63

    # fix the float columns that really should be integers
    for column in df:

        if df[column].dtype != "float64":
            continue

        values = df[column].to_numpy()
        present = values[~np.isnan(values)]

        # Vectorised instead of a per-value math.floor(), which raises
        # OverflowError on +/-infinity.  Infinity fails the range test and
        # therefore leaves the column as float64.
        is_whole = present == np.floor(present)
        fits_int64 = (present >= int64_lower) & (present < int64_upper)

        if np.all(is_whole & fits_int64):
            df[column] = df[column].astype('Int64')

    return df

In [3]:
# Settings for the Postgres server this notebook talks to.
# NOTE(review): the credentials are hard-coded here; presumably acceptable for
# a local class container, but confirm before reusing this elsewhere.
_connection_settings = {
    "user": "postgres",
    "password": "ucb",
    "host": "postgres",
    "port": "5432",
    "database": "postgres",
}

# Single shared connection used by every query and insert in this notebook.
connection = psycopg2.connect(**_connection_settings)

In [4]:
# Cursor on the shared connection; execute_values() uses it for the bulk
# insert into temp_pod_ret_history further down.
cursor = connection.cursor()

#### pod_nav_history

In [5]:
#
# load every row of TEMP_POD_NAV_HISTORY
#

rollback_before_flag = True
rollback_after_flag = True

query = """

select * 
from temp_pod_nav_history
"""

nav_hist = my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

#
# turn each pod's NAV series into period-over-period returns
#

# Index and sort by (pod_id, date) so consecutive rows of a pod are
# consecutive observations.
nav_hist = nav_hist.set_index(['pod_id', 'date']).sort_index()

# pct_change runs within each pod (index level 0); rows without a return
# (e.g. the first observation of every pod) are dropped.
ret_hist = (
    nav_hist.groupby(level=0)['price']
    .pct_change()
    .dropna()
    .rename('return')
    .reset_index()
)

# Store plain date objects and put the columns in display order.
ret_hist['date'] = pd.to_datetime(ret_hist['date']).dt.date
ret_hist = ret_hist.loc[:, ['date', 'pod_id', 'return']]
ret_hist

Unnamed: 0,date,pod_id,return
0,2014-12-12,0431214C JP,-0.024288
1,2014-12-19,0431214C JP,0.002448
2,2014-12-26,0431214C JP,0.021779
3,2015-01-02,0431214C JP,-0.006375
4,2015-01-09,0431214C JP,-0.010024
...,...,...,...
16850,2025-06-06,WSEEAUH ID,0.010999
16851,2025-06-13,WSEEAUH ID,-0.008340
16852,2025-06-20,WSEEAUH ID,-0.009871
16853,2025-06-27,WSEEAUH ID,0.016098


In [6]:
#
# Bulk-insert the pod return records into TEMP_POD_RET_HISTORY
#

query = """
INSERT INTO temp_pod_ret_history (pod_id, date, return)
VALUES %s
"""

# Column order here has to line up with the INSERT column list above.
columns = ['pod_id', 'date', 'return']
records = ret_hist.loc[:, columns].to_records(index=False)

try:
    # Start from a clean transaction, load all rows, then make them permanent.
    connection.rollback()
    execute_values(cursor, query, records)
    connection.commit()
    print("Data inserted")
except Exception as exc:
    # Undo whatever part of the load went through and report the reason.
    connection.rollback()
    print("Insert failed:", exc)


Data inserted


In [7]:
#
# peek at TEMP_POD_RET_HISTORY to confirm the rows were loaded
#

rollback_before_flag = rollback_after_flag = True

query = """

select * 
from temp_pod_ret_history
limit 5;

"""

# Last expression of the cell, so the notebook displays the returned frame.
my_select_query_pandas(
    query,
    rollback_before_flag,
    rollback_after_flag,
)

Unnamed: 0,date,pod_id,return
0,2014-12-12,0431214C JP,-0.024288
1,2014-12-19,0431214C JP,0.002448
2,2014-12-26,0431214C JP,0.021779
3,2015-01-02,0431214C JP,-0.006375
4,2015-01-09,0431214C JP,-0.010024
