In [25]:
import pandas as pd
from utils.ag_mapping import ag_mapping

class DataProcessor:
    """Turn a pivoted feather file into a one-row-per-series table.

    ``get_table`` chains the three steps: load and transpose the pivot
    (``get_initial_data``), attach the descriptive fields found in the
    module-level ``ag_mapping`` (``get_ag_mapping``), and keep only the period
    columns inside a date window (``get_columns_to_include``).
    """

    # Column that get_ag_mapping / get_columns_to_include expect to hold the
    # series id. NOTE(review): presumably this is the column-index name stored
    # in the feather file and surfaced by reset_index() -- confirm.
    ID_COLUMN = 'id'
    # Fields read from every ``ag_mapping`` entry, in the order they are
    # inserted right after the id column.
    METADATA_FIELDS = ('name', 'padd', 'commodity', 'type', 'uom')
    # strftime/strptime pattern used for the period column labels.
    DATE_LABEL_FORMAT = '%m/%d/%y'

    def __init__(self, file_path='./data/wps_gte_2015_pivot.feather'):
        """Remember where the pivoted feather file lives; nothing is read yet.

        Args:
            file_path: Path handed to ``pd.read_feather`` by ``get_initial_data``.
        """
        self.file_path = file_path

    def get_initial_data(self):
        """Load the feather file and transpose it so periods become columns.

        The ``period`` column is parsed as datetimes, rendered as
        ``DATE_LABEL_FORMAT`` strings and used as the index before the
        transpose, so every period ends up as a string column label. The
        former column labels come back as the first column via
        ``reset_index()``.

        Returns:
            pandas.DataFrame: the transposed frame.
        """
        pivot = pd.read_feather(self.file_path)
        pivot['period'] = pd.to_datetime(pivot['period']).dt.strftime(self.DATE_LABEL_FORMAT)
        return pivot.set_index('period').T.reset_index()

    def get_ag_mapping(self, df):
        """Attach the ``ag_mapping`` fields and order rows like ``ag_mapping``.

        For each field in ``METADATA_FIELDS`` a column is inserted directly
        after the id column (positions 1..5), filled by mapping the id through
        ``ag_mapping``. Rows are then selected and ordered by the key order of
        ``ag_mapping``; ids absent from ``ag_mapping`` are dropped.

        The input frame is left untouched: the work happens on a copy, so the
        method can safely be called more than once on the same frame.

        Args:
            df: Frame with an ``id`` column in first position.

        Returns:
            pandas.DataFrame: new frame with the metadata columns added.

        Raises:
            KeyError: if an id listed in ``ag_mapping`` is not present in
                ``df`` (raised by the ``.loc`` selection).
            ValueError: if ``df`` already has one of the metadata columns.
        """
        ordered_ids = list(ag_mapping)

        # Copy first: DataFrame.insert mutates in place and would otherwise
        # leak half-finished changes into the caller's object.
        enriched = df.copy()
        for position, field in enumerate(self.METADATA_FIELDS, start=1):
            lookup = {series_id: attrs[field] for series_id, attrs in ag_mapping.items()}
            enriched.insert(position, field, enriched[self.ID_COLUMN].map(lookup))

        return enriched.set_index(self.ID_COLUMN).loc[ordered_ids].reset_index()

    def get_columns_to_include(self, df, startDate, endDate):
        """Keep the id/metadata columns plus period columns within a window.

        Every column other than the id and metadata columns is parsed with
        ``DATE_LABEL_FORMAT``; those falling in ``[startDate, endDate]``
        (both ends inclusive) are kept, in their original order.

        Args:
            df: Frame containing the id column, all ``METADATA_FIELDS`` columns
                and period columns labelled in ``DATE_LABEL_FORMAT``.
            startDate: Lower bound, anything ``pd.Timestamp`` accepts.
            endDate: Upper bound, anything ``pd.Timestamp`` accepts.

        Returns:
            pandas.DataFrame: id and metadata columns first, then the
            selected period columns.

        Raises:
            ValueError: if a required id/metadata column is missing, or a
                remaining column label does not match ``DATE_LABEL_FORMAT``.
        """
        leading = [self.ID_COLUMN, *self.METADATA_FIELDS]
        missing = [col for col in leading if col not in df.columns]
        if missing:
            raise ValueError(f"DataFrame is missing required column(s): {missing}")

        period_labels = [col for col in df.columns if col not in leading]
        period_dates = pd.to_datetime(period_labels, format=self.DATE_LABEL_FORMAT)

        window_start, window_end = pd.Timestamp(startDate), pd.Timestamp(endDate)
        in_window = (period_dates >= window_start) & (period_dates <= window_end)

        # Select by the original labels instead of re-formatting the parsed
        # dates, so the lookup cannot miss because of formatting differences.
        kept = [label for label, keep in zip(period_labels, in_window) if keep]
        return df[leading + kept]

    def get_table(self, start='1900-01-01', end='2030-12-31'):
        """Run the full pipeline and return the finished table.

        Args:
            start: Earliest period to keep (inclusive).
            end: Latest period to keep (inclusive).

        Returns:
            pandas.DataFrame: output of ``get_columns_to_include`` applied to
            the loaded, metadata-enriched frame.
        """
        table = self.get_initial_data()
        table = self.get_ag_mapping(table)
        return self.get_columns_to_include(table, start, end)

In [27]:
# Build the table with the default feather path and the default date window
# of get_table ('1900-01-01' through '2030-12-31').
processor = DataProcessor()
df = processor.get_table()
# Bare last expression so the notebook renders the frame as the cell output.
df

period,id,name,padd,commodity,type,uom,12/26/14,01/02/15,01/09/15,01/16/15,...,08/02/24,08/09/24,08/16/24,08/23/24,08/30/24,09/06/24,09/13/24,09/20/24,09/27/24,10/04/24
0,WCESTUS1,US Commercial Stocks (kb),US,Crude,Stocks,kb,352979.0,348806.0,354195.0,364266.0,...,429321.0,430678.0,426029.0,425183.0,418310.0,419143.0,417513.0,413042.0,416931.0,422741.0
1,WCESTP11,P1 Commercial Stocks (kb),P1,Crude,Stocks,kb,12545.0,11813.0,12485.0,12542.0,...,8778.0,8632.0,7739.0,8831.0,7726.0,7909.0,8030.0,8086.0,8721.0,7810.0
2,WCESTP21,P2 Commercial Stocks (kb),P2,Crude,Stocks,kb,100552.0,102885.0,105172.0,112457.0,...,110704.0,108124.0,106600.0,105131.0,104100.0,101700.0,99470.0,102358.0,103440.0,106201.0
3,WCESTP31,P3 Commercial Stocks (kb),P3,Crude,Stocks,kb,176983.0,172676.0,173585.0,174839.0,...,238296.0,243861.0,239672.0,240185.0,234443.0,239001.0,240751.0,234545.0,237011.0,239934.0
4,WCESTP41,P4 Commercial Stocks (kb),P4,Crude,Stocks,kb,15407.0,15709.0,15443.0,15663.0,...,22824.0,22865.0,22402.0,22093.0,22399.0,21861.0,22106.0,21955.0,22584.0,22121.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151,W_NA_YRL_R10_MBBLD,P1 CDU Capacity (kbd),P1,Crude,Cdu Capacity,kbd,1297.0,1297.0,1297.0,1297.0,...,910.0,910.0,910.0,910.0,910.0,910.0,910.0,910.0,910.0,910.0
152,W_NA_YRL_R20_MBBLD,P2 CDU Capacity (kbd),P2,Crude,Cdu Capacity,kbd,3810.0,3810.0,3810.0,3810.0,...,4246.0,4246.0,4246.0,4246.0,4246.0,4246.0,4246.0,4246.0,4246.0,4246.0
153,W_NA_YRL_R30_MBBLD,P3 CDU Capacity (kbd),P3,Crude,Cdu Capacity,kbd,9154.0,9154.0,9170.0,9170.0,...,9987.0,9987.0,9987.0,9987.0,9987.0,9987.0,9987.0,9987.0,9987.0,9987.0
154,W_NA_YRL_R40_MBBLD,P4 CDU Capacity (kbd),P4,Crude,Cdu Capacity,kbd,647.0,647.0,647.0,647.0,...,652.0,652.0,652.0,652.0,652.0,652.0,652.0,652.0,652.0,652.0


In [15]:
# One lookup table per descriptive field, each keyed by the ag_mapping key and
# holding that entry's value for the field.
(
    id_to_name_mapping,
    id_to_padd_mapping,
    id_to_commodity_mapping,
    id_to_type_mapping,
    id_to_uom_mapping,
) = (
    {series_id: attrs[field] for series_id, attrs in ag_mapping.items()}
    for field in ('name', 'padd', 'commodity', 'type', 'uom')
)


In [21]:
# Preview only: first rows (head() default) and the first seven columns by
# position.
df.head().iloc[:, :7]

period,id,name,padd,commodity,type,uom,12/26/14
0,WCEIMP12,P1 Imports (kbd),P1,Crude,Imports,kbd,588.0
1,WCEIMP22,P2 Imports (kbd),P2,Crude,Imports,kbd,2041.0
2,WCEIMP32,P3 Imports (kbd),P3,Crude,Imports,kbd,3040.0
3,WCEIMP42,P4 Imports (kbd),P4,Crude,Imports,kbd,254.0
4,WCEIMP52,P5 Imports (kbd),P5,Crude,Imports,kbd,1139.0


In [24]:
# Unpacking a dict yields its keys in insertion order, i.e. the key order of
# id_to_name_mapping.
order_list = [*id_to_name_mapping]
order_list

['WCESTUS1',
 'WCESTP11',
 'WCESTP21',
 'WCESTP31',
 'WCESTP41',
 'WCESTP51',
 'W_EPC0_SAX_YCUOK_MBBL',
 'crudeStocksP2E',
 'WCSSTUS1',
 'W_EPC0_SKA_NUS_MBBL',
 'WCRSTUS1',
 'WCRFPUS2',
 'W_EPC0_FPF_R48_MBBLD',
 'W_EPC0_FPF_SAK_MBBLD',
 'WCEIMUS2',
 'WCEIMP12',
 'WCEIMP22',
 'WCEIMP32',
 'WCEIMP42',
 'WCEIMP52',
 'crudeOriginalAdjustment',
 'WCRRIUS2',
 'WCRRIP12',
 'WCRRIP22',
 'WCRRIP32',
 'WCRRIP42',
 'WCRRIP52',
 'WCREXUS2',
 'crudeStocksP9',
 'crudeRunsP9',
 'grossRunsP9',
 'feedstockRunsP9',
 'crudeImportsP9',
 'WGTSTUS1',
 'WGTSTP11',
 'WGTSTP21',
 'WGTSTP31',
 'WGTSTP41',
 'WGTSTP51',
 'WGTIMUS2',
 'WGTIM_R10-Z00_2',
 'WGTIM_R20-Z00_2',
 'WGTIM_R30-Z00_2',
 'WGTIM_R40-Z00_2',
 'WGTIM_R50-Z00_2',
 'WGFRPUS2',
 'WGFRPP12',
 'WGFRPP22',
 'WGFRPP32',
 'WGFRPP42',
 'WGFRPP52',
 'W_EPM0F_EEX_NUS-Z00_MBBLD',
 'WDISTUS1',
 'WDISTP11',
 'WDISTP21',
 'WDISTP31',
 'WDISTP41',
 'WDISTP51',
 'WDIIMUS2',
 'WDIIM_R10-Z00_2',
 'WDIIM_R20-Z00_2',
 'WDIIM_R30-Z00_2',
 'WDIIM_R40-Z00_2',
 'WDIIM_