# SBA Data Pipeline: Parquet + SAM
## **Parquet Files**

### Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
import polars as pl

### Configure Polars to show all dataframe rows

In [2]:
pl.Config.set_tbl_rows(20)

polars.config.Config

### Identify Necessary Columns (from Final Columns notebook)

In [3]:
cols_Kr = ['CO_BUS_SIZE_DETERMINATION', # =="SMALL BUSSINES
        'PRINCIPAL_NAICS_CODE', 
        'PIID',
        'CURRENT_CONTRACT_VALUE',
        'NUMBER_OF_OFFERS_RECEIVED',
        'EVALUATED_PREFERENCE',
        'EXTENT_COMPETED',
        'MODIFICATION_NUMBER',
        'DATE_SIGNED',
        'FUNDING_AGENCY_NAME',
        'FUNDING_OFFICE_NAME',
        'FUNDING_DEPARTMENT_NAME']

# Silas 
cols_S = ['VENDOR_UEI',
        'TYPE_OF_SET_ASIDE',
        'VENDOR_ADDRESS_ZIP_CODE',
        'VENDOR_ADDRESS_COUNTRY_NAME', # == USA
        'EDUCATIONAL_INSTITUTION_FLAG',
        'FIRM_8A_FLAG',
        'WOMEN_OWNED_FLAG',
        'FIRM8A_JOINT_VENTURE',
        'FEDERALLY_FUNDED_R_AND_D_CORP',
        'CORP_ENTITY_NOT_TAX_EXEMPT',
        'PARTNERSHIP_OR_LLP',
        'SOLE_PROPREITORSHIP',
        'SMALL_AGRICULTURAL_COOPERATIVE',
        'INTERNATIONAL_ORGANIZATION',
        'ARCHITECTURE_AND_ENGINEERING',
        'COMMUNITY_CORP_OWNED_FIRM',
        'CONSTRUCTION_FIRM',
        'DOMESTIC_SHELTER',
        'FOUNDATION',
        'MANUFACTURER_OF_GOODS',
        'RESEARCH_AND_DEVELOPMENT',
        'SERVICE_PROVIDER',
        'VETERINARY_HOSPITAL',
        'HISPANIC_SERVICING_INSTITUTION',
        'LIMITED_LIABILITY_CORPORATION']
        

# Kevin 
cols_Kv = ['IDV_CONTRACTING_AGENCY_NAME',
        'IDV_EXTENT_COMPETED',
        'IDV_EVALUATED_PREFERENCE',
        'IDV_SIGNED_DATE',
        'IDV_LAST_DATE_TO_ORDER',
        'IDV_NUMBER_OF_OFFERS',
        'PART8_OR_PART13',
        'AWARD_FISCAL_YEAR',
        'PRODUCT_OR_SERVICE_TYPE',
        'TOTAL_ESTIMATED_ORDER_VALUE']

# David
cols_D = ['FOR_PROFIT_ORGANIZATION',
        'DOT_CERTIFIED_DISADV_BUS',
        'SDB','FISCAL_YEAR', 
        'CAGE_CODE']

### Aggregate the columns above into one list (to import data)

In [4]:
all_cols = cols_D + cols_Kv + cols_S + cols_Kr

## Test code to ensure the parquet files are being read in correctly
### Read the parquet files into a dataframe using polars

In [5]:
data2022 = pl.read_parquet("2022.parquet", columns = all_cols, n_rows=20)

In [6]:
data2022.head(20)

FOR_PROFIT_ORGANIZATION,DOT_CERTIFIED_DISADV_BUS,SDB,FISCAL_YEAR,CAGE_CODE,IDV_CONTRACTING_AGENCY_NAME,IDV_EXTENT_COMPETED,IDV_EVALUATED_PREFERENCE,IDV_SIGNED_DATE,IDV_LAST_DATE_TO_ORDER,IDV_NUMBER_OF_OFFERS,PART8_OR_PART13,AWARD_FISCAL_YEAR,PRODUCT_OR_SERVICE_TYPE,TOTAL_ESTIMATED_ORDER_VALUE,VENDOR_UEI,TYPE_OF_SET_ASIDE,VENDOR_ADDRESS_ZIP_CODE,VENDOR_ADDRESS_COUNTRY_NAME,EDUCATIONAL_INSTITUTION_FLAG,FIRM_8A_FLAG,WOMEN_OWNED_FLAG,FIRM8A_JOINT_VENTURE,FEDERALLY_FUNDED_R_AND_D_CORP,CORP_ENTITY_NOT_TAX_EXEMPT,PARTNERSHIP_OR_LLP,SOLE_PROPREITORSHIP,SMALL_AGRICULTURAL_COOPERATIVE,INTERNATIONAL_ORGANIZATION,ARCHITECTURE_AND_ENGINEERING,COMMUNITY_CORP_OWNED_FIRM,CONSTRUCTION_FIRM,DOMESTIC_SHELTER,FOUNDATION,MANUFACTURER_OF_GOODS,RESEARCH_AND_DEVELOPMENT,SERVICE_PROVIDER,VETERINARY_HOSPITAL,HISPANIC_SERVICING_INSTITUTION,LIMITED_LIABILITY_CORPORATION,CO_BUS_SIZE_DETERMINATION,PRINCIPAL_NAICS_CODE,PIID,CURRENT_CONTRACT_VALUE,NUMBER_OF_OFFERS_RECEIVED,EVALUATED_PREFERENCE,EXTENT_COMPETED,MODIFICATION_NUMBER,DATE_SIGNED,FUNDING_AGENCY_NAME,FUNDING_OFFICE_NAME,FUNDING_DEPARTMENT_NAME
str,str,str,str,str,str,str,str,datetime[ms],datetime[ms],i32,str,str,str,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,i32,str,str,str,datetime[ms],str,str,str
"""YES""","""NO""","""NO""",,"""87MR5""","""DEFENSE LOGIST…","""A""","""NONE""",2019-04-23 00:00:00,2023-03-31 00:00:00,148.0,,"""2022""","""P""",,"""G568KMLKNMC5""","""NONE""","""773032004""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""324110""","""SPE60722FEULE""",196.25,,"""NONE""","""A""","""0""",2022-08-05 00:00:00,"""DEFENSE LOGIST…","""DLA ENERGY""","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""",,"""0R7C4""","""DEFENSE LOGIST…","""A""","""NONE""",2018-04-04 00:00:00,2022-03-31 00:00:00,1.0,,"""2022""","""P""",,"""Z4SKYSSHK3Z9""","""NONE""","""891193257""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""OTHER THAN SMA…","""324110""","""SPE60722FD92Y""",466.78,,"""NONE""","""A""","""0""",2022-04-20 00:00:00,"""DEFENSE LOGIST…","""DLA ENERGY""","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""",,"""9T645""","""DEFENSE LOGIST…","""A""","""NONE""",2020-07-13 00:00:00,2023-08-05 00:00:00,2.0,,"""2022""","""P""",,"""F88AH3GNC5T3""","""NONE""","""338803433""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""YES""","""SMALL BUSINESS…","""311812""","""SPE30022F30FU""",162.8,,"""NONE""","""A""","""0""",2022-02-23 00:00:00,"""DEFENSE LOGIST…","""DLA TROOP SUPP…","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""",,"""87MR5""","""DEFENSE LOGIST…","""A""","""NONE""",2019-04-23 00:00:00,2023-03-31 00:00:00,148.0,,"""2022""","""P""",,"""G568KMLKNMC5""","""NONE""","""773032004""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""324110""","""SPE60722FBG33""",58.1,,"""NONE""","""A""","""0""",2022-01-04 00:00:00,"""DEFENSE LOGIST…","""DLA ENERGY""","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""",,"""5P8G0""","""DEFENSE LOGIST…","""A""","""NONE""",2019-08-12 00:00:00,2023-03-31 00:00:00,145.0,,"""2022""","""P""",,"""G9N5DJF9U275""","""NONE""","""365422818""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""324110""","""SPE60722FC1X4""",4144.78,,"""NONE""","""A""","""0""",2022-02-23 00:00:00,"""DEFENSE LOGIST…","""DLA ENERGY""","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""",,"""9T645""","""DEFENSE LOGIST…","""A""","""NONE""",2020-07-13 00:00:00,2023-08-05 00:00:00,2.0,,"""2022""","""P""",,"""F88AH3GNC5T3""","""NONE""","""338803433""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""YES""","""SMALL BUSINESS…","""311812""","""SPE30022F3N66""",153.0,,"""NONE""","""A""","""0""",2022-03-25 00:00:00,"""DEFENSE LOGIST…","""DLA TROOP SUPP…","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""",,"""1VZR7""","""DEFENSE LOGIST…","""F""","""NONE""",2013-11-07 00:00:00,2014-11-06 00:00:00,1.0,,"""2022""","""P""",,"""Q5UVH3KMTMD8""","""SBA""","""494246407""","""UNITED STATES""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""334419""","""SPE4A619F0659""",-3226.79,,"""NONE""","""F""","""P00001""",2022-03-02 00:00:00,"""DEFENSE LOGIST…","""DLA AVIATION""","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""",,"""5CFE9""",,,,,,,,"""2022""","""S""",,"""DLZVGV3MGF31""","""NONE""","""209014846""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""711510""","""95170020P0012""",-12250.0,1.0,"""NONE""","""G""","""P00004""",2022-09-19 00:00:00,"""UNITED STATES …","""VOA NEWS CENTE…","""UNITED STATES …"
"""YES""","""NO""","""YES""",,"""7M0A2""",,,,,,,,"""2022""","""S""",,"""R4S8VME25J37""","""SBA""","""281179400""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""SMALL BUSINESS…","""336611""","""70Z08022PMECP0…",138653.0,4.0,"""NONE""","""F""","""0""",2022-02-02 00:00:00,"""U.S. COAST GUA…","""USCG FINANCE C…","""HOMELAND SECUR…"
"""YES""","""NO""","""NO""",,"""8JF28""",,,,,,,,"""2022""","""S""",,"""C533SZKRAMH1""","""8AN""","""208522952""","""UNITED STATES""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""541511""","""80GSFC20C0125""",0.0,1.0,"""NONE""","""B""","""P00013""",2021-11-09 00:00:00,"""NATIONAL AERON…","""NASA GODDARD S…","""NATIONAL AERON…"


### Replace FISCAL_YEAR's null values with the correct year

In [7]:
data2022 = data2022.with_columns(pl.col("FISCAL_YEAR").fill_null(2022))

### Make sure the FISCAL_YEAR value is filled below

In [8]:
data2022.head(20)

FOR_PROFIT_ORGANIZATION,DOT_CERTIFIED_DISADV_BUS,SDB,FISCAL_YEAR,CAGE_CODE,IDV_CONTRACTING_AGENCY_NAME,IDV_EXTENT_COMPETED,IDV_EVALUATED_PREFERENCE,IDV_SIGNED_DATE,IDV_LAST_DATE_TO_ORDER,IDV_NUMBER_OF_OFFERS,PART8_OR_PART13,AWARD_FISCAL_YEAR,PRODUCT_OR_SERVICE_TYPE,TOTAL_ESTIMATED_ORDER_VALUE,VENDOR_UEI,TYPE_OF_SET_ASIDE,VENDOR_ADDRESS_ZIP_CODE,VENDOR_ADDRESS_COUNTRY_NAME,EDUCATIONAL_INSTITUTION_FLAG,FIRM_8A_FLAG,WOMEN_OWNED_FLAG,FIRM8A_JOINT_VENTURE,FEDERALLY_FUNDED_R_AND_D_CORP,CORP_ENTITY_NOT_TAX_EXEMPT,PARTNERSHIP_OR_LLP,SOLE_PROPREITORSHIP,SMALL_AGRICULTURAL_COOPERATIVE,INTERNATIONAL_ORGANIZATION,ARCHITECTURE_AND_ENGINEERING,COMMUNITY_CORP_OWNED_FIRM,CONSTRUCTION_FIRM,DOMESTIC_SHELTER,FOUNDATION,MANUFACTURER_OF_GOODS,RESEARCH_AND_DEVELOPMENT,SERVICE_PROVIDER,VETERINARY_HOSPITAL,HISPANIC_SERVICING_INSTITUTION,LIMITED_LIABILITY_CORPORATION,CO_BUS_SIZE_DETERMINATION,PRINCIPAL_NAICS_CODE,PIID,CURRENT_CONTRACT_VALUE,NUMBER_OF_OFFERS_RECEIVED,EVALUATED_PREFERENCE,EXTENT_COMPETED,MODIFICATION_NUMBER,DATE_SIGNED,FUNDING_AGENCY_NAME,FUNDING_OFFICE_NAME,FUNDING_DEPARTMENT_NAME
str,str,str,str,str,str,str,str,datetime[ms],datetime[ms],i32,str,str,str,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,i32,str,str,str,datetime[ms],str,str,str
"""YES""","""NO""","""NO""","""2022""","""87MR5""","""DEFENSE LOGIST…","""A""","""NONE""",2019-04-23 00:00:00,2023-03-31 00:00:00,148.0,,"""2022""","""P""",,"""G568KMLKNMC5""","""NONE""","""773032004""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""324110""","""SPE60722FEULE""",196.25,,"""NONE""","""A""","""0""",2022-08-05 00:00:00,"""DEFENSE LOGIST…","""DLA ENERGY""","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""","""2022""","""0R7C4""","""DEFENSE LOGIST…","""A""","""NONE""",2018-04-04 00:00:00,2022-03-31 00:00:00,1.0,,"""2022""","""P""",,"""Z4SKYSSHK3Z9""","""NONE""","""891193257""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""OTHER THAN SMA…","""324110""","""SPE60722FD92Y""",466.78,,"""NONE""","""A""","""0""",2022-04-20 00:00:00,"""DEFENSE LOGIST…","""DLA ENERGY""","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""","""2022""","""9T645""","""DEFENSE LOGIST…","""A""","""NONE""",2020-07-13 00:00:00,2023-08-05 00:00:00,2.0,,"""2022""","""P""",,"""F88AH3GNC5T3""","""NONE""","""338803433""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""YES""","""SMALL BUSINESS…","""311812""","""SPE30022F30FU""",162.8,,"""NONE""","""A""","""0""",2022-02-23 00:00:00,"""DEFENSE LOGIST…","""DLA TROOP SUPP…","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""","""2022""","""87MR5""","""DEFENSE LOGIST…","""A""","""NONE""",2019-04-23 00:00:00,2023-03-31 00:00:00,148.0,,"""2022""","""P""",,"""G568KMLKNMC5""","""NONE""","""773032004""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""324110""","""SPE60722FBG33""",58.1,,"""NONE""","""A""","""0""",2022-01-04 00:00:00,"""DEFENSE LOGIST…","""DLA ENERGY""","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""","""2022""","""5P8G0""","""DEFENSE LOGIST…","""A""","""NONE""",2019-08-12 00:00:00,2023-03-31 00:00:00,145.0,,"""2022""","""P""",,"""G9N5DJF9U275""","""NONE""","""365422818""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""324110""","""SPE60722FC1X4""",4144.78,,"""NONE""","""A""","""0""",2022-02-23 00:00:00,"""DEFENSE LOGIST…","""DLA ENERGY""","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""","""2022""","""9T645""","""DEFENSE LOGIST…","""A""","""NONE""",2020-07-13 00:00:00,2023-08-05 00:00:00,2.0,,"""2022""","""P""",,"""F88AH3GNC5T3""","""NONE""","""338803433""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""YES""","""SMALL BUSINESS…","""311812""","""SPE30022F3N66""",153.0,,"""NONE""","""A""","""0""",2022-03-25 00:00:00,"""DEFENSE LOGIST…","""DLA TROOP SUPP…","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""","""2022""","""1VZR7""","""DEFENSE LOGIST…","""F""","""NONE""",2013-11-07 00:00:00,2014-11-06 00:00:00,1.0,,"""2022""","""P""",,"""Q5UVH3KMTMD8""","""SBA""","""494246407""","""UNITED STATES""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""334419""","""SPE4A619F0659""",-3226.79,,"""NONE""","""F""","""P00001""",2022-03-02 00:00:00,"""DEFENSE LOGIST…","""DLA AVIATION""","""DEPT OF DEFENS…"
"""YES""","""NO""","""NO""","""2022""","""5CFE9""",,,,,,,,"""2022""","""S""",,"""DLZVGV3MGF31""","""NONE""","""209014846""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""711510""","""95170020P0012""",-12250.0,1.0,"""NONE""","""G""","""P00004""",2022-09-19 00:00:00,"""UNITED STATES …","""VOA NEWS CENTE…","""UNITED STATES …"
"""YES""","""NO""","""YES""","""2022""","""7M0A2""",,,,,,,,"""2022""","""S""",,"""R4S8VME25J37""","""SBA""","""281179400""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""SMALL BUSINESS…","""336611""","""70Z08022PMECP0…",138653.0,4.0,"""NONE""","""F""","""0""",2022-02-02 00:00:00,"""U.S. COAST GUA…","""USCG FINANCE C…","""HOMELAND SECUR…"
"""YES""","""NO""","""NO""","""2022""","""8JF28""",,,,,,,,"""2022""","""S""",,"""C533SZKRAMH1""","""8AN""","""208522952""","""UNITED STATES""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""541511""","""80GSFC20C0125""",0.0,1.0,"""NONE""","""B""","""P00013""",2021-11-09 00:00:00,"""NATIONAL AERON…","""NASA GODDARD S…","""NATIONAL AERON…"


### Combine Funding Agency with Funding Office for a column output

In [9]:
data2022 = data2022.with_columns(
    FUNDING_OFFICE =pl.col("FUNDING_OFFICE_NAME") + " - " + pl.col("FUNDING_AGENCY_NAME"))

### Ensure the code above executed successfully

In [10]:
list(data2022.select("FUNDING_OFFICE"))[0][0]

'DLA ENERGY - DEFENSE LOGISTICS AGENCY'

***Another way to create this column, though it would need to be added back to the dataframe separately***

In [11]:
FUNDING_OFFICE = data2022.select([pl.concat_str(["FUNDING_OFFICE_NAME","FUNDING_AGENCY_NAME"], 
separator = ' - ').alias('FUNDING_OFFICE')]) 

***Ensure the code above executed successfully, would need to add more code for dataframe aggregation***

In [12]:
list(FUNDING_OFFICE)[0][0]

'DLA ENERGY - DEFENSE LOGISTICS AGENCY'

## Automate the process for all the necessary parquet files
### Create a list of relevant years given the parquet file names (ex. *2022.parquet*)

In [13]:
year_list = list(range(2009,2023))

### Change the year values from an integer type to a string type

In [14]:
year_list = [str(year) for year in year_list]

In [15]:
year_list

['2009',
 '2010',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015',
 '2016',
 '2017',
 '2018',
 '2019',
 '2020',
 '2021',
 '2022']

### Create an empty dictionary to store all of the dataframes

In [16]:
d = {}

### Run a for loop using the year list to read in all the parquet files into polars and perform the necessary changes for our purposes

In [17]:
for x in year_list:
    file = x + ".parquet"
    #df_name = "df_" + x
    d[x] = pl.read_parquet(file, columns = all_cols, n_rows=15)
    d[x] = d[x].with_columns(pl.col("FISCAL_YEAR").fill_null(int(x)))
    d[x] = d[x].with_columns(
    FUNDING_OFFICE = pl.col("FUNDING_OFFICE_NAME") + " - " + pl.col("FUNDING_AGENCY_NAME")
        )

### Make a list to hold all of the dataframes in the dictionary

In [18]:
list_df_all = list(d.values())

### Stack all of the dataframes to form one large dataframe

In [19]:
concatenated_dataframe = pl.concat(list_df_all, how="vertical_relaxed")

### Test to ensure the above code worked

In [20]:
concatenated_dataframe.head(20)

FOR_PROFIT_ORGANIZATION,DOT_CERTIFIED_DISADV_BUS,SDB,FISCAL_YEAR,CAGE_CODE,IDV_CONTRACTING_AGENCY_NAME,IDV_EXTENT_COMPETED,IDV_EVALUATED_PREFERENCE,IDV_SIGNED_DATE,IDV_LAST_DATE_TO_ORDER,IDV_NUMBER_OF_OFFERS,PART8_OR_PART13,AWARD_FISCAL_YEAR,PRODUCT_OR_SERVICE_TYPE,TOTAL_ESTIMATED_ORDER_VALUE,VENDOR_UEI,TYPE_OF_SET_ASIDE,VENDOR_ADDRESS_ZIP_CODE,VENDOR_ADDRESS_COUNTRY_NAME,EDUCATIONAL_INSTITUTION_FLAG,FIRM_8A_FLAG,WOMEN_OWNED_FLAG,FIRM8A_JOINT_VENTURE,FEDERALLY_FUNDED_R_AND_D_CORP,CORP_ENTITY_NOT_TAX_EXEMPT,PARTNERSHIP_OR_LLP,SOLE_PROPREITORSHIP,SMALL_AGRICULTURAL_COOPERATIVE,INTERNATIONAL_ORGANIZATION,ARCHITECTURE_AND_ENGINEERING,COMMUNITY_CORP_OWNED_FIRM,CONSTRUCTION_FIRM,DOMESTIC_SHELTER,FOUNDATION,MANUFACTURER_OF_GOODS,RESEARCH_AND_DEVELOPMENT,SERVICE_PROVIDER,VETERINARY_HOSPITAL,HISPANIC_SERVICING_INSTITUTION,LIMITED_LIABILITY_CORPORATION,CO_BUS_SIZE_DETERMINATION,PRINCIPAL_NAICS_CODE,PIID,CURRENT_CONTRACT_VALUE,NUMBER_OF_OFFERS_RECEIVED,EVALUATED_PREFERENCE,EXTENT_COMPETED,MODIFICATION_NUMBER,DATE_SIGNED,FUNDING_AGENCY_NAME,FUNDING_OFFICE_NAME,FUNDING_DEPARTMENT_NAME,FUNDING_OFFICE
str,str,str,str,str,str,str,str,datetime[ms],datetime[ms],i32,str,str,str,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,i32,str,str,str,datetime[ms],str,str,str,str
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""SBA""","""277090000""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""541511""","""M0014609PH017""",,,"""NONE""","""D""","""P00001""",2008-11-04 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…"
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""105094013""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""OTHER THAN SMA…","""811219""","""M0014609PR012""",,,"""NONE""","""B""","""0""",2008-11-05 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…"
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""105094013""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""OTHER THAN SMA…","""811219""","""M0014609PR012""",,,"""NONE""","""B""","""P00001""",2008-11-05 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…"
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""300923403""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""YES""","""YES""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""332913""","""M0014609PR027""",,,"""NONE""","""C""","""0""",2008-12-12 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…"
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""312167720""","""UNITED STATES""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""423990""","""M0014609PD063""",,,"""NONE""","""A""","""0""",2008-11-05 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…"
"""NO""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""310237535""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""336413""","""M0014609VF099""",,,"""NONE""","""B""","""0""",2008-12-09 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…"
"""NO""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""310237535""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""423860""","""M0014609VW005""",,,"""NONE""","""B""","""0""",2008-10-15 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…"
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""SBA""","""284458788""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""423510""","""M0014609PD043""",,,"""NONE""","""D""","""0""",2008-10-27 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…"
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""SBA""","""278035701""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""811118""","""0001""",,,"""NONE""","""D""","""0""",2008-12-04 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…"
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""285603227""","""UNITED STATES""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""332312""","""M0014609PB004""",,,"""NONE""","""A""","""0""",2008-11-07 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…"


## *Sidebar: Ensuring the 2023 file could also be added to the larger dataframe successfully*

In [21]:
df_2023 = pl.read_parquet("FY23_SC_prelim.parquet", columns = all_cols, n_rows=15)

In [22]:
df_2023 = df_2023.with_columns(pl.col("FISCAL_YEAR").fill_null(int(2023)))

In [23]:
df_2023 = df_2023.with_columns(
    FUNDING_OFFICE = pl.col("FUNDING_OFFICE_NAME") + " - " + pl.col("FUNDING_AGENCY_NAME"))

In [24]:
concatenated_dataframe_all = pl.concat([concatenated_dataframe, df_2023], how="vertical_relaxed")

## *It works!* 

In [25]:
concatenated_dataframe_all.tail(15)

FOR_PROFIT_ORGANIZATION,DOT_CERTIFIED_DISADV_BUS,SDB,FISCAL_YEAR,CAGE_CODE,IDV_CONTRACTING_AGENCY_NAME,IDV_EXTENT_COMPETED,IDV_EVALUATED_PREFERENCE,IDV_SIGNED_DATE,IDV_LAST_DATE_TO_ORDER,IDV_NUMBER_OF_OFFERS,PART8_OR_PART13,AWARD_FISCAL_YEAR,PRODUCT_OR_SERVICE_TYPE,TOTAL_ESTIMATED_ORDER_VALUE,VENDOR_UEI,TYPE_OF_SET_ASIDE,VENDOR_ADDRESS_ZIP_CODE,VENDOR_ADDRESS_COUNTRY_NAME,EDUCATIONAL_INSTITUTION_FLAG,FIRM_8A_FLAG,WOMEN_OWNED_FLAG,FIRM8A_JOINT_VENTURE,FEDERALLY_FUNDED_R_AND_D_CORP,CORP_ENTITY_NOT_TAX_EXEMPT,PARTNERSHIP_OR_LLP,SOLE_PROPREITORSHIP,SMALL_AGRICULTURAL_COOPERATIVE,INTERNATIONAL_ORGANIZATION,ARCHITECTURE_AND_ENGINEERING,COMMUNITY_CORP_OWNED_FIRM,CONSTRUCTION_FIRM,DOMESTIC_SHELTER,FOUNDATION,MANUFACTURER_OF_GOODS,RESEARCH_AND_DEVELOPMENT,SERVICE_PROVIDER,VETERINARY_HOSPITAL,HISPANIC_SERVICING_INSTITUTION,LIMITED_LIABILITY_CORPORATION,CO_BUS_SIZE_DETERMINATION,PRINCIPAL_NAICS_CODE,PIID,CURRENT_CONTRACT_VALUE,NUMBER_OF_OFFERS_RECEIVED,EVALUATED_PREFERENCE,EXTENT_COMPETED,MODIFICATION_NUMBER,DATE_SIGNED,FUNDING_AGENCY_NAME,FUNDING_OFFICE_NAME,FUNDING_DEPARTMENT_NAME,FUNDING_OFFICE
str,str,str,str,str,str,str,str,datetime[ms],datetime[ms],i32,str,str,str,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,i32,str,str,str,datetime[ms],str,str,str,str
"""YES""","""NO""","""YES""","""2023""","""7X0Z6""",,,,,,,,"""2023""","""P""",,"""R757HCV7LJD6""","""8AN""","""890602722""","""UNITED STATES""","""NO""","""YES""","""YES""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""YES""","""SMALL BUSINESS…","""339991""","""SPE7M223P0353""",20642.23,1,"""NONE""","""B""","""0""",2022-11-01 00:00:00,"""DEFENSE LOGIST…","""DLA LAND AND M…","""DEPT OF DEFENS…","""DLA LAND AND M…"
"""YES""","""NO""","""NO""","""2023""","""7Y333""",,,,,,,,"""2023""","""P""",,"""ZT2XY79QF4M3""","""NONE""","""234553302""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""335932""","""SPE7M523V0471""",4354.1,1,"""NONE""","""F""","""0""",2022-11-01 00:00:00,"""DEFENSE LOGIST…","""DLA LAND AND M…","""DEPT OF DEFENS…","""DLA LAND AND M…"
"""YES""","""NO""","""NO""","""2023""","""9DV74""",,,,,,,,"""2023""","""P""",,"""F28HDPCVHEL8""","""NONE""","""024533824""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""OTHER THAN SMA…","""334516""","""140G0323P0035""",31561.25,5,"""NONE""","""F""","""0""",2022-12-09 00:00:00,"""US GEOLOGICAL …","""OFFICE OF ACQU…","""INTERIOR, DEPA…","""OFFICE OF ACQU…"
"""YES""","""NO""","""NO""","""2023""","""3T8J4""",,,,,,,,"""2023""","""P""",,"""ZVLCLSN4LVF8""","""NONE""","""189743532""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""334419""","""SPE4A623V1645""",0.0,4,"""NONE""","""F""","""P00001""",2022-10-25 00:00:00,"""DEFENSE LOGIST…","""DLA AVIATION""","""DEPT OF DEFENS…","""DLA AVIATION -…"
"""YES""","""NO""","""NO""","""2023""","""329E3""",,,,,,,,"""2023""","""P""",,"""THJ1QWDKVS83""","""NONE""","""135022533""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""332722""","""SPE7L123P0951""",687.6,5,"""NONE""","""F""","""0""",2022-10-27 00:00:00,"""DEFENSE LOGIST…","""DLA LAND AND M…","""DEPT OF DEFENS…","""DLA LAND AND M…"
"""YES""","""NO""","""NO""","""2023""","""62WS2""",,,,,,,,"""2023""","""P""",,"""D6J2UML3MAC4""","""NONE""","""481899064""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""332119""","""SPE4A722PE662""",-7580.82,8,"""NONE""","""F""","""P00001""",2022-11-03 00:00:00,"""DEFENSE LOGIST…","""DLA AVIATION""","""DEPT OF DEFENS…","""DLA AVIATION -…"
"""YES""","""NO""","""NO""","""2023""","""3JEH0""",,,,,,,,"""2023""","""S""",,"""KA5HQCLKUVW1""","""NONE""","""204050001""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""OTHER THAN SMA…","""561612""","""15M10223PA4700…",57720.0,1,"""NONE""","""F""","""0""",2022-10-24 00:00:00,"""U.S. MARSHALS …","""U.S. DEPT OF J…","""JUSTICE, DEPAR…","""U.S. DEPT OF J…"
"""YES""","""NO""","""NO""","""2023""","""3BUS5""",,,,,,,,"""2023""","""P""",,"""HL5PZFB7KHV4""","""NONE""","""750622740""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""OTHER THAN SMA…","""332216""","""47QSHA23P0581""",6732.98,1,"""NONE""","""G""","""0""",2022-12-12 00:00:00,"""FEDERAL ACQUIS…","""GSA/FAS HEARTL…","""GENERAL SERVIC…","""GSA/FAS HEARTL…"
"""YES""","""NO""","""NO""","""2023""","""1H2F2""",,,,,,,,"""2023""","""S""",,"""L1WZRB6JKWJ4""","""NONE""","""968133916""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""SMALL BUSINESS…","""541715""","""N0001421C1122""",0.0,999,"""NONE""","""A""","""ARZ998""",2022-11-08 00:00:00,"""DEPT OF THE NA…","""OFFICE OF NAVA…","""DEPT OF DEFENS…","""OFFICE OF NAVA…"
"""YES""","""NO""","""NO""","""2023""","""35KC0""",,,,,,,,"""2023""","""S""",,"""LN9PU5M2YZN5""","""NONE""","""204050001""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""OTHER THAN SMA…","""721110""","""19KS7022P0613""",-1045.45,1,"""NONE""","""G""","""P00002""",2022-10-17 00:00:00,"""OFFICE OF THE …","""OFFICE INSPECT…","""HOMELAND SECUR…","""OFFICE INSPECT…"


## **SAM Files** **(From Cert_Info notebook)**

### Pull column names from SAM extracting map

In [26]:
col_names = pl.read_excel('SAM_MASTER_EXTRACT_MAPPING_1Nov2023.xlsx', read_options={'skip_rows':3, 'null_values':'N/A'})\
    .filter(pl.col('Public')=='X')\
    .to_series()\
    .to_list()

### Read in the SAM data

In [27]:
full_reg = pl.read_csv('SAM_PUBLIC_MONTHLY_V2_20240303.dat', has_header = False, separator='|', skip_rows=1,
           missing_utf8_is_empty_string=True, ignore_errors=True, new_columns=col_names, truncate_ragged_lines=True)

### Filter for relevant columns
***CAGE CODE has the most values over more data sets compared to VENDOR UEI, therefore we should use CAGE CODE as our identifier moving forward***

In [28]:
full_reg=full_reg[['UNIQUE ENTITY ID','CAGE CODE', 'BUS TYPE STRING', 'SBA BUSINESS TYPES STRING']]

In [29]:
full_reg.head(50)

UNIQUE ENTITY ID,CAGE CODE,BUS TYPE STRING,SBA BUSINESS TYPES STRING
str,str,str,str
"""C111ATT311C8""","""53YC5""","""2X~8W~A2~HQ~XS…",""""""
"""C111BG66D155""","""6M9A6""","""A8""",""""""
"""C111FE1KRJF1""","""6T4Q4""","""A8""",""""""
"""C111JJBMS328""","""6PA87""","""2X~XS""",""""""
"""C112Q3D8VPM3""","""50JA8""","""2X~LJ~MF""",""""""
"""C112YNTNMG99""","""4PVE2""","""12~H6""",""""""
"""C112ZNS5HMR4""","""7UNS3""","""2X~8W~A2""",""""""
"""C113A7U19RD5""","""5LXD1""","""2X~LJ~MF""",""""""
"""C113E4798JC5""","""62EJ7""","""A8""",""""""
"""C113F9JLF4D9""","""6TSS5""","""27~2X""",""""""


### Create business type columns using the SAM data dictionary file information

### 8(a)

In [30]:
# Minority-Owned Business
self_8a_column = pl.col('BUS TYPE STRING').str.contains('23').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_8A')
full_reg = full_reg.with_columns(self_8a_column)

In [31]:
# SBA Certified 8(a) Program Participant
cert_8a_column = pl.col('SBA BUSINESS TYPES STRING').str.contains('A6').map_elements(lambda x: 'YES' if x else 'NO').alias('CERT_8A')
full_reg = full_reg.with_columns(cert_8a_column)

### HUBZone

In [32]:
# SBA Certified HUBZone Firm
cert_HUBZone_column = pl.col('SBA BUSINESS TYPES STRING').str.contains('XX').map_elements(lambda x: 'YES' if x else 'NO').alias('CERT_HUBZone')
full_reg = full_reg.with_columns(cert_HUBZone_column)

### Small Disadvantaged Business

In [33]:
# Self Certified Small Disadvantaged Business
self_SDB_column = pl.col('BUS TYPE STRING').str.contains('27').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_SDB')
full_reg = full_reg.with_columns(self_SDB_column)

In [34]:
# SBA Certified Small Disadvantaged Business
cert_SDB_column = pl.col('SBA BUSINESS TYPES STRING').str.contains('A4').map_elements(lambda x: 'YES' if x else 'NO').alias('CERT_SDB')
full_reg = full_reg.with_columns(cert_SDB_column)

### Women-Owned Small Business

In [35]:
# Self Certified Women-Owned Small Business
self_WOSB_column = pl.col('BUS TYPE STRING').str.contains('8W').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_WOSB')
full_reg = full_reg.with_columns(self_WOSB_column)

In [36]:
# SBA Certified Women-Owned Small Business
cert_WOSB_column = pl.col('SBA BUSINESS TYPES STRING').str.contains('A9').map_elements(lambda x: 'YES' if x else 'NO').alias('CERT_WOSB')
full_reg = full_reg.with_columns(cert_WOSB_column)

### Economically Disadvantaged Women-Owned Small Business

In [37]:
# Self Certified Economically Disadvantaged Women-Owned Small Business 
# Self Certified Women-Owned Small Business + Self Certified Small Disadvantaged Business
self_EDWOSB_column = pl.col('BUS TYPE STRING').str.contains('8W|27').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_EDWOSB')
full_reg = full_reg.with_columns(self_EDWOSB_column)

In [38]:
# SBA Certified Economically Disadvantaged Women-Owned Small Business
cert_EDWOSB_column = pl.col('SBA BUSINESS TYPES STRING').str.contains('A0').map_elements(lambda x: 'YES' if x else 'NO').alias('CERT_EDWOSB')
full_reg = full_reg.with_columns(cert_EDWOSB_column)

### Veteran-Owned Small Business (https://veterans.certify.sba.gov/#eligibility)

In [39]:
self_VOSB_column = pl.col('BUS TYPE STRING').str.contains('A5').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_VOSB')
full_reg = full_reg.with_columns(self_VOSB_column) #Do we need to vet if a business is small?

In [40]:
self_SDVOSB_column = pl.col('BUS TYPE STRING').str.contains('QF').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_SDVOSB')
full_reg = full_reg.with_columns(self_SDVOSB_column)

In [41]:
self_SDVOSBJV_column = pl.col('BUS TYPE STRING').str.contains('JV').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_SDVOSBJV')
full_reg = full_reg.with_columns(self_SDVOSBJV_column)

### Ensure code above worked successfully

In [42]:
full_reg.shape

(643461, 16)

In [43]:
full_reg.head(20)

UNIQUE ENTITY ID,CAGE CODE,BUS TYPE STRING,SBA BUSINESS TYPES STRING,SELF_8A,CERT_8A,CERT_HUBZone,SELF_SDB,CERT_SDB,SELF_WOSB,CERT_WOSB,SELF_EDWOSB,CERT_EDWOSB,SELF_VOSB,SELF_SDVOSB,SELF_SDVOSBJV
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""C111ATT311C8""","""53YC5""","""2X~8W~A2~HQ~XS…","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO"""
"""C111BG66D155""","""6M9A6""","""A8""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C111FE1KRJF1""","""6T4Q4""","""A8""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C111JJBMS328""","""6PA87""","""2X~XS""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C112Q3D8VPM3""","""50JA8""","""2X~LJ~MF""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C112YNTNMG99""","""4PVE2""","""12~H6""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C112ZNS5HMR4""","""7UNS3""","""2X~8W~A2""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO"""
"""C113A7U19RD5""","""5LXD1""","""2X~LJ~MF""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C113E4798JC5""","""62EJ7""","""A8""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C113F9JLF4D9""","""6TSS5""","""27~2X""","""""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO"""


### Rename Cage Code column for joining purposes

In [44]:
full_reg = full_reg.rename({"CAGE CODE": "CAGE_CODE"})

### Ensure code above worked

In [45]:
full_reg.columns

['UNIQUE ENTITY ID',
 'CAGE_CODE',
 'BUS TYPE STRING',
 'SBA BUSINESS TYPES STRING',
 'SELF_8A',
 'CERT_8A',
 'CERT_HUBZone',
 'SELF_SDB',
 'CERT_SDB',
 'SELF_WOSB',
 'CERT_WOSB',
 'SELF_EDWOSB',
 'CERT_EDWOSB',
 'SELF_VOSB',
 'SELF_SDVOSB',
 'SELF_SDVOSBJV']

In [46]:
full_reg.head(20)

UNIQUE ENTITY ID,CAGE_CODE,BUS TYPE STRING,SBA BUSINESS TYPES STRING,SELF_8A,CERT_8A,CERT_HUBZone,SELF_SDB,CERT_SDB,SELF_WOSB,CERT_WOSB,SELF_EDWOSB,CERT_EDWOSB,SELF_VOSB,SELF_SDVOSB,SELF_SDVOSBJV
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""C111ATT311C8""","""53YC5""","""2X~8W~A2~HQ~XS…","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO"""
"""C111BG66D155""","""6M9A6""","""A8""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C111FE1KRJF1""","""6T4Q4""","""A8""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C111JJBMS328""","""6PA87""","""2X~XS""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C112Q3D8VPM3""","""50JA8""","""2X~LJ~MF""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C112YNTNMG99""","""4PVE2""","""12~H6""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C112ZNS5HMR4""","""7UNS3""","""2X~8W~A2""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO"""
"""C113A7U19RD5""","""5LXD1""","""2X~LJ~MF""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C113E4798JC5""","""62EJ7""","""A8""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""C113F9JLF4D9""","""6TSS5""","""27~2X""","""""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO"""


## Merge SAM dataframes with each other

**The approach here is to perform anti joins to create dataframes that have unique rows not found by the main dataframe, which for our case will be the 0303 2024 SAM data file. From there we will concatenate the dataframes as performed above using a dictionary to create a data frame list. The dataframes will only contain CAGE CODE and BUSINESS TYPE STRING, and once merged, the cleaning performed to gather the relevant columns will be executed on the complete dataset.**

***The trick here is finding a way to approach creating the data sets considering the naming conventation and column name collection isn't as straightfoward as it was for the parquet files.***

In [47]:
SAM_column_mapping_files = ['SAM Mapping Public File Layout - Modified.xlsx',
'SAM Mapping Public File Layout - Modified.xlsx', 
'SAM Mapping Public File Layout - Modified.xlsx',
'SAM Mapping Public File Layout - Modified.xlsx', 
'SAM Mapping Public File Layout - Modified.xlsx',
'SAM Mapping Public File Layout - Modified.xlsx',
'SAM Mapping Public File Layout - Modified.xlsx',  
'SAM Mapping Public File Layout - Modified.xlsx',    
'SAM Mapping Public File Layout - Modified.xlsx',
'SAM Mapping Public File Layout - Modified.xlsx', 
'SAM Mapping Public File Layout - Modified.xlsx', 
'SAM Mapping Public File Layout - Modified.xlsx', 
'SAM Mapping Public V2 File Layout - Modified.xlsx', 
'SAM Mapping Public V2 File Layout - Modified.xlsx', 
'SAM Mapping Public V2 File Layout - Modified.xlsx', 
'SAM_MASTER_EXTRACT_MAPPING_1Nov2023.xlsx',
'SAM_MASTER_EXTRACT_MAPPING_1Nov2023.xlsx']


In [48]:
SAM_datafiles = ['SAM_PUBLIC_MONTHLY_2014_NOV_MODIFIED.dat', 
'SAM_PUBLIC_MONTHLY_2015_MAY_MODIFIED.dat', 'SAM_PUBLIC_MONTHLY_2015_NOV_MODIFIED.dat', 
'SAM_PUBLIC_MONTHLY_2016_MAY_MODIFIED.dat', 'SAM_PUBLIC_MONTHLY_2016_NOV_MODIFIED.dat', 
'SAM_PUBLIC_MONTHLY_2017_MAY_MODIFIED.dat', 'SAM_PUBLIC_MONTHLY_2017_NOV_MODIFIED.dat',
'SAM_PUBLIC_MONTHLY_2018_MAY_MODIFIED.dat', 'SAM_PUBLIC_MONTHLY_2018_NOV_MODIFIED.dat', 
'SAM_PUBLIC_MONTHLY_2019_MAY_MODIFIED.dat', 'SAM_PUBLIC_MONTHLY_2019_NOV_MODIFIED.dat', 
'SAM_PUBLIC_MONTHLY_2020_MAY_MODIFIED.dat', 'SAM_PUBLIC_MONTHLY_2020_NOV_MODIFIED.dat', 
'SAM_PUBLIC_MONTHLY_2021_MAY_MODIFIED.dat', 'SAM_PUBLIC_MONTHLY_2021_NOV_MODIFIED.dat', 
'SAM_PUBLIC_MONTHLY_V2_20231105.dat', 'SAM_PUBLIC_MONTHLY_V2_20240204.dat']


In [49]:
f = {}

In [50]:
col_names = pl.read_excel('SAM_MASTER_EXTRACT_MAPPING_1Nov2023.xlsx', read_options={'skip_rows':3, 'null_values':'N/A'})\
    .filter(pl.col('Public')=='X')\
    .to_series()\
    .to_list()

In [51]:
base_reg = pl.read_csv('SAM_PUBLIC_MONTHLY_V2_20240303.dat', has_header = False, separator='|', skip_rows=1,
           missing_utf8_is_empty_string=True, ignore_errors=True, new_columns=col_names, truncate_ragged_lines=True)

In [52]:
base_reg = base_reg[['CAGE CODE', 'BUS TYPE STRING', 'SBA BUSINESS TYPES STRING']]

In [53]:
len_base = len(base_reg)

In [54]:
len_base

643461

In [55]:
len_base_unique = len(base_reg['CAGE CODE'].unique())

In [56]:
len_base_unique

587685

In [57]:
len_base - len_base_unique

55776

In [58]:
for x in range(0,17):
    col_file = SAM_column_mapping_files[x]
    SAM_file = SAM_datafiles[x]
    SAM_col_names = pl.read_excel(col_file, read_options={'skip_rows':3, 'null_values':'N/A'})\
    .filter(pl.col('Public')=='X')\
    .to_series()\
    .to_list()
    f[x] = pl.read_csv(SAM_file, has_header = False, separator='|', skip_rows=1,
           missing_utf8_is_empty_string=True, ignore_errors=True, new_columns=SAM_col_names, truncate_ragged_lines=True)
    f[x] = f[x][['CAGE CODE', 'BUS TYPE STRING', 'SBA BUSINESS TYPES STRING']]
    f[x] = f[x].join(base_reg, on="CAGE CODE", how="anti")
    concat_list = [base_reg, f[x]]
    # Code below updates the dataframe as each iteration occurs
    base_reg = pl.concat(concat_list, how="vertical_relaxed")

**The mismatch in cage codes is inherent to the base dataframe used as seen by the before and after for loop mismatch number of rows. Every cage code added is unique, which is ideal for computational reasons.**

In [59]:
len_base = len(base_reg)

In [60]:
len_base

1628235

In [61]:
len_base_unique = len(base_reg['CAGE CODE'].unique())

In [62]:
len_base_unique

1572455

In [63]:
len_base - len_base_unique

55780

In [64]:
base_reg.filter(base_reg.is_duplicated())

CAGE CODE,BUS TYPE STRING,SBA BUSINESS TYPES STRING
str,str,str
"""""","""2X~8E~8W~A2""",""""""
"""""","""A8""",""""""
"""""","""12~C6""",""""""
"""""","""2X""",""""""
"""""","""23~2X~PI""",""""""
"""""","""80~A8""",""""""
"""""","""80~A8""",""""""
"""""","""2X""",""""""
"""""","""2X~A2~LJ""",""""""
"""""","""A8""",""""""


In [65]:
base_reg.filter(base_reg.is_duplicated())['CAGE CODE'].unique()

CAGE CODE
str
"""9RNB4"""
""""""


In [66]:
base_reg.filter(base_reg.is_duplicated())['BUS TYPE STRING'].unique()

BUS TYPE STRING
str
"""2X~8C~8D~8W~A2…"
"""23~2X~A5~FR~QF…"
"""2X~H2~HK"""
"""23~2X~A2~PI"""
"""2X~6D~LJ"""
"""2X~A5~LJ~OY"""
"""23~2X~8E~8W~A2…"
"""1R~2X~8W~A2~M8…"
"""2F~80"""
"""20~2X~HQ"""


**The above code shows that the duplicate cage codes are actually empty strings**

In [67]:
base_reg.describe()

statistic,CAGE CODE,BUS TYPE STRING,SBA BUSINESS TYPES STRING
str,str,str,str
"""count""","""1628235""","""1628235""","""1628235"""
"""null_count""","""0""","""0""","""0"""
"""mean""",,,
"""std""",,,
"""min""","""""","""""",""""""
"""25%""",,,
"""50%""",,,
"""75%""",,,
"""max""","""ZY981""","""VW""","""XX20141020"""


## To get rid of duplicates, we can do the negation of the above and overwrite the dataframe
**The reason for doing this is that we care that the join between the parquet data and SAM data is a many to one type of join. This will avoid messiness in the long run.**

### Get rid of duplicates

In [68]:
base_reg.filter(~base_reg.is_duplicated())

CAGE CODE,BUS TYPE STRING,SBA BUSINESS TYPES STRING
str,str,str
"""53YC5""","""2X~8W~A2~HQ~XS…",""""""
"""6M9A6""","""A8""",""""""
"""6T4Q4""","""A8""",""""""
"""6PA87""","""2X~XS""",""""""
"""50JA8""","""2X~LJ~MF""",""""""
"""4PVE2""","""12~H6""",""""""
"""7UNS3""","""2X~8W~A2""",""""""
"""5LXD1""","""2X~LJ~MF""",""""""
"""62EJ7""","""A8""",""""""
"""6TSS5""","""27~2X""",""""""


In [69]:
base_reg = base_reg.filter(~base_reg.is_duplicated())

In [70]:
base_reg['CAGE CODE'].unique()

CAGE CODE
str
"""JSAN2"""
"""6H3M0"""
"""7ZX43"""
"""6MY24"""
"""0EBM3"""
"""961B6"""
"""91BL9"""
"""92AA5"""
"""6X1F6"""
"""9FNH6"""


**Identify duplicates specific to Cage Code** 

In [71]:
base_reg.filter(base_reg['CAGE CODE'].is_duplicated())

CAGE CODE,BUS TYPE STRING,SBA BUSINESS TYPES STRING
str,str,str
"""""","""23~2X~G9~HK~LJ…",""""""
"""""","""23~8C~8D~A2~A5…",""""""
"""""","""A2~A5~A8~H2""",""""""
"""""","""1D~1E~23~2X~8W…",""""""
"""""","""A5~A8~H2~HK~LJ…",""""""
"""""","""23~2X~8E~8W~A2…",""""""
"""""","""A5~A8~BZ~LJ""",""""""
"""""","""20~23~2X~OY""",""""""
"""""","""23~2X~8D~OY""",""""""
"""""","""A2~A8~HQ~LJ""",""""""


**Get rid of duplicates.**

In [72]:
base_reg = base_reg.filter(~base_reg['CAGE CODE'].is_duplicated())

In [73]:
len(base_reg)

1572449

In [74]:
len(base_reg['CAGE CODE'].unique())

1572449

**PERFECT!! Now to clean up that dataframe and generate the relevant columns.**

## 8(a)

In [75]:
# Minority-Owned Business
self_8a_column = pl.col('BUS TYPE STRING').str.contains('23').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_8A')
base_reg = base_reg.with_columns(self_8a_column)

# SBA Certified 8(a) Program Participant
cert_8a_column = pl.col('SBA BUSINESS TYPES STRING').str.contains('A6').map_elements(lambda x: 'YES' if x else 'NO').alias('CERT_8A')
base_reg = base_reg.with_columns(cert_8a_column)

# SBA Certified HUBZone Firm
cert_HUBZone_column = pl.col('SBA BUSINESS TYPES STRING').str.contains('XX').map_elements(lambda x: 'YES' if x else 'NO').alias('CERT_HUBZone')
base_reg = base_reg.with_columns(cert_HUBZone_column)

# Self Certified Small Disadvantaged Business
self_SDB_column = pl.col('BUS TYPE STRING').str.contains('27').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_SDB')
base_reg = base_reg.with_columns(self_SDB_column)

# SBA Certified Small Disadvantaged Business
cert_SDB_column = pl.col('SBA BUSINESS TYPES STRING').str.contains('A4').map_elements(lambda x: 'YES' if x else 'NO').alias('CERT_SDB')
base_reg = base_reg.with_columns(cert_SDB_column)

# Self Certified Women-Owned Small Business
self_WOSB_column = pl.col('BUS TYPE STRING').str.contains('8W').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_WOSB')
base_reg = base_reg.with_columns(self_WOSB_column)

# SBA Certified Women-Owned Small Business
cert_WOSB_column = pl.col('SBA BUSINESS TYPES STRING').str.contains('A9').map_elements(lambda x: 'YES' if x else 'NO').alias('CERT_WOSB')
base_reg = base_reg.with_columns(cert_WOSB_column)

# Self Certified Economically Disadvantaged Women-Owned Small Business 
# Self Certified Women-Owned Small Business + Self Certified Small Disadvantaged Business
self_EDWOSB_column = pl.col('BUS TYPE STRING').str.contains('8W|27').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_EDWOSB')
base_reg = base_reg.with_columns(self_EDWOSB_column)

# SBA Certified Economically Disadvantaged Women-Owned Small Business
cert_EDWOSB_column = pl.col('SBA BUSINESS TYPES STRING').str.contains('A0').map_elements(lambda x: 'YES' if x else 'NO').alias('CERT_EDWOSB')
base_reg = base_reg.with_columns(cert_EDWOSB_column)

self_VOSB_column = pl.col('BUS TYPE STRING').str.contains('A5').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_VOSB')
base_reg = base_reg.with_columns(self_VOSB_column) #Do we need to vet if a business is small?

self_SDVOSB_column = pl.col('BUS TYPE STRING').str.contains('QF').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_SDVOSB')
base_reg = base_reg.with_columns(self_SDVOSB_column)

self_SDVOSBJV_column = pl.col('BUS TYPE STRING').str.contains('JV').map_elements(lambda x: 'YES' if x else 'NO').alias('SELF_SDVOSBJV')
base_reg = base_reg.with_columns(self_SDVOSBJV_column)

**Rename CAGE CODE column for joining purposes**

In [76]:
base_reg = base_reg.rename({"CAGE CODE": "CAGE_CODE"})

## Merge both the Parquet and SAM dataframes

In [77]:
PSAM = concatenated_dataframe_all.join(base_reg, on="CAGE_CODE", how="left")

In [78]:
PSAM.head(30)

FOR_PROFIT_ORGANIZATION,DOT_CERTIFIED_DISADV_BUS,SDB,FISCAL_YEAR,CAGE_CODE,IDV_CONTRACTING_AGENCY_NAME,IDV_EXTENT_COMPETED,IDV_EVALUATED_PREFERENCE,IDV_SIGNED_DATE,IDV_LAST_DATE_TO_ORDER,IDV_NUMBER_OF_OFFERS,PART8_OR_PART13,AWARD_FISCAL_YEAR,PRODUCT_OR_SERVICE_TYPE,TOTAL_ESTIMATED_ORDER_VALUE,VENDOR_UEI,TYPE_OF_SET_ASIDE,VENDOR_ADDRESS_ZIP_CODE,VENDOR_ADDRESS_COUNTRY_NAME,EDUCATIONAL_INSTITUTION_FLAG,FIRM_8A_FLAG,WOMEN_OWNED_FLAG,FIRM8A_JOINT_VENTURE,FEDERALLY_FUNDED_R_AND_D_CORP,CORP_ENTITY_NOT_TAX_EXEMPT,PARTNERSHIP_OR_LLP,SOLE_PROPREITORSHIP,SMALL_AGRICULTURAL_COOPERATIVE,INTERNATIONAL_ORGANIZATION,ARCHITECTURE_AND_ENGINEERING,COMMUNITY_CORP_OWNED_FIRM,CONSTRUCTION_FIRM,DOMESTIC_SHELTER,FOUNDATION,MANUFACTURER_OF_GOODS,RESEARCH_AND_DEVELOPMENT,SERVICE_PROVIDER,VETERINARY_HOSPITAL,HISPANIC_SERVICING_INSTITUTION,LIMITED_LIABILITY_CORPORATION,CO_BUS_SIZE_DETERMINATION,PRINCIPAL_NAICS_CODE,PIID,CURRENT_CONTRACT_VALUE,NUMBER_OF_OFFERS_RECEIVED,EVALUATED_PREFERENCE,EXTENT_COMPETED,MODIFICATION_NUMBER,DATE_SIGNED,FUNDING_AGENCY_NAME,FUNDING_OFFICE_NAME,FUNDING_DEPARTMENT_NAME,FUNDING_OFFICE,BUS TYPE STRING,SBA BUSINESS TYPES STRING,SELF_8A,CERT_8A,CERT_HUBZone,SELF_SDB,CERT_SDB,SELF_WOSB,CERT_WOSB,SELF_EDWOSB,CERT_EDWOSB,SELF_VOSB,SELF_SDVOSB,SELF_SDVOSBJV
str,str,str,str,str,str,str,str,datetime[ms],datetime[ms],i32,str,str,str,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,i32,str,str,str,datetime[ms],str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""SBA""","""277090000""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""541511""","""M0014609PH017""",,,"""NONE""","""D""","""P00001""",2008-11-04 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…",,,,,,,,,,,,,,
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""105094013""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""OTHER THAN SMA…","""811219""","""M0014609PR012""",,,"""NONE""","""B""","""0""",2008-11-05 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…",,,,,,,,,,,,,,
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""105094013""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""OTHER THAN SMA…","""811219""","""M0014609PR012""",,,"""NONE""","""B""","""P00001""",2008-11-05 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…",,,,,,,,,,,,,,
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""300923403""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""YES""","""YES""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""332913""","""M0014609PR027""",,,"""NONE""","""C""","""0""",2008-12-12 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…",,,,,,,,,,,,,,
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""312167720""","""UNITED STATES""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""423990""","""M0014609PD063""",,,"""NONE""","""A""","""0""",2008-11-05 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…",,,,,,,,,,,,,,
"""NO""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""310237535""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""336413""","""M0014609VF099""",,,"""NONE""","""B""","""0""",2008-12-09 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…",,,,,,,,,,,,,,
"""NO""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""310237535""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""423860""","""M0014609VW005""",,,"""NONE""","""B""","""0""",2008-10-15 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…",,,,,,,,,,,,,,
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""SBA""","""284458788""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""423510""","""M0014609PD043""",,,"""NONE""","""D""","""0""",2008-10-27 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…",,,,,,,,,,,,,,
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""SBA""","""278035701""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""811118""","""0001""",,,"""NONE""","""D""","""0""",2008-12-04 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…",,,,,,,,,,,,,,
"""YES""","""NO""","""NO""","""2009""",,,,,,,,,,,,,"""NONE""","""285603227""","""UNITED STATES""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""332312""","""M0014609PB004""",,,"""NONE""","""A""","""0""",2008-11-07 00:00:00,"""DEPT OF THE NA…","""FLEET READINES…","""DEPT OF DEFENS…","""FLEET READINES…",,,,,,,,,,,,,,


In [79]:
PSAM.tail(30)

FOR_PROFIT_ORGANIZATION,DOT_CERTIFIED_DISADV_BUS,SDB,FISCAL_YEAR,CAGE_CODE,IDV_CONTRACTING_AGENCY_NAME,IDV_EXTENT_COMPETED,IDV_EVALUATED_PREFERENCE,IDV_SIGNED_DATE,IDV_LAST_DATE_TO_ORDER,IDV_NUMBER_OF_OFFERS,PART8_OR_PART13,AWARD_FISCAL_YEAR,PRODUCT_OR_SERVICE_TYPE,TOTAL_ESTIMATED_ORDER_VALUE,VENDOR_UEI,TYPE_OF_SET_ASIDE,VENDOR_ADDRESS_ZIP_CODE,VENDOR_ADDRESS_COUNTRY_NAME,EDUCATIONAL_INSTITUTION_FLAG,FIRM_8A_FLAG,WOMEN_OWNED_FLAG,FIRM8A_JOINT_VENTURE,FEDERALLY_FUNDED_R_AND_D_CORP,CORP_ENTITY_NOT_TAX_EXEMPT,PARTNERSHIP_OR_LLP,SOLE_PROPREITORSHIP,SMALL_AGRICULTURAL_COOPERATIVE,INTERNATIONAL_ORGANIZATION,ARCHITECTURE_AND_ENGINEERING,COMMUNITY_CORP_OWNED_FIRM,CONSTRUCTION_FIRM,DOMESTIC_SHELTER,FOUNDATION,MANUFACTURER_OF_GOODS,RESEARCH_AND_DEVELOPMENT,SERVICE_PROVIDER,VETERINARY_HOSPITAL,HISPANIC_SERVICING_INSTITUTION,LIMITED_LIABILITY_CORPORATION,CO_BUS_SIZE_DETERMINATION,PRINCIPAL_NAICS_CODE,PIID,CURRENT_CONTRACT_VALUE,NUMBER_OF_OFFERS_RECEIVED,EVALUATED_PREFERENCE,EXTENT_COMPETED,MODIFICATION_NUMBER,DATE_SIGNED,FUNDING_AGENCY_NAME,FUNDING_OFFICE_NAME,FUNDING_DEPARTMENT_NAME,FUNDING_OFFICE,BUS TYPE STRING,SBA BUSINESS TYPES STRING,SELF_8A,CERT_8A,CERT_HUBZone,SELF_SDB,CERT_SDB,SELF_WOSB,CERT_WOSB,SELF_EDWOSB,CERT_EDWOSB,SELF_VOSB,SELF_SDVOSB,SELF_SDVOSBJV
str,str,str,str,str,str,str,str,datetime[ms],datetime[ms],i32,str,str,str,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,i32,str,str,str,datetime[ms],str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""YES""","""NO""","""NO""","""2022""","""87MR5""","""DEFENSE LOGIST…","""A""","""NONE""",2019-04-23 00:00:00,2023-03-31 00:00:00,148,,"""2022""","""P""",,"""G568KMLKNMC5""","""NONE""","""773032004""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""324110""","""SPE60722FEULE""",196.25,,"""NONE""","""A""","""0""",2022-08-05 00:00:00,"""DEFENSE LOGIST…","""DLA ENERGY""","""DEPT OF DEFENS…","""DLA ENERGY - D…","""2X""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""YES""","""NO""","""NO""","""2022""","""0R7C4""","""DEFENSE LOGIST…","""A""","""NONE""",2018-04-04 00:00:00,2022-03-31 00:00:00,1,,"""2022""","""P""",,"""Z4SKYSSHK3Z9""","""NONE""","""891193257""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""OTHER THAN SMA…","""324110""","""SPE60722FD92Y""",466.78,,"""NONE""","""A""","""0""",2022-04-20 00:00:00,"""DEFENSE LOGIST…","""DLA ENERGY""","""DEPT OF DEFENS…","""DLA ENERGY - D…","""2X~VW""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""YES""","""NO""","""NO""","""2022""","""9T645""","""DEFENSE LOGIST…","""A""","""NONE""",2020-07-13 00:00:00,2023-08-05 00:00:00,2,,"""2022""","""P""",,"""F88AH3GNC5T3""","""NONE""","""338803433""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""YES""","""SMALL BUSINESS…","""311812""","""SPE30022F30FU""",162.8,,"""NONE""","""A""","""0""",2022-02-23 00:00:00,"""DEFENSE LOGIST…","""DLA TROOP SUPP…","""DEPT OF DEFENS…","""DLA TROOP SUPP…","""2X~LJ~MF~VW""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""YES""","""NO""","""NO""","""2022""","""87MR5""","""DEFENSE LOGIST…","""A""","""NONE""",2019-04-23 00:00:00,2023-03-31 00:00:00,148,,"""2022""","""P""",,"""G568KMLKNMC5""","""NONE""","""773032004""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""324110""","""SPE60722FBG33""",58.1,,"""NONE""","""A""","""0""",2022-01-04 00:00:00,"""DEFENSE LOGIST…","""DLA ENERGY""","""DEPT OF DEFENS…","""DLA ENERGY - D…","""2X""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""YES""","""NO""","""NO""","""2022""","""5P8G0""","""DEFENSE LOGIST…","""A""","""NONE""",2019-08-12 00:00:00,2023-03-31 00:00:00,145,,"""2022""","""P""",,"""G9N5DJF9U275""","""NONE""","""365422818""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""324110""","""SPE60722FC1X4""",4144.78,,"""NONE""","""A""","""0""",2022-02-23 00:00:00,"""DEFENSE LOGIST…","""DLA ENERGY""","""DEPT OF DEFENS…","""DLA ENERGY - D…","""2X~VW~XS""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""YES""","""NO""","""NO""","""2022""","""9T645""","""DEFENSE LOGIST…","""A""","""NONE""",2020-07-13 00:00:00,2023-08-05 00:00:00,2,,"""2022""","""P""",,"""F88AH3GNC5T3""","""NONE""","""338803433""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""YES""","""SMALL BUSINESS…","""311812""","""SPE30022F3N66""",153.0,,"""NONE""","""A""","""0""",2022-03-25 00:00:00,"""DEFENSE LOGIST…","""DLA TROOP SUPP…","""DEPT OF DEFENS…","""DLA TROOP SUPP…","""2X~LJ~MF~VW""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""YES""","""NO""","""NO""","""2022""","""1VZR7""","""DEFENSE LOGIST…","""F""","""NONE""",2013-11-07 00:00:00,2014-11-06 00:00:00,1,,"""2022""","""P""",,"""Q5UVH3KMTMD8""","""SBA""","""494246407""","""UNITED STATES""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""334419""","""SPE4A619F0659""",-3226.79,,"""NONE""","""F""","""P00001""",2022-03-02 00:00:00,"""DEFENSE LOGIST…","""DLA AVIATION""","""DEPT OF DEFENS…","""DLA AVIATION -…","""2X~A2~MF~VW""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""YES""","""NO""","""NO""","""2022""","""5CFE9""",,,,,,,,"""2022""","""S""",,"""DLZVGV3MGF31""","""NONE""","""209014846""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""711510""","""95170020P0012""",-12250.0,1,"""NONE""","""G""","""P00004""",2022-09-19 00:00:00,"""UNITED STATES …","""VOA NEWS CENTE…","""UNITED STATES …","""VOA NEWS CENTE…","""2X""","""""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO"""
"""YES""","""NO""","""YES""","""2022""","""7M0A2""",,,,,,,,"""2022""","""S""",,"""R4S8VME25J37""","""SBA""","""281179400""","""UNITED STATES""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""SMALL BUSINESS…","""336611""","""70Z08022PMECP0…",138653.0,4,"""NONE""","""F""","""0""",2022-02-02 00:00:00,"""U.S. COAST GUA…","""USCG FINANCE C…","""HOMELAND SECUR…","""USCG FINANCE C…","""27~2X~A5~LJ~QF…","""""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""YES""","""NO"""
"""YES""","""NO""","""NO""","""2022""","""8JF28""",,,,,,,,"""2022""","""S""",,"""C533SZKRAMH1""","""8AN""","""208522952""","""UNITED STATES""","""NO""","""NO""","""NO""","""YES""","""NO""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""SMALL BUSINESS…","""541511""","""80GSFC20C0125""",0.0,1,"""NONE""","""B""","""P00013""",2021-11-09 00:00:00,"""NATIONAL AERON…","""NASA GODDARD S…","""NATIONAL AERON…","""NASA GODDARD S…","""23~2X~A5~OY""","""JT20220818""","""YES""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""NO""","""YES""","""NO""","""NO"""


***SUCCESS!!!***