In [1]:
import pandas as pd
pd.options.display.max_columns=200
pd.options.display.max_rows=500
pd.options.display.max_colwidth=500

# from thefuzz import fuzz
# from thefuzz import process

from datetime import datetime

from glob import glob
import re

In [2]:
def normalize_program_names(_df, title_col = 'cfda_title'):

    df = _df.copy()
    df[title_col] = df[title_col].str.lower()
    df[title_col] = df[title_col].fillna('none')

    return df

In [3]:

def get_year(file_path):

    pattern = r'FY(\d{4})'

    # Search for the pattern in the string
    match = re.search(pattern, file_path)

    if match:
        year = match.group(1)
        return year
    else:
        return 1900


In [4]:
import re

all_data = []
for g in glob('usa_spending_contract_data/*/*',recursive=True):
    temp_data = pd.read_csv(g)
    temp_data['fiscal_year'] = get_year(g)
    all_data.append(temp_data)

raw_contracts = pd.concat(all_data)


# raw_contracts = pd.read_csv('usa_spending_contract_data/FY2019/All_Assistance_PrimeTransactions_2024-05-28_H10M16S23_1.csv')
# # raw_contracts = pd.read_csv('usa_spending_contract_data/FY2024/All_Assistance_PrimeTransactions_2024-05-28_H01M15S04_1.csv')


  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
contracts = normalize_program_names(raw_contracts)

In [6]:
contracts['total_outlayed_amount_for_overall_award'] = contracts['total_outlayed_amount_for_overall_award'].fillna(0)

contracts['estimated_remaining_funds'] = contracts['total_obligated_amount'] - contracts['total_outlayed_amount_for_overall_award']

contracts['spent_percent'] = ((contracts['total_outlayed_amount_for_overall_award'] / contracts['total_obligated_amount'])*100).round(0)

contracts['remaining_funds_percent'] = ((contracts['estimated_remaining_funds'] / contracts['total_obligated_amount'])*100).round(0)

contracts['period_of_performance_current_end_date'] = pd.to_datetime(contracts['period_of_performance_current_end_date'])

contracts['period_of_performance_start_date'] = pd.to_datetime(contracts['period_of_performance_start_date'])
contracts['action_date'] = pd.to_datetime(contracts['action_date'])


contracts['grant_is_open'] = contracts['period_of_performance_current_end_date'] >= datetime(2024,9,1)


contracts['program_activities_funding_this_award'] = contracts['program_activities_funding_this_award'].fillna('missing')

# Code program names 

In [7]:
program_map = {
    'comunity violence intervention and prevention intitiative':
        ['0153: COMMUNITY-BASED VIOLENCE PREVENTION INITIATIVES',
        #'public safety partnership and community policing grants',
        'community-based violence prevention program'],
    'victims of crime act (voca)':['0001: CRIME VICTIMS GRANTS AND ASSISTANCE','0006: TRIBAL VICTIMS ASSISTANCE GRANTS'],
    'center for culturally responsive victim services':[],
    'byrne memorial justice assistance grant':['0007: JUSTICE ASSISTANCE GRANTS','edward byrne memorial justice assistance grant program'],
    'byrne criminal justice innovation':['0081: COMMUNITY BASED CRIME REDUCTION PROGRAM (BYRNE CRIMINAL JUSTICE INNOVATION)'],
    'strategies to support children exposed to violence':['0024: CHILDREN EXPOSED TO VIOLENCE','children exposed to violence'],
    'project safe neighborhood':['0043: PROJECT SAFE NEIGHBORHOODS','project safe neighborhoods'],
    'youth violence and reductions':[],
    'school violence prevention program':['0143: STOP SCHOOL VIOLENCE ACT'],
    'second chance act community based re-entry program':['0050: SECOND CHANCE ACT/OFFENDER REENTRY'],
    'community policing development micro-grants':['0001: PUBLIC SAFETY AND COMMUNITY POLICING GRANTS'],
    'smart policing':['0021: STRATEGIES FOR POLICING INNOVATION'],
    'cops hiring program':['0009: COPS HIRING PROGRAM']
}


In [8]:
def match_grant_program_to_map(x):

    if x is None:
        return 'no match'

    program_matches = []
    for program_name, program_list in program_map.items():

        for program_code in program_list:
            if x.find(program_code) != -1:
                program_matches.append(program_name)
    
    # if len(program_matches) > 1:
    #     print(f"MULTIPLE MATCHES FOR {x}")
        
    
    if len(program_matches) == 0:
        return "no match"

    return program_matches[0]

In [9]:
contracts["program_match__funding_code"] = contracts[
    "program_activities_funding_this_award"
].apply(match_grant_program_to_map)

contracts["program_match__title"] = contracts["cfda_title"].apply(
    match_grant_program_to_map
)
contracts["program_match"] = contracts["program_match__funding_code"].where(
    contracts["program_match__funding_code"] != "no match",
    contracts["program_match__title"],
)

In [10]:
contracts.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 48276 entries, 0 to 8810
Data columns (total 120 columns):
assistance_transaction_unique_key                                 object
assistance_award_unique_key                                       object
award_id_fain                                                     object
modification_number                                               object
award_id_uri                                                      object
sai_number                                                        object
federal_action_obligation                                         float64
total_obligated_amount                                            float64
total_outlayed_amount_for_overall_award                           float64
indirect_cost_federal_share_amount                                float64
non_federal_funding_amount                                        float64
total_non_federal_funding_amount                                  float64
face_valu

## Check matches and non-matches

In [11]:
contracts['program_match'].value_counts(dropna=False)

no match                                                     31405
byrne memorial justice assistance grant                       9087
victims of crime act (voca)                                   3310
cops hiring program                                           1016
second chance act community based re-entry program             812
school violence prevention program                             770
community policing development micro-grants                    717
project safe neighborhood                                      708
byrne criminal justice innovation                              152
comunity violence intervention and prevention intitiative      152
smart policing                                                  88
strategies to support children exposed to violence              59
Name: program_match, dtype: int64

In [12]:
contracts[contracts['program_match']=='no match']['cfda_title'].value_counts()

public safety partnership and community policing grants                                                                                                          2989
coronavirus emergency supplemental funding program                                                                                                               2287
national institute of justice research, evaluation, and development project grants                                                                               1812
dna backlog reduction program                                                                                                                                    1445
state criminal alien assistance program                                                                                                                          1059
drug court discretionary grant program                                                                                                                           1028
serv

In [13]:
contracts[contracts['cfda_title']=='public safety partnership and community policing grants']['transaction_description'].sample(10).values

array(['THE PHOENIX POLICE DEPARTMENT IN PARTNERSHIP WITH ARIZONA STATE UNIVERSITY WILL USE FY23 MICROGRANT FUNDING TO CONDUCT A STUDY TO IDENITFY HOW INDIVIDUALS IN CRISIS MOST COMMONLY ACCESS SERVICES, HOW SUCCESSFUL THEY ARE AT USING MAPPED PATHWAYS TO MAKE CONNECTIONS, AND, IF THEY ENCOUNTER CHALLENGES, HOW THEY NAVIGATE OBSTACLES TO ACCESSING CARE. UTILIZING THE MAPPING TOOL AND GROUNDED THEORY AS A METHOD, THIS STUDY WILL COMBINE THE VOICES OF MULTIPLE STAKEHOLDERS IN A SYSTEMATIC WAY TO IDENTIFY BARRIERS WITH CLARITY AND OBJECTIVITY. THUS, THIS STUDY WILL ASSIST POLICY MAKERS BY OBJECTIVELY IDENTIFYING AND DEFINING ANY PROBLEM TO BE ADDRESSED, PRIORITIZING ISSUES FOR REFORM, AND FACILITATING THE DESIGNING OF A SOLUTION. A MAPPING TOOL WILL ENABLE INTERVIEW SUBJECTS TO IDENTIFY AND DESCRIBE SPECIFIC PATHWAYS TO CARE THAT THEY FOUND BOTH HELPFUL AND CHALLENGING. THE INTERVIEWS WILL BE TRANSCRIBED, ANALYZED USING A GROUNDED THEORY APPROACH, AND CODED FOR THEMES TO SYNTHESIZE THEIR 

# Only retain useful columns

In [14]:
keep_cols = [
    'usaspending_permalink',
    'fiscal_year',
    'total_obligated_amount',
    'estimated_remaining_funds',
    'spent_percent',
    'grant_is_open',
    'program_match',
    'business_types_description',
    'prime_award_base_transaction_description',
    'transaction_description',
    'funding_opportunity_goals_text',
    'cfda_title',
    'cfda_number',
    'primary_place_of_performance_scope',
    'primary_place_of_performance_city_name',
    'primary_place_of_performance_state_name',
    'primary_place_of_performance_zip_4',
    'recipient_name',
    'funding_agency_name',
    'period_of_performance_start_date',
    'period_of_performance_current_end_date',
    'assistance_award_unique_key',
    'action_date'
]

# De-duplicate


In [15]:
dup_cols = [
    'usaspending_permalink',
]

contracts.sort_values(by='action_date',ascending=True,inplace=True)

contracts.drop_duplicates(subset=dup_cols,keep='last',inplace=True)

# Write out

In [16]:
contracts[keep_cols].to_parquet('clean_data/clean_doj_contracts.parquet')

# QA

In [17]:
contracts[contracts['usaspending_permalink']=='https://www.usaspending.gov/award/ASST_NON_2017GPBX0001_1550/']


Unnamed: 0,assistance_transaction_unique_key,assistance_award_unique_key,award_id_fain,modification_number,award_id_uri,sai_number,federal_action_obligation,total_obligated_amount,total_outlayed_amount_for_overall_award,indirect_cost_federal_share_amount,non_federal_funding_amount,total_non_federal_funding_amount,face_value_of_loan,original_loan_subsidy_cost,total_face_value_of_loan,total_loan_subsidy_cost,generated_pragmatic_obligations,disaster_emergency_fund_codes_for_overall_award,outlayed_amount_from_COVID-19_supplementals_for_overall_award,obligated_amount_from_COVID-19_supplementals_for_overall_award,outlayed_amount_from_IIJA_supplemental_for_overall_award,obligated_amount_from_IIJA_supplemental_for_overall_award,action_date,action_date_fiscal_year,period_of_performance_start_date,period_of_performance_current_end_date,awarding_agency_code,awarding_agency_name,awarding_sub_agency_code,awarding_sub_agency_name,awarding_office_code,awarding_office_name,funding_agency_code,funding_agency_name,funding_sub_agency_code,funding_sub_agency_name,funding_office_code,funding_office_name,treasury_accounts_funding_this_award,federal_accounts_funding_this_award,object_classes_funding_this_award,program_activities_funding_this_award,recipient_uei,recipient_duns,recipient_name,recipient_name_raw,recipient_parent_uei,recipient_parent_duns,recipient_parent_name,recipient_parent_name_raw,recipient_country_code,recipient_country_name,recipient_address_line_1,recipient_address_line_2,recipient_city_code,recipient_city_name,prime_award_transaction_recipient_county_fips_code,recipient_county_name,prime_award_transaction_recipient_state_fips_code,recipient_state_code,recipient_state_name,recipient_zip_code,recipient_zip_last_4_code,prime_award_transaction_recipient_cd_original,prime_award_transaction_recipient_cd_current,recipient_foreign_city_name,recipient_foreign_province_name,recipient_foreign_postal_code,primary_place_of_performance_scope,primary_place_of_performance_country_code,primary_place_of_performance_country_name,primary_place_of_performance_code,primary_place_of_performance_city_name,prime_award_transaction_place_of_performance_county_fips_code,primary_place_of_performance_county_name,prime_award_transaction_place_of_performance_state_fips_code,primary_place_of_performance_state_name,primary_place_of_performance_zip_4,prime_award_transaction_place_of_performance_cd_original,prime_award_transaction_place_of_performance_cd_current,primary_place_of_performance_foreign_location,cfda_number,cfda_title,funding_opportunity_number,funding_opportunity_goals_text,assistance_type_code,assistance_type_description,transaction_description,prime_award_base_transaction_description,business_funds_indicator_code,business_funds_indicator_description,business_types_code,business_types_description,correction_delete_indicator_code,correction_delete_indicator_description,action_type_code,action_type_description,record_type_code,record_type_description,highly_compensated_officer_1_name,highly_compensated_officer_1_amount,highly_compensated_officer_2_name,highly_compensated_officer_2_amount,highly_compensated_officer_3_name,highly_compensated_officer_3_amount,highly_compensated_officer_4_name,highly_compensated_officer_4_amount,highly_compensated_officer_5_name,highly_compensated_officer_5_amount,usaspending_permalink,initial_report_date,last_modified_date,fiscal_year,estimated_remaining_funds,spent_percent,remaining_funds_percent,grant_is_open,program_match__funding_code,program_match__title,program_match
5414,1550_2017GPBX0001_-NONE-_16.609_00-3,ASST_NON_2017GPBX0001_1550,2017GPBX0001,00-3,,SAI NOT AVAILABLE,-0.1,499442.9,352570.88,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.1,Q: Not Designated Nonemergency/Emergency/Disaster/Wildfire Suppression,,,,,2022-02-14,2022,2017-10-01,2021-02-28,15,Department of Justice,1550,Office of Justice Programs,15PBJA,OJP BUREAU OF JUSTICE ASSISTANCE,15.0,Department of Justice,1550.0,Office of Justice Programs,15PBJA,OJP BUREAU OF JUSTICE ASSISTANCE,015-X-0404-000,015-0404,"41.0: Grants, subsidies, and contributions",0043: PROJECT SAFE NEIGHBORHOODS;0043: S&L GUN CRIME PROSECUTION ASSISTANCE/GUN VIOLENCE REDUCTION,P138LY34M6L6,,CITY OF CHICAGO,CITY OF CHICAGO,,,CITY OF CHICAGO,CITY OF CHICAGO,USA,UNITED STATES,3510 S MICHIGAN AVE,,14000,CHICAGO,17031,COOK,17,IL,ILLINOIS,60653,1020.0,IL-01,IL-01,,,,SINGLE ZIP CODE,USA,UNITED STATES,IL14000,CHICAGO,17031.0,COOK,17,ILLINOIS,60653-1020,IL-01,IL-01,,16.609,project safe neighborhoods,BJA-2017-11482,,4,PROJECT GRANT (B),"AS PART OF BJAS SMART SUITE, THE PROJECT SAFE NEIGHBORHOODS (PSN), REPRESENTS A STRATEGIC APPROACH THAT BRINGS MORE SCIENCE INTO CRIMINAL JUSTICE OPERATIONS BY LEVERAGING INNOVATIVE APPLICATIONS OF ANALYSIS, TECHNOLOGY, AND EVIDENCE-BASED PRACTICES WITH THE GOAL OF IMPROVING PERFORMANCE AND EFFECTIVENESS WHILE CONTAINING COSTS. THE PROGRAM'S EFFECTIVENESS IS BASED ON THE COOPERATION OF LOCAL, STATE, AND FEDERAL AGENCIES ENGAGED IN A UNIFIED APPROACH LED BY THE U.S. ATTORNEY (USA) IN EACH ...",NORTHERN DISTRICT OF ILLINOIS FY17 VIOLENT GANG AND GUN CRIME REDUCTION PROGRAM,NON,NOT RECOVERY ACT,C,CITY OR TOWNSHIP GOVERNMENT,,,D,ADJUSTMENT TO COMPLETED PROJECT,2,NON-AGGREGATE RECORD,,,,,,,,,,,https://www.usaspending.gov/award/ASST_NON_2017GPBX0001_1550/,2023-04-11,2023-04-11,2022,146872.02,71.0,29.0,False,project safe neighborhood,project safe neighborhood,project safe neighborhood
