# Blog 9 - Simple Ways to Identify Federal Technology Buying Trends

In [2]:
import os,glob
from datetime import datetime
from collections import Counter
from tqdm import tqdm
import re

import pandas as pd
import numpy as np

from dask.distributed import Client
import dask.dataframe as dd

import altair as alt
import matplotlib
import matplotlib.pyplot as plt

import psutil

import spacy

import requests
from bs4 import BeautifulSoup

In [3]:
def Display_System_Info(): # https://psutil.readthedocs.io/en/latest/#psutil.virtual_memory
    physical_cores = psutil.cpu_count(logical=False) #, psutil.cpu_count(logical=True)
    RAM_total_installed = psutil.virtual_memory()[0] #, psutil.swap_memory()
    RAM_available = psutil.virtual_memory().available #['available']
    dask_workers = int(physical_cores/2)
    print(f"Physical CPU Cores: {physical_cores}, RAM available: {round(RAM_available/1e9)} GB, total RAM: {round(RAM_total_installed/1e9)} GB")
    result_record = {'CPU_Cores_Physical' : physical_cores,
                    'RAM_available' : RAM_available,
                     'RAM_total_installed' : RAM_total_installed,
                    }
    return result_record

Display_System_Info()

Physical CPU Cores: 8, RAM available: 37 GB, total RAM: 69 GB


{'CPU_Cores_Physical': 8,
 'RAM_available': 37409853440,
 'RAM_total_installed': 68719476736}

In [4]:
## Key code for read CSV files into pandas and Dask
def Build_DTypes_Dict(filename_list):
    df = pd.read_csv(filename_list[0], nrows=1)
    
    dtype = dict(zip(sorted(df.columns), ['object'] * len(df.columns)))
    
    # ! important - may need to change other fields to category, int, date etc for better memory management
    dtype['federal_action_obligation'] = 'float64'
    #dtype['action_date_fiscal_year'] = 'int'
    
    return dtype

def Load_CSV_Files_Into_DF(filename_list, usecols = 'ALL'): #best approach if you have enough RAM memory on your machine
    
    dtype = Build_DTypes_Dict(filename_list) # ! important - may need to change other fields to category, int, date etc for better memory management
    
#     print(f"Reading file: {filename_list[0]}")
#     df = pd.read_csv(filename_list[0], dtype=dtype, low_memory=False) # read the first file
    
#     for filename in filename_list[1:]: #read the remaining files and append
#         print(f"Reading file: {filename}")
#         df = df.append(pd.read_csv(filename, dtype=dtype, low_memory=False))
        
    #alternative approach - faster?
    dataframe_loads_list = []
    for filename in filename_list:
        print(f"Reading file: {os.path.basename(filename)}")
        if usecols == 'ALL':
            dataframe_loads_list.append(pd.read_csv(filename, dtype=dtype, low_memory=False))
        else:
            dataframe_loads_list.append(pd.read_csv(filename, dtype=dtype, usecols = usecols, low_memory=False))
    df = pd.concat(dataframe_loads_list)
    print("Files loaded into pandas dataframe.")
    return df.reset_index(drop=True)

def Get_GFY_from_file_path(filename_to_check_complete_path):
    if os.path.isdir(filename_to_check_complete_path):
        #print(f"{filename_to_check_complete_path} is a directory. Ignore")
        return
    filename_GFY = os.path.basename(filename_to_check_complete_path)[:6]
    assert filename_GFY[:2] == 'FY' # check this
    return filename_GFY

def CurrentGFY():
    if datetime.now().month >= 10: 
        return datetime.now().year + 1
    else:
        return datetime.now().year
    
def getGFY(datestamp):
    if datestamp.month >= 10: 
        return datestamp.year + 1
    else:
        return datestamp.year
    
def Get_Current_Time():
    return datetime.now().strftime("%d/%m/%Y %H:%M:%S")

Get_Current_Time()

'23/11/2020 09:21:23'

In [8]:
#download_file_path_CSV = str(folder_choice.selected_path)

print("Change this to the path where you have stored the CSV files you downloaded and expanded from the USAspending.gov archives.")
download_file_path_CSV = "/Volumes/Samsung_T3/USAspending_Archive_Downloads/Expanded_CSV_Files/"
print()
print(f"Path to USAspending.gov CSV files of annual spending: {download_file_path_CSV}")

Change this to the path where you have stored the CSV files you downloaded and expanded from the USAspending.gov archives.

Path to USAspending.gov CSV files of annual spending: /Volumes/Samsung_T3/USAspending_Archive_Downloads/Expanded_CSV_Files/


In [9]:
# do a quick check to make sure the CSV files are there
csv_files_list = sorted(glob.glob(os.path.join(download_file_path_CSV, "*.csv")))

gfy_list = set()
size_of_files_GB = 0
for n, filename in enumerate(csv_files_list):
    filename_basename = os.path.basename(filename)
    print(n+1, filename_basename)
    gfy_list.add(filename_basename[:6])
    size_of_files_GB += os.path.getsize(filename)

all_GFY_list = sorted(set([Get_GFY_from_file_path(filename) for filename in csv_files_list]))
    
print()
print(f"There are {len(csv_files_list)} CSV files. They use {round(size_of_files_GB/1e9, 3)} GB of storage.")
print(f"Covering GFY: {sorted(gfy_list)}")
print(f"Current GFY: {'FY' + str(CurrentGFY())}")
print()

1 FY2010_All_Contracts_Full_20200807_1.csv
2 FY2010_All_Contracts_Full_20200807_2.csv
3 FY2010_All_Contracts_Full_20200807_3.csv
4 FY2010_All_Contracts_Full_20200807_4.csv
5 FY2011_All_Contracts_Full_20200807_1.csv
6 FY2011_All_Contracts_Full_20200807_2.csv
7 FY2011_All_Contracts_Full_20200807_3.csv
8 FY2011_All_Contracts_Full_20200807_4.csv
9 FY2012_All_Contracts_Full_20200807_1.csv
10 FY2012_All_Contracts_Full_20200807_2.csv
11 FY2012_All_Contracts_Full_20200807_3.csv
12 FY2012_All_Contracts_Full_20200807_4.csv
13 FY2013_All_Contracts_Full_20200807_1.csv
14 FY2013_All_Contracts_Full_20200807_2.csv
15 FY2013_All_Contracts_Full_20200807_3.csv
16 FY2014_All_Contracts_Full_20200808_1.csv
17 FY2014_All_Contracts_Full_20200808_2.csv
18 FY2014_All_Contracts_Full_20200808_3.csv
19 FY2015_All_Contracts_Full_20200808_1.csv
20 FY2015_All_Contracts_Full_20200808_2.csv
21 FY2015_All_Contracts_Full_20200808_3.csv
22 FY2015_All_Contracts_Full_20200808_4.csv
23 FY2015_All_Contracts_Full_20200808_5.c

In [10]:
## Load GFY2010-GFY2019 with limited Columns even though we will mostly use GFY19 (demonstrate usecols feature)

# You can limit the selection to a subset of the GFY to speed processing

ALL_GFY = False # switch to True to override the subset

user_select_GFY = ['FY2020','FY2019', 'FY2018', 'FY2017', 'FY2016']

if ALL_GFY:
    user_select_GFY = all_GFY_list
    
# print(f"GFY to read for analysis: {sorted(user_select_GFY)}")
# print()

# Create csv_files_list_restricted to streamline downstream data processing

csv_files_list = sorted(glob.glob(os.path.join(download_file_path_CSV, "*.csv")))
csv_files_list_restricted = [filename for filename in csv_files_list for GFY in user_select_GFY if GFY in filename]
gfy_found_in_files = [Get_GFY_from_file_path(filename) for filename in csv_files_list_restricted]
csv_GFY_missing = sorted(set(user_select_GFY) - set(gfy_found_in_files))

assert set(user_select_GFY) == set(user_select_GFY) # check to make sure this is working

print(f"You have selected these GFY for analysis:{user_select_GFY}")
print(f"Missing GFY: {csv_GFY_missing} -> if GFY missing, confirm you have decompressed the GFY zip file archive")

#[(os.path.basename(filename), round(os.path.getsize(filename)/1e9, 3)) for filename in csv_files_list_restricted]

You have selected these GFY for analysis:['FY2020', 'FY2019', 'FY2018', 'FY2017', 'FY2016']
Missing GFY: [] -> if GFY missing, confirm you have decompressed the GFY zip file archive


In [11]:
%%time

dtype = Build_DTypes_Dict(csv_files_list_restricted) # this handles reduces ambiguity for dask interpreting data type to infer on read

### Since we only need a few fields from the ~280 fields, we can radically reduce the memory needed and use pandas

fields = sorted(dtype.keys())

CPU times: user 23.2 ms, sys: 1.88 ms, total: 25.1 ms
Wall time: 26.4 ms


In [13]:
dtype

{'1862_land_grant_college': 'object',
 '1890_land_grant_college': 'object',
 '1994_land_grant_college': 'object',
 'a76_fair_act_action': 'object',
 'a76_fair_act_action_code': 'object',
 'action_date': 'object',
 'action_date_fiscal_year': 'object',
 'action_type': 'object',
 'action_type_code': 'object',
 'airport_authority': 'object',
 'alaskan_native_corporation_owned_firm': 'object',
 'alaskan_native_servicing_institution': 'object',
 'american_indian_owned_business': 'object',
 'asian_pacific_american_owned_business': 'object',
 'award_description': 'object',
 'award_id_piid': 'object',
 'award_or_idv_flag': 'object',
 'award_type': 'object',
 'award_type_code': 'object',
 'awarding_agency_code': 'object',
 'awarding_agency_name': 'object',
 'awarding_office_code': 'object',
 'awarding_office_name': 'object',
 'awarding_sub_agency_code': 'object',
 'awarding_sub_agency_name': 'object',
 'base_and_all_options_value': 'object',
 'base_and_exercised_options_value': 'object',
 'black

In [16]:
# Some helper functions to fix issues with USAspending NaN fields and add PSC_Cat designator

def Fix_Recipient_Name_UNSPECIFIED(df):
    #fix blank or UNSPECIFIED recipient_parent_names
    
#     fix_UNSPECIFIED_lambda = lambda x: x['recipient_name'] + "_UNSPECIFIED" if x['recipient_parent_name'] in ['', 'UNSPECIFIED'] else x['recipient_parent_name']
#     #pandas.core.frame.DataFrame, dask.dataframe.core.DataFrame
#     if type(df) == dask.dataframe.core.DataFrame:
#         df['recipient_parent_name'] = df.apply(fix_UNSPECIFIED_lambda, axis = 1, meta=('recipient_parent_name', 'object'))
#     else:
#         df['recipient_parent_name'] = df.apply(fix_UNSPECIFIED_lambda, axis = 1)
    
    df['recipient_parent_name'] = df['recipient_parent_name'].mask(df['recipient_parent_name'] == 'UNSPECIFIED', df['recipient_parent_name'] + "_UNSPECIFIED") # use of mask instead of other options - https://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.DataFrame.mask

    return

def Add_PSC_Cat_Fields(df):
    print(f"{Get_Current_Time()} -> adding PSC_Cat field...")
    df['PSC_Cat'] = df['product_or_service_code'].str[:1]
    df['PSC_Cat_2'] = df['product_or_service_code'].str[:2]
    return

def Enhance_Spending_File(df):
    #Fix_Recipient_Name_UNSPECIFIED(df)
    Add_PSC_Cat_Fields(df)
    df = df.fillna("UNSPECIFIED")
    return df

In [17]:
# we do not need all of the 280+ fields from the records
# usecols = in pd.read_csv and Dask allows one to be selective on the fields and save memory

usecols = ['action_date',
           'action_date_fiscal_year',
           'recipient_parent_name',
           'recipient_name',
           'federal_action_obligation',
           'funding_agency_name',
           'funding_sub_agency_name',
           'funding_office_name',
           'product_or_service_code_description',
           'product_or_service_code',
           'program_acronym',
           'award_description',
           "solicitation_identifier", # this will be useful to link to Sol# field in beta.SAM.gov award and opportunity archives
          ]

In [18]:
%%time

# use Dask

USAspending_parquet_file_name = "USAspending_GFY2016_GFY2020ytd_Time_Series_Analytics.parquet"

if glob.glob(USAspending_parquet_file_name): #if this file read has already happened, read from parquet file
    print(f"Reading parquet file: {USAspending_parquet_file_name} instead of CSV file sources.")
    df = pd.read_parquet(USAspending_parquet_file_name).fillna("UNSPECIFIED")
else:
    # use Dask
    #df = dd.read_csv(csv_files_list_restricted, usecols = usecols)
    # use pandas
    df = Load_CSV_Files_Into_DF(csv_files_list_restricted, usecols = usecols)

    df = Enhance_Spending_File(df) #fix things like NaN to UNSPECIFIED and add PSC_Cat and PSC_Cat2
    
    # Save the data in a parquet file to restart below if needed
    print(f"Saving the data to a parquet file: {USAspending_parquet_file_name} in local directory.")
    df.to_parquet(USAspending_parquet_file_name)
    #df.to_csv("USAspending_GFY2010_GFY2020ytd_Time_Series_Analytics_Fields_Subset.csv", index=False)

df.head()

Reading parquet file: USAspending_GFY2016_GFY2020ytd_Time_Series_Analytics.parquet instead of CSV file sources.
CPU times: user 1min 5s, sys: 20.4 s, total: 1min 25s
Wall time: 54.5 s


Unnamed: 0,federal_action_obligation,action_date,action_date_fiscal_year,funding_agency_name,funding_sub_agency_name,funding_office_name,recipient_name,recipient_parent_name,award_description,solicitation_identifier,product_or_service_code,product_or_service_code_description,program_acronym,PSC_Cat,PSC_Cat_2
0,3960.0,2016-07-14,2016,DEPARTMENT OF DEFENSE (DOD),DEPT OF THE ARMY,W4GG HQ US ARMY TACOM,"ALCOA GLOBAL FASTENERS, INC.",ALCOA INC.,"TOOL KIT, SCREW THREAD INSERT: *****IF POSSIBL...",UNSPECIFIED,5180,"SETS, KITS, AND OUTFITS OF HAND TOOLS",UNSPECIFIED,5,51
1,20878.0,2016-08-03,2016,DEPARTMENT OF DEFENSE (DOD),DEFENSE LOGISTICS AGENCY,DLA TROOP SUPPORT,"ALCOA GLOBAL FASTENERS, INC.",ALCOA INC.,"8503498385!NUT ASSEMBLY,SELF-L",SPE5E915TC028,5310,NUTS AND WASHERS,UNSPECIFIED,5,53
2,0.0,2016-05-19,2016,DEPARTMENT OF DEFENSE (DOD),DEPT OF THE NAVY,SUBMEPP ACTIVITY,AMERICAN SYSTEMS CORPORATION,AMERICAN SYSTEMS CORPORATION,IGF::CT::IGF OPTIONAL TASK 6.B,UNSPECIFIED,D399,IT AND TELECOM- OTHER IT AND TELECOMMUNICATIONS,UNSPECIFIED,D,D3
3,171946.44,2016-06-06,2016,DEPARTMENT OF JUSTICE (DOJ),OFFICE OF JUSTICE PROGRAMS,OJP CONTRACTS OFFICE,CDW GOVERNMENT LLC,CDW CORPORATION,IGF::OT::IGF MICROSOFT DYNAMICS CRM FOR PSOB,DJO-OCIO-16-Q-0091,7030,INFORMATION TECHNOLOGY SOFTWARE,UNSPECIFIED,7,70
4,1173901.0,2016-06-24,2016,DEPARTMENT OF HOMELAND SECURITY (DHS),TRANSPORTATION SECURITY ADMINISTRATION,OFFICE OF ACQUISITION,LOCKHEED MARTIN CORPORATION,LOCKHEED MARTIN CORPORATION,IGF::OT::IGF EXERCISE OPTION PERIOD IN SUPPOR...,UNSPECIFIED,K059,MODIFICATION OF EQUIPMENT- ELECTRICAL AND ELEC...,UNSPECIFIED,K,K0


In [19]:
df.head()

Unnamed: 0,federal_action_obligation,action_date,action_date_fiscal_year,funding_agency_name,funding_sub_agency_name,funding_office_name,recipient_name,recipient_parent_name,award_description,solicitation_identifier,product_or_service_code,product_or_service_code_description,program_acronym,PSC_Cat,PSC_Cat_2
0,3960.0,2016-07-14,2016,DEPARTMENT OF DEFENSE (DOD),DEPT OF THE ARMY,W4GG HQ US ARMY TACOM,"ALCOA GLOBAL FASTENERS, INC.",ALCOA INC.,"TOOL KIT, SCREW THREAD INSERT: *****IF POSSIBL...",UNSPECIFIED,5180,"SETS, KITS, AND OUTFITS OF HAND TOOLS",UNSPECIFIED,5,51
1,20878.0,2016-08-03,2016,DEPARTMENT OF DEFENSE (DOD),DEFENSE LOGISTICS AGENCY,DLA TROOP SUPPORT,"ALCOA GLOBAL FASTENERS, INC.",ALCOA INC.,"8503498385!NUT ASSEMBLY,SELF-L",SPE5E915TC028,5310,NUTS AND WASHERS,UNSPECIFIED,5,53
2,0.0,2016-05-19,2016,DEPARTMENT OF DEFENSE (DOD),DEPT OF THE NAVY,SUBMEPP ACTIVITY,AMERICAN SYSTEMS CORPORATION,AMERICAN SYSTEMS CORPORATION,IGF::CT::IGF OPTIONAL TASK 6.B,UNSPECIFIED,D399,IT AND TELECOM- OTHER IT AND TELECOMMUNICATIONS,UNSPECIFIED,D,D3
3,171946.44,2016-06-06,2016,DEPARTMENT OF JUSTICE (DOJ),OFFICE OF JUSTICE PROGRAMS,OJP CONTRACTS OFFICE,CDW GOVERNMENT LLC,CDW CORPORATION,IGF::OT::IGF MICROSOFT DYNAMICS CRM FOR PSOB,DJO-OCIO-16-Q-0091,7030,INFORMATION TECHNOLOGY SOFTWARE,UNSPECIFIED,7,70
4,1173901.0,2016-06-24,2016,DEPARTMENT OF HOMELAND SECURITY (DHS),TRANSPORTATION SECURITY ADMINISTRATION,OFFICE OF ACQUISITION,LOCKHEED MARTIN CORPORATION,LOCKHEED MARTIN CORPORATION,IGF::OT::IGF EXERCISE OPTION PERIOD IN SUPPOR...,UNSPECIFIED,K059,MODIFICATION OF EQUIPMENT- ELECTRICAL AND ELEC...,UNSPECIFIED,K,K0


In [20]:
df.shape

(27386845, 15)

In [21]:
df.memory_usage().sum()/1e9 # memory usgae in GB

3.286421528

### If you don't have the RAM for pandas, use Dask as an alternative to speed search across processors

In [22]:
# client = Client()
# client

# ddf = dd.read_csv(csv_files_list_restricted, usecols = usecols)

# ddf.head()

# %%time 
# # slow since it must read through all of the files
# ddf.groupby('action_date_fiscal_year')['federal_action_obligation'].sum().reset_index().compute()

## Quicklook at product_or_service_code '70' (IT Software and Hardware)

In [23]:
df_70 = df.query("PSC_Cat_2 == '70'") # this is the category the USG uses for HW and SW purchases (outside of 'D' and 'R' services work)

In [24]:
df_70.groupby('action_date_fiscal_year')['federal_action_obligation'].sum().reset_index()

Unnamed: 0,action_date_fiscal_year,federal_action_obligation
0,2016,12440350000.0
1,2017,12937220000.0
2,2018,13085870000.0
3,2019,13925140000.0
4,2020,12466880000.0


In [25]:
df_70.shape

(524789, 15)

In [26]:
program_aconym_list = sorted(df_70['program_acronym'].unique())
program_aconym_list[:10]

['.',
 '00797',
 '1406',
 '2GIT (BPA)',
 '2GIT BPA',
 '2QSBAB',
 '2QSD',
 '8(A)',
 '8(A) IDIQ',
 '8A']

In [27]:
#df_70['program_acronym'].isnull().count()

In [29]:
df_70['award_description'].iloc[0:10]

3            IGF::OT::IGF MICROSOFT DYNAMICS CRM FOR PSOB
12                       FEDERAL SUPPLY SCHEDULE CONTRACT
13      IGF::OT::IGF PHARMACY ONESOURCE VERIFORM (OPTI...
23                                      8502931689!DONGLE
148     MOVE FUNDS TO NEWLY CREATED CLIN SO PHASE 4 TR...
338                              ORACLE SPARC T7-2 SERVER
377                            REALIGN FUNDS TO COVER REA
835     INFORMATION TECHNOLOGY SERVICES MANAGEMENT (IT...
847     INFORMATION TECHNOLOGY SERVICES MANAGEMENT (IT...
1391                       IGF::OT::IFG  LEXISNEXIS IV/IA
Name: award_description, dtype: object

### Collect Names of the Largest Software Companies

In [31]:
sw_url = "https://www.thesoftwarereport.com/the-top-100-software-companies-of-2020/" # this source is arbitrary 

r = requests.get(sw_url)

soup = BeautifulSoup(r.content)

sw_companies_list = []

for item in soup.find_all("strong"):
    some_text = item.get_text().replace("\n","").replace("\xa0","")
    some_text_fixed = re.sub("^.*\\. ","", some_text)
    #print(some_text, some_text_fixed)
    if some_text_fixed != "":
        sw_companies_list.append(some_text_fixed)
        
# clean up list - known problems or ambiguous names
sw_companies_list = sorted([name if name.upper() != "BOARD" else "Board Software" for name in sw_companies_list])
sw_companies_list

sorted(sw_companies_list)

['ActiveCampaign',
 'Akamai',
 'Anaplan',
 'Asana',
 'Atlassian',
 'Automattic',
 'Avature',
 'BambooHR',
 'Bill.com',
 'BlackLine',
 'Board Software',
 'Centrify',
 'Collibra',
 'Conga',
 'Cornerstone',
 'Couchbase',
 'Coupa',
 'Creatio',
 'Cvent',
 'Cylance',
 'Cylynt',
 'DataStax',
 'Databricks',
 'Datadog',
 'Datorama',
 'DigitalOcean',
 'Digitate',
 'DiscoverOrg',
 'DocuSign',
 'Dropbox',
 'Druva',
 'Egnyte',
 'Elastic',
 'Everbridge',
 'Fastly',
 'Freshworks',
 'FullContact',
 'GFI Software (Aurea SMB Solutions)',
 'Gainsight',
 'Gusto',
 'INTERCOM',
 'InVision',
 'Intralinks',
 'Justworks',
 'Kareo',
 'Kyriba',
 'Liquid Web',
 'Looker',
 'Lucid',
 'Magic Software Enterprises',
 'Mendix',
 'MuleSoft',
 'Namely',
 'New Relic',
 'Nintex',
 'ON24',
 'Odoo',
 'Okta',
 'OutSystems',
 'PagerDuty',
 'Payscale',
 'Pipedrive',
 'Proofpoint',
 'Qualtrics',
 'Qualys',
 'SailPoint ',
 'SalesLoft',
 'Salesforce',
 'Schoology',
 'Seismic',
 'ServiceNow',
 'ServiceTitan',
 'SimilarWeb',
 'Sisen

In [32]:
# RPA Software - https://www.g2.com/categories/robotic-process-automation-rpa

additional_sw_companies = [
 'AWS',
 'Adobe',
 'Airtable',
 'Amazon',
 'Amazon Web Services',
 'Anaconda',
 'Appian',
 'Apple',
 'Asana',
 'Automation Anywhere',
 'Azure',
 'Basecamp',
 'Blue Prism',
    'BMC Helix', #https://www.bmc.com/it-solutions/remedy-itsm.html
    'BMC Remedy',
 'Broad Software',
 'Cloudera',
 'Coiled.io',
 'Dask',
 'Databricks',
 'DataDog',
 'Dell',
 'Docker',
 'Elastic',
 'Google',
 'Google Cloud',
 'HEewlett Packard',
 'HPE',
 'IBM',
 'InfoZoom',
 'Intellibot',
 'JIRA',
 'Keeper',
 'Laserfiche',
 'Looker',
 'McAfee',
 'Microsoft',
 'Microstrategy',
 'Monday',
 'MongoDB',
 'NetZoom',
 'Ontic',
 'Oracle',
 'Palantir',
 'Palo Alto Networks',
 'Pandas',
 'Pega Platform',
 'Pegasystems',
 'ProofHub',
 'Qlik',
 'Red Hat',
 #'Remedy',
 'SAP',
 'ServiceNow',
 'Snowflake',
 'SolarWinds',
 'Sumo Logic',
 'Teamwork Projects',
 'Trello',
 'Ubuntu',
 'UiPath',
 'Unity Technologies',
 'VMWare',
 'Workfusion',
 'ZOOM VIDEO', # Zoom by itself generates spurious finds
 'Zoho',
 'saturncloud.io'
]

In [54]:
sw_companies_list.extend(additional_sw_companies)
sw_companies_list = sorted(set(sw_companies_list))

### Find Mentions of Various IT Product Companies

#### Retrieve the Obligation Transaction Records where the Technology Product or Brand Appears

In [34]:
%%time

technology_counts = {}

df_result = pd.DataFrame()

companies_list = sorted(set(sw_companies_list))

# Narrow the search to obligations in PSC_Cat D, R, and 70 - the vast majority of IT-related purchases happen under these codes
selector = (df['PSC_Cat'].isin(['D','R'])) | (df['PSC_Cat_2'].isin(['70']))
df_limited_PSC = df[selector] #search these codes to reduce confusion on the term in other contexts such as USDA or DoD buying "APPLES" (fruit)

CPU times: user 1.76 s, sys: 28.7 ms, total: 1.79 s
Wall time: 1.78 s


In [35]:
df_limited_PSC.shape[0]/df.shape[0] #,  #, sorted(df_limited_PSC.columns)

0.09980821083991237

In [36]:
# Brute force search for the terms in the award_description field

for sw_company_name in tqdm(companies_list):
    #print(sw_company_name, sw_company_name.upper() in entities)
    #if sw_company_name.upper() in entities:
    #print(f"Working on: {sw_company_name}")
    search_term = sw_company_name.upper()
    
    selector = (df_limited_PSC['award_description'].str.contains(search_term, regex=False, case=False)) | (df_limited_PSC['recipient_parent_name'].str.contains(search_term, regex=False, case=False))
    df_temp = df_limited_PSC[selector].copy()

    df_temp['sw_company_name'] = sw_company_name

    df_result = df_result.append(df_temp)
    
df_result.shape

100%|██████████| 160/160 [06:46<00:00,  2.54s/it]


(93164, 16)

In [38]:
# helper function to retrieve a collection of product and company names

def Collect_Records_Parent_Brand(df, collection_of_products):
    df_result = pd.DataFrame()
    for search_term in collection_of_products:
        df_temp = df[df['sw_company_name'].str.contains(search_term, regex=False, case=False)].copy()
        df_result = df_result.append(df_temp)
    
    df_result = df_result.reset_index()
    return df_result

collection_of_products = ['Amazon', 'Amazon Web Services', 'AWS']

df_aws = Collect_Records_Parent_Brand(df_result, collection_of_products)
df_aws.head()

Unnamed: 0,index,federal_action_obligation,action_date,action_date_fiscal_year,funding_agency_name,funding_sub_agency_name,funding_office_name,recipient_name,recipient_parent_name,award_description,solicitation_identifier,product_or_service_code,product_or_service_code_description,program_acronym,PSC_Cat,PSC_Cat_2,sw_company_name
0,11311,20710.77,2016-03-31,2016,DEPARTMENT OF AGRICULTURE (USDA),"USDA, DEPARTMENTAL ADMINISTRATION","USDA, OCIO, NATIONAL INFORMATION TECHNOLOGY CE...",INFORELIANCE SOLUTIONS JV,INFORELIANCE SOLUTIONS JV,THE PRIMARY GOAL OF THIS ACQUISITION IS TO ACQ...,UNSPECIFIED,7030,INFORMATION TECHNOLOGY SOFTWARE,UNSPECIFIED,7,70,Amazon
1,12144,51100.0,2016-02-23,2016,DEPARTMENT OF DEFENSE (DOD),DEPT OF THE NAVY,SPAWAR SYSTEMS CENTER ATLANTIC,"SMARTRONIX, INC.",SMARTRONIX INC.,"AMAZON WEB SERVICES (AWS) - COMPUTING, STORAGE...",UNSPECIFIED,D316,IT AND TELECOM- TELECOMMUNICATIONS NETWORK MAN...,UNSPECIFIED,D,D3,Amazon
2,12189,436857.1,2016-09-25,2016,DEPARTMENT OF DEFENSE (DOD),DEPT OF THE NAVY,SPAWAR SYSTEMS CENTER ATLANTIC,"SMARTRONIX, INC.",SMARTRONIX INC.,"AMAZON WEB SERVICES (AWS), CLOUD STORAGE. IGF:...",UNSPECIFIED,D316,IT AND TELECOM- TELECOMMUNICATIONS NETWORK MAN...,UNSPECIFIED,D,D3,Amazon
3,12898,0.0,2016-09-14,2016,AGENCY FOR INTERNATIONAL DEVELOPMENT (USAID),AGENCY FOR INTERNATIONAL DEVELOPMENT,USAID M,"AQUILENT, INC",AQUILENT INC,IGF::CL::IGF - SPLUNK AMAZON HOSTING WEB SERVICES,UNSPECIFIED,D317,IT AND TELECOM- WEB-BASED SUBSCRIPTION,UNSPECIFIED,D,D3,Amazon
4,14036,-41202.97,2016-07-29,2016,DEPARTMENT OF DEFENSE (DOD),DEPT OF THE NAVY,SPAWAR SYSTEMS CENTER ATLANTIC,"SMARTRONIX, INC.",SMARTRONIX INC.,AMAZON WEB SERVICES (AWS) CLOUD HOSTING AND ST...,UNSPECIFIED,D316,IT AND TELECOM- TELECOMMUNICATIONS NETWORK MAN...,UNSPECIFIED,D,D3,Amazon


In [39]:
df_aws.groupby(['action_date_fiscal_year'])['federal_action_obligation'].sum().reset_index()

Unnamed: 0,action_date_fiscal_year,federal_action_obligation
0,2016,154624800.0
1,2017,401460100.0
2,2018,558726800.0
3,2019,362289100.0
4,2020,398053100.0


In [42]:
df_aws.query("action_date_fiscal_year == '2019'").groupby(['action_date_fiscal_year', 'funding_agency_name'])['federal_action_obligation'].sum().reset_index().sort_values(by='federal_action_obligation', ascending=False).head(10)

Unnamed: 0,action_date_fiscal_year,funding_agency_name,federal_action_obligation
9,2019,DEPARTMENT OF DEFENSE (DOD),102483300.0
13,2019,DEPARTMENT OF HOMELAND SECURITY (DHS),95169790.0
21,2019,DEPARTMENT OF VETERANS AFFAIRS (VA),34578840.0
12,2019,DEPARTMENT OF HEALTH AND HUMAN SERVICES (HHS),26986190.0
27,2019,NATIONAL AERONAUTICS AND SPACE ADMINISTRATION ...,21909520.0
8,2019,DEPARTMENT OF COMMERCE (DOC),18859290.0
0,2019,AGENCY FOR INTERNATIONAL DEVELOPMENT (USAID),9700184.0
20,2019,DEPARTMENT OF TRANSPORTATION (DOT),9049505.0
19,2019,DEPARTMENT OF THE TREASURY (TREAS),8835650.0
4,2019,CONSUMER FINANCIAL PROTECTION BUREAU (CFPB),5615380.0


## Analysis of the Technology Trends

In [43]:
# ! Note: one can groupby sw_company_name and also assess how much is by recipient_parent_name vs award_description

df_result_grouped = df_result.groupby(['action_date_fiscal_year', 'sw_company_name'])['federal_action_obligation'].agg(["sum", "count"]).reset_index()
df_result_grouped.rename(columns={"sum" : "federal_action_obligation"}, inplace = True)
df_result_grouped.sort_values(by='action_date_fiscal_year', ascending=False).head(10)

Unnamed: 0,action_date_fiscal_year,sw_company_name,federal_action_obligation,count
502,2020,iCIMS,3104560.0,5
438,2020,Gusto,5400.0,5
428,2020,DataStax,1294811.0,11
429,2020,Databricks,4225143.0,8
430,2020,Dell,518024300.0,3142
431,2020,Docker,1255940.0,14
432,2020,DocuSign,7660287.0,29
433,2020,Druva,514776.9,5
434,2020,Elastic,14828350.0,125
435,2020,Everbridge,5305787.0,67


In [44]:
# compute growth ratios for counts

df_result_pivot_count = df_result_grouped.pivot_table(index='sw_company_name', columns='action_date_fiscal_year', values='count').fillna(0.0).reset_index()
df_result_pivot_count['2020_vs_2019_ratio'] = df_result_pivot_count['2020']/df_result_pivot_count['2019']
df_result_pivot_count['2019_vs_2018_ratio'] = df_result_pivot_count['2019']/df_result_pivot_count['2018']
df_result_pivot_count['2019_vs_2016_ratio'] = df_result_pivot_count['2019']/df_result_pivot_count['2016']
df_result_pivot_count = df_result_pivot_count.fillna(0.0)
df_result_pivot_count = df_result_pivot_count.replace(np.inf, 999.0)
df_result_pivot_count

action_date_fiscal_year,sw_company_name,2016,2017,2018,2019,2020,2020_vs_2019_ratio,2019_vs_2018_ratio,2019_vs_2016_ratio
0,AWS,379.0,447.0,484.0,466.0,450.0,0.965665,0.962810,1.229551
1,Adobe,929.0,1137.0,1210.0,921.0,887.0,0.963084,0.761157,0.991389
2,Akamai,67.0,54.0,48.0,53.0,61.0,1.150943,1.104167,0.791045
3,Amazon,159.0,176.0,240.0,203.0,215.0,1.059113,0.845833,1.276730
4,Amazon Web Services,82.0,83.0,150.0,138.0,146.0,1.057971,0.920000,1.682927
...,...,...,...,...,...,...,...,...,...
111,ZOOM VIDEO,2.0,2.0,2.0,4.0,23.0,5.750000,2.000000,2.000000
112,Zaloni,0.0,1.0,0.0,0.0,0.0,0.000000,0.000000,0.000000
113,Zoho,22.0,17.0,16.0,13.0,12.0,0.923077,0.812500,0.590909
114,Zoom,17.0,25.0,31.0,24.0,90.0,3.750000,0.774194,1.411765


In [45]:
# compute growth ratios for obligations affiliated with the search terms

df_result_pivot_funding = df_result_grouped.pivot_table(index='sw_company_name', columns='action_date_fiscal_year', values='federal_action_obligation').fillna(0.0).reset_index()
df_result_pivot_funding['2020_vs_2019_ratio'] = df_result_pivot_funding['2020']/df_result_pivot_funding['2019']
df_result_pivot_funding['2019_vs_2018_ratio'] = df_result_pivot_funding['2019']/df_result_pivot_funding['2018']
df_result_pivot_funding['2019_vs_2016_ratio'] = df_result_pivot_funding['2019']/df_result_pivot_funding['2016']
df_result_pivot_funding = df_result_pivot_funding.fillna(0.0)
df_result_pivot_funding = df_result_pivot_funding.replace(np.inf, 1.0)
df_result_pivot_funding

action_date_fiscal_year,sw_company_name,2016,2017,2018,2019,2020,2020_vs_2019_ratio,2019_vs_2018_ratio,2019_vs_2016_ratio
0,AWS,99908260.54,3.214112e+08,3.945192e+08,1.565817e+08,2.155276e+08,1.376455,0.396892,1.567255
1,Adobe,80461735.48,1.034522e+08,1.185281e+08,1.318615e+08,1.218155e+08,0.923814,1.112492,1.638810
2,Akamai,19278573.91,1.310217e+07,1.961728e+07,1.997611e+07,2.200124e+07,1.101377,1.018292,1.036182
3,Amazon,24483668.79,3.059707e+07,5.775095e+07,7.843264e+07,6.463211e+07,0.824046,1.358119,3.203468
4,Amazon Web Services,15116450.60,2.472593e+07,5.322836e+07,6.363738e+07,5.894670e+07,0.926291,1.195554,4.209809
...,...,...,...,...,...,...,...,...,...
111,ZOOM VIDEO,17473.00,1.991127e+04,3.229500e+04,1.766257e+05,1.066998e+06,6.041009,5.469135,10.108493
112,Zaloni,0.00,-2.531250e+04,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000
113,Zoho,134568.38,4.359474e+05,1.142002e+05,1.520304e+05,1.210131e+05,0.795980,1.331262,1.129763
114,Zoom,467864.89,5.617452e+05,1.381516e+06,5.728122e+05,7.947180e+06,13.873972,0.414626,1.224311


In [46]:
df_result_pivot_count.sort_values(by='2020_vs_2019_ratio', ascending=False).head(25) #999 is for cases where there was no mention in prior period (denominator)

action_date_fiscal_year,sw_company_name,2016,2017,2018,2019,2020,2020_vs_2019_ratio,2019_vs_2018_ratio,2019_vs_2016_ratio
74,Pega Platform,1.0,1.0,0.0,0.0,1.0,999.0,0.0,0.0
6,Anaplan,0.0,0.0,0.0,0.0,1.0,999.0,0.0,0.0
23,Coupa,0.0,0.0,0.0,0.0,1.0,999.0,0.0,0.0
11,Automation Anywhere,0.0,1.0,1.0,0.0,5.0,999.0,0.0,0.0
111,ZOOM VIDEO,2.0,2.0,2.0,4.0,23.0,5.75,2.0,2.0
114,Zoom,17.0,25.0,31.0,24.0,90.0,3.75,0.774194,1.411765
34,DocuSign,3.0,4.0,7.0,8.0,29.0,3.625,1.142857,2.666667
92,Smartsheet,1.0,2.0,7.0,13.0,46.0,3.538462,1.857143,13.0
104,UiPath,0.0,0.0,3.0,32.0,73.0,2.28125,10.666667,999.0
16,Blue Prism,0.0,0.0,0.0,4.0,9.0,2.25,999.0,999.0


In [48]:
# increasing references to technologies with more than 100 citations
selector = (df_result_pivot_count['2019_vs_2018_ratio'] > 1.1) & (df_result_pivot_count['2019'] > 100)
df_result_pivot_count[selector].sort_values(by='2019_vs_2018_ratio', ascending=True).head(50)

action_date_fiscal_year,sw_company_name,2016,2017,2018,2019,2020,2020_vs_2019_ratio,2019_vs_2018_ratio,2019_vs_2016_ratio
99,Tableau,237.0,261.0,260.0,296.0,337.0,1.138514,1.138462,1.248945
83,Salesforce,113.0,173.0,223.0,266.0,294.0,1.105263,1.192825,2.353982
22,Cornerstone,97.0,82.0,108.0,130.0,141.0,1.084615,1.203704,1.340206
13,Azure,49.0,129.0,185.0,232.0,228.0,0.982759,1.254054,4.734694
86,ServiceNow,81.0,112.0,192.0,254.0,245.0,0.964567,1.322917,3.135802


In [49]:
# declining references to technologies with more than 100 citations
selector = (df_result_pivot_count['2019_vs_2018_ratio'] < 0.90) & (df_result_pivot_count['2019'] > 100)
df_result_pivot_count[selector].sort_values(by='2019_vs_2018_ratio', ascending=True).head(50)

action_date_fiscal_year,sw_company_name,2016,2017,2018,2019,2020,2020_vs_2019_ratio,2019_vs_2018_ratio,2019_vs_2016_ratio
24,Creatio,465.0,383.0,423.0,286.0,225.0,0.786713,0.676123,0.615054
1,Adobe,929.0,1137.0,1210.0,921.0,887.0,0.963084,0.761157,0.991389
54,McAfee,253.0,281.0,231.0,179.0,178.0,0.994413,0.774892,0.70751
8,Apple,631.0,678.0,532.0,417.0,426.0,1.021583,0.783835,0.660856
55,Microsoft,2426.0,2657.0,2596.0,2181.0,2382.0,1.09216,0.840139,0.899011
3,Amazon,159.0,176.0,240.0,203.0,215.0,1.059113,0.845833,1.27673
80,Red Hat,585.0,598.0,630.0,537.0,491.0,0.914339,0.852381,0.917949
81,SAP,1140.0,1172.0,1004.0,875.0,717.0,0.819429,0.871514,0.767544


In [50]:
#RPA is a hot technology - any references to one of the Gartner Magic Quadrant firms?

search_term = 'Automation Anywhere' # 'Blue Prism' # 'Workfusion'
df_result_pivot_count[df_result_pivot_count['sw_company_name'].str.contains(search_term, regex=False, case=False)]

action_date_fiscal_year,sw_company_name,2016,2017,2018,2019,2020,2020_vs_2019_ratio,2019_vs_2018_ratio,2019_vs_2016_ratio
11,Automation Anywhere,0.0,1.0,1.0,0.0,5.0,999.0,0.0,0.0


In [53]:
# analyze this from an obligation perspective too

df_result_pivot_funding.sort_values(by='2019_vs_2018_ratio', ascending=False).head(25)

action_date_fiscal_year,sw_company_name,2016,2017,2018,2019,2020,2020_vs_2019_ratio,2019_vs_2018_ratio,2019_vs_2016_ratio
60,Namely,93960.0,13002137.62,74865.0,9707005.0,10000.0,0.00103,129.660122,103.309973
104,UiPath,0.0,0.0,255793.5,3436311.0,8161031.0,2.37494,13.43393,1.0
88,Sisense,19800.0,20790.0,21829.5,274721.0,340867.0,1.240775,12.58485,13.874797
20,Collibra,0.0,496536.08,63264.0,738288.2,1402997.0,1.900338,11.669957,1.0
41,Google Cloud,32000.0,1396563.69,76501.9,495006.9,361950.0,0.731202,6.470517,15.468965
111,ZOOM VIDEO,17473.0,19911.27,32295.0,176625.7,1066998.0,6.041009,5.469135,10.108493
46,InVision,31056.69,176164.7,79485.13,376600.5,691857.7,1.837113,4.737999,12.126227
37,Elastic,952476.88,4046492.76,5380962.0,22542390.0,14828350.0,0.657798,4.189286,23.667125
75,Pegasystems,175138.65,178686.33,3154697.0,11244590.0,9840222.0,0.875108,3.564395,64.203908
76,Proofpoint,2264739.66,10587693.75,2719145.0,9312081.0,10288790.0,1.104886,3.424636,4.111767


In [55]:
#### Which Agencies are using the emerging RPA Tools?

# helper function def Collect_Records_Parent_Brand(df, collection_of_products):

collection_of_products = ["UiPath", "Automation Anywhere", "Blue Prism", "WorkFusion", "PegaSystems"]

df_RPA = Collect_Records_Parent_Brand(df_result, collection_of_products)

df_RPA.groupby(['action_date_fiscal_year', 'funding_agency_name', 'funding_sub_agency_name', 'sw_company_name'])['federal_action_obligation'].sum().reset_index().sort_values(by=['action_date_fiscal_year', 'federal_action_obligation'], ascending=[False, False])

Unnamed: 0,action_date_fiscal_year,funding_agency_name,funding_sub_agency_name,sw_company_name,federal_action_obligation
41,2020,DEPARTMENT OF DEFENSE (DOD),DEPT OF THE AIR FORCE,Pegasystems,4538481.00
60,2020,DEPARTMENT OF JUSTICE (DOJ),FEDERAL BUREAU OF INVESTIGATION,Pegasystems,2702482.20
62,2020,DEPARTMENT OF JUSTICE (DOJ),OFFICE OF JUSTICE PROGRAMS,Pegasystems,2476424.30
76,2020,SOCIAL SECURITY ADMINISTRATION (SSA),SOCIAL SECURITY ADMINISTRATION,UiPath,2424093.25
57,2020,DEPARTMENT OF HOMELAND SECURITY (DHS),U.S. CUSTOMS AND BORDER PROTECTION,UiPath,1948693.00
...,...,...,...,...,...
8,2018,DEPARTMENT OF JUSTICE (DOJ),FEDERAL PRISON SYSTEM / BUREAU OF PRISONS,Pegasystems,4725.00
2,2017,DEPARTMENT OF VETERANS AFFAIRS (VA),"VETERANS AFFAIRS, DEPARTMENT OF",Pegasystems,181620.34
1,2017,DEPARTMENT OF COMMERCE (DOC),US PATENT AND TRADEMARK OFFICE,Pegasystems,-2934.01
3,2017,ENVIRONMENTAL PROTECTION AGENCY (EPA),ENVIRONMENTAL PROTECTION AGENCY,Automation Anywhere,-15274.94


In [69]:
df_RPA_totals = df_RPA.groupby(['action_date_fiscal_year'])['federal_action_obligation'].sum().reset_index()

rpa_chart_totals = alt.Chart(df_RPA_totals).mark_bar().encode(
x='action_date_fiscal_year',
    y='federal_action_obligation'
).properties(
    title='RPA Spending Trends (Gross Level)' + field_name,
    width=400,
    height=400
)

rpa_chart_totals

In [56]:
field_name = 'federal_action_obligation' #

rpa_chart_tools = alt.Chart(df_RPA).mark_bar().encode(
    x='action_date_fiscal_year',
    y=field_name,
    color='sw_company_name:N'
    
).properties(
    title='Top RPA Tool Adoption by ' + field_name,
    width=400,
    height=400
)

rpa_chart_tools

In [57]:
field_name = 'federal_action_obligation' #

rpa_chart_agencies = alt.Chart(df_RPA[df_RPA['federal_action_obligation'] > 0.5e6]).mark_bar().encode(
    x='action_date_fiscal_year',
    y=field_name,
    color='funding_sub_agency_name:N'
    
).properties(
    title='Top Agencies Adopting RPA by ' + field_name,
    width=400,
    height=400
) #.interactive()

rpa_chart_agencies

### Which Services Prime Contractors are Affiliated with these Technologies in GFY2020 and GFY19?

In [77]:
selector = (df_RPA['PSC_Cat'].isin(['D','R']))
df_RPA_totals = df_RPA[selector].groupby(['action_date_fiscal_year', 'recipient_parent_name','sw_company_name',])['federal_action_obligation'].sum().reset_index()
df_RPA_totals.query("action_date_fiscal_year in ['2020','2019']").sort_values(by=['federal_action_obligation'], ascending=[False])

Unnamed: 0,action_date_fiscal_year,recipient_parent_name,sw_company_name,federal_action_obligation
28,2020,REDHAWK IT SOLUTIONS LLC,Pegasystems,2476424.3
15,2019,REDHAWK IT SOLUTIONS LLC,Pegasystems,2129836.63
8,2019,DELOITTE LLP,UiPath,1060477.24
20,2020,DLT SOLUTIONS LLC,Blue Prism,749941.54
9,2019,DLT MERGERCO LLC,Blue Prism,626342.63
29,2020,SCIENCE APPLICATIONS INTERNATIONAL CORPORATION,UiPath,309939.85
13,2019,OPUS GROUP LLC,UiPath,308732.5
18,2020,CARAHSOFT TECHNOLOGY CORPORATION,UiPath,272797.97
11,2019,FEDSTORE CORPORATION,Pegasystems,199655.11
17,2020,BLUE TECH INC.,UiPath,181832.0


In [86]:
df[df['product_or_service_code'].str.startswith('70')][['product_or_service_code', 'product_or_service_code_description']].drop_duplicates().sort_values(by="product_or_service_code")

Unnamed: 0,product_or_service_code,product_or_service_code_description
12,7010,INFORMATION TECHNOLOGY EQUIPMENT SYSTEM CONFIG...
16704,7020,INFORMATION TECHNOLOGY CENTRAL PROCESSING UNIT...
2877,7021,INFORMATION TECHNOLOGY CENTRAL PROCESSING UNIT...
29280,7022,INFORMATION TECHNOLOGY CENTRAL PROCESSING UNIT...
23,7025,INFORMATION TECHNOLOGY INPUT/OUTPUT AND STORAG...
3,7030,INFORMATION TECHNOLOGY SOFTWARE
338,7035,INFORMATION TECHNOLOGY SUPPORT EQUIPMENT
39811,7040,PUNCHED CARD EQUIPMENT
14610,7042,MINI AND MICRO COMPUTER CONTROL DEVICES
2806,7045,INFORMATION TECHNOLOGY SUPPLIES


## Next Steps - NLP Analysis - Match up Awards with beta.SAM.gov Archives

## END of Analysis

#### https://opensource.org/licenses/MIT

#### MIT Open Source License Copyright 2020 Leif C Ulstrup

#### Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

#### The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

#### THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.