## Govt Contracts 

In [52]:
from dotenv import load_dotenv
import os
import requests
import json
import re
import fitz
import magic

from typing import List, Optional, Union
from pydantic import BaseModel, Field, HttpUrl, TypeAdapter

In [2]:
load_dotenv()


True

- [SBA Info](https://www.sba.gov/federal-contracting/contracting-guide/how-win-contracts)
- [Hubzone Map](https://maps.certify.sba.gov/hubzone/map#center=44.722800,-103.249700&zoom=4)
- [DBSB](https://dsbs.sba.gov/search/)
- [GSA API Gateway](https://open.gsa.gov/api/)
- [Opportunities Public API](https://open.gsa.gov/api/get-opportunities-public-api/)

Interfacing System Name: COM

System Description and Function
```
I would like to interface with SAM.gov's Opportunity Management API for the primary purpose of extracting and filtering public contract opportunities. I will utilize advanced algorithms to sort opportunities based on predefined criteria such as industry categories, contract value, and geographical location. The aim is to streamline our procurement process by quickly identifying and presenting the most relevant opportunities to our team. The system will include robust security measures to ensure data integrity and compliance with federal data access regulations. It will be regularly updated to maintain compatibility with SAM.gov API developments and security requirements.

In [3]:
SAM_PUBLIC_API_KEY = os.environ.get("SAM_PUBLIC_API_KEY")
base_url = "https://api.sam.gov/opportunities/v2/search"

In [4]:
res = requests.get(f"https://api.sam.gov/opportunities/v2/search?api_key={SAM_PUBLIC_API_KEY}&postedFrom=01/01/2024&postedTo=12/31/2024&ptype=o&limit=1000")

In [5]:
res.content

b'{\n  "error": {\n    "code": "OVER_RATE_LIMIT",\n    "message": "The API key has exceeded the rate limits."\n  }\n}'

In [6]:
with open('./data/temp-opp-file.json') as f:
    res = json.load(f)


In [7]:
res

[{'noticeId': 'ff75c5fa02564937950a05713afcd835',
  'title': '1202RZ22Q0002 - I-BPA (Incident-Blanket Purchase Agreement) – Portable Toilets and Handwashing Stations',
  'solicitationNumber': '1202RZ22Q0002',
  'fullParentPathName': 'AGRICULTURE, DEPARTMENT OF.FOREST SERVICE.USDA-FS, AT-INCIDENT MGT SVCS BRANCH',
  'fullParentPathCode': '012.12C2.1202RZ',
  'postedDate': '2024-03-01',
  'type': 'Solicitation',
  'baseType': 'Solicitation',
  'archiveType': 'auto15',
  'archiveDate': '2024-03-19',
  'typeOfSetAsideDescription': 'Total Small Business Set-Aside (FAR 19.5)',
  'typeOfSetAside': 'SBA',
  'responseDeadLine': '2024-03-04T13:00:00-07:00',
  'naicsCode': '562991',
  'naicsCodes': ['562991'],
  'classificationCode': 'W045',
  'active': 'Yes',
  'award': None,
  'pointOfContact': [{'fax': None,
    'type': 'primary',
    'email': 'Kenneth.C.Miller@USDA.gov',
    'phone': '385-441-2764',
    'title': None,
    'fullName': 'Kenneth Miller'},
   {'fax': None,
    'type': 'secondary'

In [8]:
# json_dict = ["opportunitiesData"]
# json_dict[0]

### Breaking Down Codes:

- [Blog Post about codes](https://www.wolterskluwer.com/en/expert-insights/how-to-decipher-government-codes-relating-to-contracting)
- [Census NAICS Category Lookup](https://www.census.gov/naics/?input=web&year=2022)

In [9]:
# with open('./data/temp-opp-file.json', 'w') as f:
#     json.dump(json_dict, f, indent=4)

In [10]:
json_dict = res

In [11]:
!ls

240227_init_info.ipynb	240228_contract_opp_df.ipynb  data  resources


In [12]:
search_naics_code = '513210'

## Schema

In [13]:
matched_data = [d for d in json_dict if d.get('naicsCode') == search_naics_code]
len(matched_data)

4

In [14]:
matched_data

[{'noticeId': 'e8225168c5db4451badfca88e5bb9754',
  'title': 'HCD Enterprise Survey Platform',
  'solicitationNumber': '030ADV24R0022',
  'fullParentPathName': 'LIBRARY OF CONGRESS.LIBRARY OF CONGRESS.CONTRACTS SERVICES',
  'fullParentPathCode': '003.0300.030ADV',
  'postedDate': '2024-03-01',
  'type': 'Solicitation',
  'baseType': 'Solicitation',
  'archiveType': 'auto15',
  'archiveDate': '2024-03-23',
  'typeOfSetAsideDescription': None,
  'typeOfSetAside': None,
  'responseDeadLine': '2024-03-08T15:00:00-05:00',
  'naicsCode': '513210',
  'naicsCodes': ['513210'],
  'classificationCode': '7A21',
  'active': 'Yes',
  'award': None,
  'pointOfContact': [{'fax': '',
    'type': 'primary',
    'email': 'rleonhardt@loc.gov',
    'phone': '2029406833',
    'title': None,
    'fullName': 'Riley Leonhardt'},
   {'fax': '',
    'type': 'secondary',
    'email': 'bmatsuoka@loc.gov',
    'phone': '2027070170',
    'title': None,
    'fullName': 'Betsy Lewis-Matsuoka'}],
  'description': 'htt

In [15]:
sam_api_string = f"&api_key={SAM_PUBLIC_API_KEY}"

In [16]:
sample_api_notice_desc = matched_data[0]['description'] + sam_api_string
sample_link = matched_data[0]['resourceLinks'][0] + sam_api_string

In [17]:
matched_data

[{'noticeId': 'e8225168c5db4451badfca88e5bb9754',
  'title': 'HCD Enterprise Survey Platform',
  'solicitationNumber': '030ADV24R0022',
  'fullParentPathName': 'LIBRARY OF CONGRESS.LIBRARY OF CONGRESS.CONTRACTS SERVICES',
  'fullParentPathCode': '003.0300.030ADV',
  'postedDate': '2024-03-01',
  'type': 'Solicitation',
  'baseType': 'Solicitation',
  'archiveType': 'auto15',
  'archiveDate': '2024-03-23',
  'typeOfSetAsideDescription': None,
  'typeOfSetAside': None,
  'responseDeadLine': '2024-03-08T15:00:00-05:00',
  'naicsCode': '513210',
  'naicsCodes': ['513210'],
  'classificationCode': '7A21',
  'active': 'Yes',
  'award': None,
  'pointOfContact': [{'fax': '',
    'type': 'primary',
    'email': 'rleonhardt@loc.gov',
    'phone': '2029406833',
    'title': None,
    'fullName': 'Riley Leonhardt'},
   {'fax': '',
    'type': 'secondary',
    'email': 'bmatsuoka@loc.gov',
    'phone': '2027070170',
    'title': None,
    'fullName': 'Betsy Lewis-Matsuoka'}],
  'description': 'htt

In [21]:
matched_data[0]['resourceLinks']

['https://sam.gov/api/prod/opps/v3/opportunities/resources/files/2c4334be1421489ca8dbcca172766582/download?api_key=null&token=',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/2c8db8d3f3b4498e932b74defe2dcde2/download?api_key=null&token=',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/40fdeefe6ea9447b9e3fade20b3cf24a/download?api_key=null&token=',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/5bdb74554e8f42fd9ca2563282678c52/download?api_key=null&token=',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/87a2c9963ae64f14b927267a5213b425/download?api_key=null&token=',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/acfca88cd5ee48f587602fadc25cafa2/download?api_key=null&token=']

In [18]:
sample_link

'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/2c4334be1421489ca8dbcca172766582/download?api_key=null&token=&api_key=yzCTdz9pKfQFQRmb2eFZy6GnsHBcHQgqV4Qsn26H'

`resourceLinks` can be downloaded by removing the token and updating the api key

In [19]:
pattern = 'api_key=null&token='
# sample_links = [re.sub(pattern, '', link)


In [23]:
matched_data

[{'noticeId': 'e8225168c5db4451badfca88e5bb9754',
  'title': 'HCD Enterprise Survey Platform',
  'solicitationNumber': '030ADV24R0022',
  'fullParentPathName': 'LIBRARY OF CONGRESS.LIBRARY OF CONGRESS.CONTRACTS SERVICES',
  'fullParentPathCode': '003.0300.030ADV',
  'postedDate': '2024-03-01',
  'type': 'Solicitation',
  'baseType': 'Solicitation',
  'archiveType': 'auto15',
  'archiveDate': '2024-03-23',
  'typeOfSetAsideDescription': None,
  'typeOfSetAside': None,
  'responseDeadLine': '2024-03-08T15:00:00-05:00',
  'naicsCode': '513210',
  'naicsCodes': ['513210'],
  'classificationCode': '7A21',
  'active': 'Yes',
  'award': None,
  'pointOfContact': [{'fax': '',
    'type': 'primary',
    'email': 'rleonhardt@loc.gov',
    'phone': '2029406833',
    'title': None,
    'fullName': 'Riley Leonhardt'},
   {'fax': '',
    'type': 'secondary',
    'email': 'bmatsuoka@loc.gov',
    'phone': '2027070170',
    'title': None,
    'fullName': 'Betsy Lewis-Matsuoka'}],
  'description': 'htt

In [25]:
[link for link in matched_data[0]['resourceLinks']]

['https://sam.gov/api/prod/opps/v3/opportunities/resources/files/2c4334be1421489ca8dbcca172766582/download?api_key=null&token=',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/2c8db8d3f3b4498e932b74defe2dcde2/download?api_key=null&token=',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/40fdeefe6ea9447b9e3fade20b3cf24a/download?api_key=null&token=',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/5bdb74554e8f42fd9ca2563282678c52/download?api_key=null&token=',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/87a2c9963ae64f14b927267a5213b425/download?api_key=null&token=',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/acfca88cd5ee48f587602fadc25cafa2/download?api_key=null&token=']

In [41]:
for i in range(len(matched_data)):
    print(matched_data[i]['resourceLinks'])

['https://sam.gov/api/prod/opps/v3/opportunities/resources/files/2c4334be1421489ca8dbcca172766582/download?api_key=null&token=', 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/2c8db8d3f3b4498e932b74defe2dcde2/download?api_key=null&token=', 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/40fdeefe6ea9447b9e3fade20b3cf24a/download?api_key=null&token=', 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/5bdb74554e8f42fd9ca2563282678c52/download?api_key=null&token=', 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/87a2c9963ae64f14b927267a5213b425/download?api_key=null&token=', 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/acfca88cd5ee48f587602fadc25cafa2/download?api_key=null&token=']
['https://sam.gov/api/prod/opps/v3/opportunities/resources/files/0e8741b316a7437b866bbf32d2704c58/download?api_key=null&token=', 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/7f018e18cd5d41c891e4640723d12476/downl

In [42]:
resource_links = []
for i in range(len(matched_data)):
    if matched_data[i]['resourceLinks'] is not None:
        for link in matched_data[i]['resourceLinks']:
            if link is not None:
                resource_links.append(re.sub(pattern, '', link))

In [43]:
resource_links

['https://sam.gov/api/prod/opps/v3/opportunities/resources/files/2c4334be1421489ca8dbcca172766582/download?',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/2c8db8d3f3b4498e932b74defe2dcde2/download?',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/40fdeefe6ea9447b9e3fade20b3cf24a/download?',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/5bdb74554e8f42fd9ca2563282678c52/download?',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/87a2c9963ae64f14b927267a5213b425/download?',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/acfca88cd5ee48f587602fadc25cafa2/download?',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/0e8741b316a7437b866bbf32d2704c58/download?',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/7f018e18cd5d41c891e4640723d12476/download?',
 'https://sam.gov/api/prod/opps/v3/opportunities/resources/files/7a0e43ed00b9481aa9d35403d97f41b6/download?',
 'https://

In [44]:
responses = [requests.get(link) for link in resource_links]

In [53]:
content_types = [response.headers.get('Content-Type') for response in responses]
content_types

['application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream']

In [51]:
file_types = [magic.from_buffer(result.content) for ]

['application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream',
 'application/octet-stream']

In [None]:
res = requests.get(f"https://sam.gov/api/prod/opps/v3/opportunities/resources/files/7f018e18cd5d41c891e4640723d12476/download?api_key={SAM_PUBLIC_API_KEY}")

In [None]:
res.content[:50]

In [None]:
import fitz

In [None]:
with fitz.open(stream=res.content, filetype='pdf') as pdf:
    text = ""
    for page in pdf:
        text += page.get_text()


In [None]:
print(text)

In [None]:
class Award(BaseModel):
    awardee: Optional[dict] = None

class PointOfContact(BaseModel):
    fax: Optional[str] = None
    type: str
    email: str
    phone: Optional[str] = None
    title: Optional[str] = None
    fullName: str

class Address(BaseModel):
    zipcode: Optional[str]
    city: str
    countryCode: str
    state: str

class PlaceOfPerformance(BaseModel):
    city: Optional[Union[dict, str]] = None
    state: Optional[Union[dict, str]] = None
    country: Optional[Union[dict, str]] = None
    zip: Optional[str] = None

class Link(BaseModel):
    rel: str
    href: Optional[HttpUrl] = None

class Notice(BaseModel):
    noticeId: str = Field(..., alias='noticeId')
    title: str
    solicitationNumber: str
    fullParentPathName: str
    fullParentPathCode: str
    postedDate: str
    type: str
    baseType: str
    archiveType: str
    archiveDate: str
    typeOfSetAsideDescription: Optional[str] = None
    typeOfSetAside: Optional[str] = None
    responseDeadLine: str
    naicsCode: str
    naicsCodes: List[str]
    classificationCode: str
    active: str
    award: Optional[Award] = None
    pointOfContact: List[PointOfContact]
    description: HttpUrl
    organizationType: str
    officeAddress: Optional[Address] = None
    placeOfPerformance: Optional[PlaceOfPerformance] = None
    additionalInfoLink: Optional[HttpUrl] = None
    uiLink: HttpUrl
    links: List[Link]
    resourceLinks: Optional[List[HttpUrl]] = None

    class Config:
        populate_by_name = True

In [None]:
json_dict = res.json()["opportunitiesData"]

In [None]:
notice = TypeAdapter(Notice).validate_python(json_dict[0])

In [None]:
Notice.model_validate(json_dict[0])

In [None]:
def validate_notices(notices: List[dict]):
    return [Notice.model_validate(item) for item in notices]

In [None]:
validate_notices(json_dict)