In [1]:
from dotenv import load_dotenv
import os
import requests
import json
import re
import fitz
import magic
import pandas as pd
from io import BytesIO
import pendulum
import boto3

In [2]:
# Load environment variables via python-dotenv (by default from a local .env
# file) so the os.environ lookups for API keys and S3 settings can find them.
load_dotenv()

True

In [3]:
# Configuration pulled from the environment. Each value is None when the
# corresponding variable is not set.

# API key sent with every SAM.gov opportunities request.
SAM_PUBLIC_API_KEY = os.getenv("SAM_PUBLIC_API_KEY")

# Credentials, region, and destination bucket for the S3 upload.
S3_AWS_ACCESS_KEY_ID = os.getenv("S3_AWS_ACCESS_KEY_ID")
S3_AWS_SECRET_ACCESS_KEY = os.getenv("S3_AWS_SECRET_ACCESS_KEY")
S3_REGION_NAME = os.getenv("S3_REGION_NAME")
S3_BUCKET_OPPORTUNITIES = os.getenv("S3_BUCKET_OPPORTUNITIES")

In [4]:
# Endpoint for the SAM.gov opportunities search.
base_url = "https://api.sam.gov/opportunities/v2/search"

# The run targets "yesterday" in UTC; both date strings come from the same instant.
yesterday_utc = pendulum.now('utc').subtract(days=1)

# Compact YYYYMMDD form, used to name the S3 object for this run.
previous_date = yesterday_utc.strftime('%Y%m%d')

# MM/DD/YYYY form, used for the postedFrom/postedTo request parameters.
formattted_request_date = yesterday_utc.format('MM/DD/YYYY')

In [5]:
# Query parameters for the opportunities search request.
# postedFrom and postedTo are the same day, so the search covers one day only.
params = dict(
    api_key=SAM_PUBLIC_API_KEY,
    postedFrom=formattted_request_date,
    postedTo=formattted_request_date,
    ptype="o",  # NOTE(review): presumably restricts results to solicitations - confirm against the SAM.gov API docs
    limit=1000,  # page size requested per call
    offset=0,  # starting position for pagination
)

In [6]:
# res = requests.get(base_url, params=params)
# res

In [7]:
# data = res.json()

In [8]:
# try:
#     data
# except NameError:
#     data ={'error': {'code': 'OVER_RATE_LIMIT',
#            'message': 'The API key has exceeded the rate limits.'}}

In [9]:
# data_path = f'./data/{previous_date}_full_response.json'
# if not os.path.exists(data_path):
#     with open(data_path, 'w') as f:
#         json.dump(data, f, indent=4)
# else:
#     print(f"{data_path} already exists")

In [10]:
# with open(data_path) as f:
#     data = json.load(f)

In [11]:
# Accumulator for the opportunity records gathered across all result pages.
all_records = list()

In [12]:
# Page through the search endpoint until a short page signals the end,
# collecting every record into `all_records`.
#
# The cell is safe to re-run: it works on a copy of `params` (so the shared
# dict keeps its starting offset) and rebuilds `all_records` from scratch
# instead of appending to whatever an earlier run left behind.
#
# Raises Exception on any non-200 response. The message deliberately carries
# only the status code: the request URL contains the API key.
REQUEST_TIMEOUT_SECONDS = 60  # without a timeout, requests can block indefinitely

page_params = dict(params)
all_records = []

while True:
    res = requests.get(base_url, params=page_params, timeout=REQUEST_TIMEOUT_SECONDS)
    if res.status_code != 200:
        raise Exception(f"Request failed with status code {res.status_code}")

    data = res.json()
    # Treat a missing or null "opportunitiesData" as an empty page.
    records = data.get("opportunitiesData") or []
    all_records.extend(records)

    # Fewer records than requested means this was the last page.
    if len(records) < page_params["limit"]:
        print(f"Finished with {len(all_records)} records")
        break

    # NOTE(review): this advances `offset` by `limit`, i.e. treats it as a
    # record offset - confirm against the SAM.gov docs that it is not a page index.
    page_params["offset"] += page_params["limit"]

Finished with 285 records


In [13]:
# Serialize the collected records to JSON text, then to UTF-8 bytes for upload.
json_data = json.dumps(all_records)
bytes_data = bytes(json_data, encoding='utf-8')

In [14]:
# Destination for this run: one JSON object per day, keyed by the YYYYMMDD date.
bucket_name = S3_BUCKET_OPPORTUNITIES
file_name = f"daily-opportunity-posts/{previous_date}.json"

# S3 client built from the region and credentials read from the environment.
s3_client = boto3.client(
    service_name="s3",
    aws_access_key_id=S3_AWS_ACCESS_KEY_ID,
    aws_secret_access_key=S3_AWS_SECRET_ACCESS_KEY,
    region_name=S3_REGION_NAME,
)

In [15]:
# Upload the serialized records; the response is displayed as the cell output.
s3_client.put_object(
    Body=bytes_data,
    Bucket=bucket_name,
    Key=file_name,
)

{'ResponseMetadata': {'RequestId': 'ASZNERE5B0RWW6C8',
  'HostId': 'ERrA8wvK/9NdbVaI31X4o6u+1MNMMgHlyBeWurqZmbxTdbrLQdC14t1eqELMau9x1klifILBibzgzqm3ZGNcD49kj/48BEWFmc8HWmiUx1M=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'ERrA8wvK/9NdbVaI31X4o6u+1MNMMgHlyBeWurqZmbxTdbrLQdC14t1eqELMau9x1klifILBibzgzqm3ZGNcD49kj/48BEWFmc8HWmiUx1M=',
   'x-amz-request-id': 'ASZNERE5B0RWW6C8',
   'date': 'Sat, 02 Mar 2024 12:22:07 GMT',
   'x-amz-version-id': 'rNpOSiw92pgi.vHIMj29zKOduhTBdYz7',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"76d56df1f5ed93a9554fb010151f7a7a"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"76d56df1f5ed93a9554fb010151f7a7a"',
 'ServerSideEncryption': 'AES256',
 'VersionId': 'rNpOSiw92pgi.vHIMj29zKOduhTBdYz7'}

In [16]:
# Read the uploaded object back: fetch it, read the body, decode it as UTF-8,
# and parse the JSON into Python objects.
s3_response = json.loads(
    s3_client.get_object(Bucket=bucket_name, Key=file_name)["Body"]
    .read()
    .decode("utf-8")
)

In [17]:
# Check the shape of the round-tripped data: the container type and the type of its first element.
(type(s3_response), type(s3_response[0]))

(list, dict)

In [18]:
# Preview only the first few records. Displaying the whole list bloats the
# notebook and writes every point-of-contact email and phone number into the
# saved output.
s3_response[:3]

[{'noticeId': 'ff75c5fa02564937950a05713afcd835',
  'title': '1202RZ22Q0002 - I-BPA (Incident-Blanket Purchase Agreement) – Portable Toilets and Handwashing Stations',
  'solicitationNumber': '1202RZ22Q0002',
  'fullParentPathName': 'AGRICULTURE, DEPARTMENT OF.FOREST SERVICE.USDA-FS, AT-INCIDENT MGT SVCS BRANCH',
  'fullParentPathCode': '012.12C2.1202RZ',
  'postedDate': '2024-03-01',
  'type': 'Solicitation',
  'baseType': 'Solicitation',
  'archiveType': 'auto15',
  'archiveDate': '2024-03-19',
  'typeOfSetAsideDescription': 'Total Small Business Set-Aside (FAR 19.5)',
  'typeOfSetAside': 'SBA',
  'responseDeadLine': '2024-03-04T13:00:00-07:00',
  'naicsCode': '562991',
  'naicsCodes': ['562991'],
  'classificationCode': 'W045',
  'active': 'Yes',
  'award': None,
  'pointOfContact': [{'fax': None,
    'type': 'primary',
    'email': 'Kenneth.C.Miller@USDA.gov',
    'phone': '385-441-2764',
    'title': None,
    'fullName': 'Kenneth Miller'},
   {'fax': None,
    'type': 'secondary'