In [1]:
from dotenv import load_dotenv
import os
import requests
import json
import re
import fitz
import magic
import pandas as pd
from io import BytesIO
import pendulum
import boto3

In [2]:
# Pull variables from a local .env file into the process environment.
load_dotenv()

# SAM.gov public API key (a missing variable yields None rather than raising).
SAM_PUBLIC_API_KEY = os.getenv("SAM_PUBLIC_API_KEY")

# S3 credentials, region and bucket for the stored opportunity data.
S3_AWS_ACCESS_KEY_ID = os.getenv("S3_AWS_ACCESS_KEY_ID")
S3_AWS_SECRET_ACCESS_KEY = os.getenv("S3_AWS_SECRET_ACCESS_KEY")
S3_REGION_NAME = os.getenv("S3_REGION_NAME")
S3_BUCKET_OPPORTUNITIES = os.getenv("S3_BUCKET_OPPORTUNITIES")

In [3]:
# Anchor both date strings on the same instant: "yesterday" in UTC.
yesterday_utc = pendulum.now('utc').subtract(days=1)

# Compact YYYYMMDD form, used below to build the S3 key and the local cache path.
previous_date = yesterday_utc.strftime('%Y%m%d')

# MM/DD/YYYY form, formatted directly from the datetime instead of
# re-parsing the compact string produced above.
formatted_request_date = yesterday_utc.format('MM/DD/YYYY')
# Backward-compatible alias for the original (misspelled) variable name.
formattted_request_date = formatted_request_date

# SAM.gov opportunities search endpoint.
base_url = "https://api.sam.gov/opportunities/v2/search"

Check whether the API rate limit (10 requests/day) has been reached

In [4]:
# One-off probe of the search endpoint; the saved output below shows it returned HTTP 429.
# Left commented out — presumably so re-running the notebook does not issue another request.
# res = requests.get(f"https://api.sam.gov/opportunities/v2/search?api_key={SAM_PUBLIC_API_KEY}&postedFrom=01/01/2024&postedTo=12/31/2024&ptype=k&limit=1000")
# res

<Response [429]>

In [5]:
# `res` only exists when the probe request in the previous cell is uncommented
# and run; guard the lookup so a fresh "Restart & Run All" does not raise NameError.
res.json() if "res" in globals() else None

{'error': {'code': 'OVER_RATE_LIMIT',
  'message': 'The API key has exceeded the rate limits.'}}

Import the prior day's data from S3

In [6]:
# Location of the previous day's opportunity dump inside the bucket.
bucket_name = S3_BUCKET_OPPORTUNITIES
file_name = f"daily-opportunity-posts/{previous_date}.json"

# Connection settings taken from the environment-loaded constants.
s3_connection_kwargs = {
    "region_name": S3_REGION_NAME,
    "aws_access_key_id": S3_AWS_ACCESS_KEY_ID,
    "aws_secret_access_key": S3_AWS_SECRET_ACCESS_KEY,
}
s3_client = boto3.client("s3", **s3_connection_kwargs)

In [7]:
# Fetch the object, decode its body as UTF-8 text, then parse the JSON payload.
s3_object = s3_client.get_object(Key=file_name, Bucket=bucket_name)
s3_response = s3_object['Body'].read().decode('utf-8')
data = json.loads(s3_response)

In [18]:
# Keep a local copy of the day's payload; an existing file is never overwritten.
data_path = f'./data/{previous_date}.json'

# open(..., 'w') does not create missing parent directories, so make sure
# ./data exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(data_path), exist_ok=True)

if not os.path.exists(data_path):
    with open(data_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)

In [8]:
# Display the first record to inspect the fields carried by each entry.
data[0]

{'noticeId': 'ff75c5fa02564937950a05713afcd835',
 'title': '1202RZ22Q0002 - I-BPA (Incident-Blanket Purchase Agreement) â€“ Portable Toilets and Handwashing Stations',
 'solicitationNumber': '1202RZ22Q0002',
 'fullParentPathName': 'AGRICULTURE, DEPARTMENT OF.FOREST SERVICE.USDA-FS, AT-INCIDENT MGT SVCS BRANCH',
 'fullParentPathCode': '012.12C2.1202RZ',
 'postedDate': '2024-03-01',
 'type': 'Solicitation',
 'baseType': 'Solicitation',
 'archiveType': 'auto15',
 'archiveDate': '2024-03-19',
 'typeOfSetAsideDescription': 'Total Small Business Set-Aside (FAR 19.5)',
 'typeOfSetAside': 'SBA',
 'responseDeadLine': '2024-03-04T13:00:00-07:00',
 'naicsCode': '562991',
 'naicsCodes': ['562991'],
 'classificationCode': 'W045',
 'active': 'Yes',
 'award': None,
 'pointOfContact': [{'fax': None,
   'type': 'primary',
   'email': 'Kenneth.C.Miller@USDA.gov',
   'phone': '385-441-2764',
   'title': None,
   'fullName': 'Kenneth Miller'},
  {'fax': None,
   'type': 'secondary',
   'email': 'Donald.Ke

In [9]:
# Query-string parameters shared by the follow-up requests below: just the API key.
params = dict(api_key=SAM_PUBLIC_API_KEY)


Fetching description data counts towards the rate limit

In [13]:

# Left commented out — presumably to avoid spending rate-limited requests; uncomment to fetch the first record's description.
# description_data = requests.get(data[0]["description"], params=params)

Resource data does NOT count towards the rate limit. It also uses a different base URL.

In [16]:
# Download the first attachment linked from the first record.
# requests has no default timeout, so set one explicitly; otherwise a stalled
# connection would block the kernel indefinitely.
resource_data = requests.get(
    data[0]['resourceLinks'][0],
    params=params,
    timeout=30,
)
resource_data

<Response [200]>