In [11]:
import sys
sys.path
backend_path = '../backend'
if backend_path not in sys.path:
    sys.path.append(backend_path)

In [12]:
sys.path

['/home/peter-legion-wsl2/peter-projects/contract-queue/nbs',
 '/usr/lib/python310.zip',
 '/usr/lib/python3.10',
 '/usr/lib/python3.10/lib-dynload',
 '',
 '/home/peter-legion-wsl2/peter-projects/contract-queue/.venv/lib/python3.10/site-packages',
 '../backend']

In [13]:
import os
import json
import boto3
from dotenv import load_dotenv
from datetime import datetime
import pendulum

from app.core.config import get_app_settings
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from app.models.models import Base, Notice, PointOfContact, OfficeAddress, PlaceOfPerformance, Link, ResourceLink



In [14]:
load_dotenv()
S3_AWS_ACCESS_KEY_ID = os.environ.get("S3_AWS_ACCESS_KEY_ID")
S3_AWS_SECRET_ACCESS_KEY = os.environ.get("S3_AWS_SECRET_ACCESS_KEY")
S3_REGION_NAME = os.environ.get("S3_REGION_NAME")

DATABASE_URL = "postgresql+psycopg2://airflow:airflow@localhost:5432/airflow" 

engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
session = SessionLocal()

In [15]:
DATABASE_URL

'postgresql+psycopg2://airflow:airflow@localhost:5432/airflow'

In [16]:

bucket_name = "sam-gov-opportunities"
previous_date = pendulum.now("utc").subtract(days=1).strftime("%Y%m%d")
file_name = f"daily-opportunity-posts/{previous_date}.json" 


In [17]:
s3_client = boto3.client(
    "s3",
    region_name=S3_REGION_NAME,
    aws_access_key_id=S3_AWS_ACCESS_KEY_ID,
    aws_secret_access_key=S3_AWS_SECRET_ACCESS_KEY,
)

In [18]:
s3_response = s3_client.get_object(Bucket=bucket_name, Key=file_name)
s3_response = s3_response['Body'].read().decode('utf-8')
s3_response = json.loads(s3_response)

In [None]:
for 

In [19]:
s3_response[0:2]

[{'noticeId': 'ffe42d665ee64282b2c07f50fffc139a',
  'title': 'Rough River Mowing and Maintenance IDIQ',
  'solicitationNumber': 'W912QR24R0033',
  'fullParentPathName': 'DEPT OF DEFENSE.DEPT OF THE ARMY.USACE.LRD.W072 ENDIST LOUISVILLE',
  'fullParentPathCode': '021.2100.USACE.LRD.W912QR',
  'postedDate': '2024-03-07',
  'type': 'Solicitation',
  'baseType': 'Solicitation',
  'archiveType': 'autocustom',
  'archiveDate': '2024-03-22',
  'typeOfSetAsideDescription': None,
  'typeOfSetAside': None,
  'responseDeadLine': '2024-03-22T14:00:00-04:00',
  'naicsCode': '561730',
  'naicsCodes': ['561730'],
  'classificationCode': 'S208',
  'active': 'Yes',
  'award': None,
  'pointOfContact': [{'fax': '',
    'type': 'primary',
    'email': 'thomas.s.nauert@usace.army.mil',
    'phone': '',
    'title': None,
    'fullName': 'THOMAS NAUERT'}],
  'description': 'https://api.sam.gov/prod/opportunities/v1/noticedesc?noticeid=ffe42d665ee64282b2c07f50fffc139a',
  'organizationType': 'OFFICE',
  'of

In [21]:
def parse_date(iso_str):
    try:
        return datetime.fromisoformat(iso_str.replace('Z', '+00:00'))
    except (TypeError, ValueError):
        return None

for notice_data in s3_response: 
    office_address_data = notice_data.get('officeAddress', {}) 
    office_address = OfficeAddress(zipcode=office_address_data.get('zipcode', None),
                                   city=office_address_data.get('city', None),
                                   countryCode=office_address_data.get('countryCode, None'),
                                   state=office_address_data.get('state', None))
    
    place_of_performance = None 
    place_of_performance_data = notice_data.get('placeOfPerformance')
    if place_of_performance_data:
        place_of_performance = PlaceOfPerformance(city_code=place_of_performance_data.get('city', {}).get('code', None),
                                                city_name=place_of_performance_data.get('city', {}).get('name', None),
                                                state_code=place_of_performance_data.get('state', {}).get('code', None),
                                                state_name=place_of_performance_data.get('state', {}).get('name', None),
                                                country_code=place_of_performance_data.get('country', {}).get('code', None),
                                                country_name=place_of_performance_data.get('country', {}).get('name', None))
    
    notice = Notice(id=notice_data.get('noticeId'),
                title=notice_data.get('title'),
                solicitationNumber=notice_data.get('solicitationNumber'),
                fullParentPathName=notice_data.get('fullParentPathName'),
                fullParentPathCode=notice_data.get('fullParentPathCode'),
                postedDate=parse_date(notice_data.get('postedDate')),
                type=notice_data.get('type'),
                baseType=notice_data.get('baseType'),
                archiveType=notice_data.get('archiveType'),
                archiveDate=parse_date(notice_data.get('archiveDate')),
                typeOfSetAsideDescription=notice_data.get('typeOfSetAsideDescription'),
                typeOfSetAside=notice_data.get('typeOfSetAside'),
                responseDeadLine=parse_date(notice_data.get('responseDeadLine')),
                naicsCode=notice_data.get('naicsCode'),
                naicsCodes=notice_data.get('naicsCodes'),
                classificationCode=notice_data.get('classificationCode'),
                active=notice_data.get('active') == 'Yes',
                description=notice_data.get('description'),
                organizationType=notice_data.get('organizationType'),
                additionalInfoLink=notice_data.get('additionalInfoLink'),
                uiLink=notice_data.get('uiLink'),
                office_address=office_address,
                place_of_performance=place_of_performance)

    poc_data_list = notice_data.get('pointOfContact', [])
    if poc_data:
        for poc_data in poc_data_list:
            poc = PointOfContact(fax=poc_data.get('fax'),
                                type=poc_data.get('type'),
                                email=poc_data.get('email'),
                                phone=poc_data.get('phone'),
                                title=poc_data.get('title'),
                                fullName=poc_data.get('fullName'),
                                notice=notice)
            session.add(poc)
        
    link_data_list = notice_data.get('links', [])
    if link_data_list:
        for link_data in link_data_list:
                    link = Link(rel=link_data.get('rel'), href=link_data.get('href'), notice=notice)
                    session.add(link)

    resource_link_data = notice_data.get('resourceLinks', [])
    if resource_link_data:
        for resource_link in resource_link_data:
                res_link = ResourceLink(url=resource_link, notice=notice)
                session.add(res_link)

    session.add(notice)

session.commit()

IntegrityError: (psycopg2.errors.UniqueViolation) duplicate key value violates unique constraint "notices_pkey"
DETAIL:  Key (id)=(ffe42d665ee64282b2c07f50fffc139a) already exists.

[SQL: INSERT INTO notices (id, title, "solicitationNumber", "fullParentPathName", "fullParentPathCode", "postedDate", type, "baseType", "archiveType", "archiveDate", "typeOfSetAsideDescription", "typeOfSetAside", "responseDeadLine", "naicsCode", "naicsCodes", "classificationCode", active, award, description, "organizationType", "additionalInfoLink", "uiLink", office_address_id, place_of_performance_id) VALUES (%(id)s, %(title)s, %(solicitationNumber)s, %(fullParentPathName)s, %(fullParentPathCode)s, %(postedDate)s, %(type)s, %(baseType)s, %(archiveType)s, %(archiveDate)s, %(typeOfSetAsideDescription)s, %(typeOfSetAside)s, %(responseDeadLine)s, %(naicsCode)s, %(naicsCodes)s::VARCHAR[], %(classificationCode)s, %(active)s, %(award)s, %(description)s, %(organizationType)s, %(additionalInfoLink)s, %(uiLink)s, %(office_address_id)s, %(place_of_performance_id)s)]
[parameters: ({'id': 'ffe42d665ee64282b2c07f50fffc139a', 'title': 'Rough River Mowing and Maintenance IDIQ', 'solicitationNumber': 'W912QR24R0033', 'fullParentPathName': 'DEPT OF DEFENSE.DEPT OF THE ARMY.USACE.LRD.W072 ENDIST LOUISVILLE', 'fullParentPathCode': '021.2100.USACE.LRD.W912QR', 'postedDate': datetime.datetime(2024, 3, 7, 0, 0), 'type': 'Solicitation', 'baseType': 'Solicitation', 'archiveType': 'autocustom', 'archiveDate': datetime.datetime(2024, 3, 22, 0, 0), 'typeOfSetAsideDescription': None, 'typeOfSetAside': None, 'responseDeadLine': datetime.datetime(2024, 3, 22, 14, 0, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), 'naicsCode': '561730', 'naicsCodes': ['561730'], 'classificationCode': 'S208', 'active': True, 'award': None, 'description': 'https://api.sam.gov/prod/opportunities/v1/noticedesc?noticeid=ffe42d665ee64282b2c07f50fffc139a', 'organizationType': 'OFFICE', 'additionalInfoLink': None, 'uiLink': 'https://sam.gov/opp/ffe42d665ee64282b2c07f50fffc139a/view', 'office_address_id': 327, 'place_of_performance_id': 222}, {'id': 'ffafd3dd9cb54aa9995395016608502c', 'title': 'Z--Repair Exhibit Pieces, Channel Islands NP', 'solicitationNumber': '140P8424Q0016', 'fullParentPathName': 'INTERIOR, DEPARTMENT OF THE.NATIONAL PARK SERVICE.PWR SF/SEA MABO(85000)', 'fullParentPathCode': '014.1443.140P85', 'postedDate': datetime.datetime(2024, 3, 7, 0, 0), 'type': 'Solicitation', 'baseType': 'Solicitation', 'archiveType': 'auto15', 'archiveDate': datetime.datetime(2024, 4, 16, 0, 0), 'typeOfSetAsideDescription': 'Total Small Business Set-Aside (FAR 19.5)', 'typeOfSetAside': 'SBA', 'responseDeadLine': datetime.datetime(2024, 4, 1, 14, 0, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))), 'naicsCode': '238390', 'naicsCodes': ['238390'], 'classificationCode': 'Z2PB', 'active': True, 'award': None, 'description': 'https://api.sam.gov/prod/opportunities/v1/noticedesc?noticeid=ffafd3dd9cb54aa9995395016608502c', 'organizationType': 'OFFICE', 'additionalInfoLink': None, 'uiLink': 'https://sam.gov/opp/ffafd3dd9cb54aa9995395016608502c/view', 'office_address_id': 328, 'place_of_performance_id': 223}, {'id': 'fe3b50db95534da0abe4f0618fd57816', 'title': 'EDWAFB - Auto Hobby Uniform Linen Services', 'solicitationNumber': 'FA930124Q0008', 'fullParentPathName': 'DEPT OF DEFENSE.DEPT OF THE AIR FORCE.AIR FORCE MATERIEL COMMAND.AIR FORCE TEST CENTER.FA9301  AFTC PZIO', 'fullParentPathCode': '057.5700.AFMC.AFTC.FA9301', 'postedDate': datetime.datetime(2024, 3, 7, 0, 0), 'type': 'Solicitation', 'baseType': 'Solicitation', 'archiveType': 'auto15', 'archiveDate': datetime.datetime(2024, 3, 29, 0, 0), 'typeOfSetAsideDescription': 'Total Small Business Set-Aside (FAR 19.5)', 'typeOfSetAside': 'SBA', 'responseDeadLine': datetime.datetime(2024, 3, 14, 10, 0, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))), 'naicsCode': '812332', 'naicsCodes': ['812332'], 'classificationCode': 'S209', 'active': True, 'award': None, 'description': 'https://api.sam.gov/prod/opportunities/v1/noticedesc?noticeid=fe3b50db95534da0abe4f0618fd57816', 'organizationType': 'OFFICE', 'additionalInfoLink': None, 'uiLink': 'https://sam.gov/opp/fe3b50db95534da0abe4f0618fd57816/view', 'office_address_id': 329, 'place_of_performance_id': 224}, {'id': 'fd8d2b13872c4925b1e99f0fc73d6489', 'title': 'Z2DA--523-24-004 Fire Door and Damper Repairs', 'solicitationNumber': '36C24124B0013', 'fullParentPathName': 'VETERANS AFFAIRS, DEPARTMENT OF.VETERANS AFFAIRS, DEPARTMENT OF.241-NETWORK CONTRACT OFFICE 01 (36C241)', 'fullParentPathCode': '036.3600.36C241', 'postedDate': datetime.datetime(2024, 3, 7, 0, 0), 'type': 'Solicitation', 'baseType': 'Solicitation', 'archiveType': 'autocustom', 'archiveDate': datetime.datetime(2024, 4, 7, 0, 0), 'typeOfSetAsideDescription': None, 'typeOfSetAside': None, 'responseDeadLine': datetime.datetime(2023, 12, 19, 13, 0, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=68400))), 'naicsCode': '236220', 'naicsCodes': ['236220'], 'classificationCode': 'Z2DA', 'active': True, 'award': None, 'description': 'https://api.sam.gov/prod/opportunities/v1/noticedesc?noticeid=fd8d2b13872c4925b1e99f0fc73d6489', 'organizationType': 'OFFICE', 'additionalInfoLink': None, 'uiLink': 'https://sam.gov/opp/fd8d2b13872c4925b1e99f0fc73d6489/view', 'office_address_id': 330, 'place_of_performance_id': 225}, {'id': 'fcf44c03145346f79401e2bd91a991b8', 'title': 'PUMP UNIT,CENTRIFUG', 'solicitationNumber': 'N0010424QJB35', 'fullParentPathName': 'DEPT OF DEFENSE.DEPT OF THE NAVY.NAVSUP.NAVSUP WEAPON SYSTEMS SUPPORT.NAVSUP WSS MECHANICSBURG.NAVSUP WEAPON SYSTEMS SUPPORT MECH', 'fullParentPathCode': '017.1700.NAVSUP.NAVSUP WSS.NAVSUP WSS MECH.N00104', 'postedDate': datetime.datetime(2024, 3, 7, 0, 0), 'type': 'Solicitation', 'baseType': 'Solicitation', 'archiveType': 'auto15', 'archiveDate': datetime.datetime(2024, 4, 11, 0, 0), 'typeOfSetAsideDescription': None, 'typeOfSetAside': '', 'responseDeadLine': datetime.datetime(2024, 3, 27, 16, 30, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), 'naicsCode': '333914', 'naicsCodes': ['333914'], 'classificationCode': '4320', 'active': True, 'award': None, 'description': 'https://api.sam.gov/prod/opportunities/v1/noticedesc?noticeid=fcf44c03145346f79401e2bd91a991b8', 'organizationType': 'OFFICE', 'additionalInfoLink': None, 'uiLink': 'https://sam.gov/opp/fcf44c03145346f79401e2bd91a991b8/view', 'office_address_id': 331, 'place_of_performance_id': None}, {'id': 'fcab505da8ac40449bcf95e9e25ffec0', 'title': 'ROOFING AND EXTERIOR PAINTING IDIQ CONTRACT FOR VARIOUS PROJECTS AT THE U. S. MARINE CORPS AIR STATION, IWAKUNI, JAPAN', 'solicitationNumber': 'N4008424B5500', 'fullParentPathName': 'DEPT OF DEFENSE.DEPT OF THE NAVY.NAVFAC.NAVFAC PACIFIC CMD.NAVFAC FAR EAST.NAVFACSYSCOM FAR EAST', 'fullParentPathCode': '017.1700.NAVFAC.NAVFAC PACIFIC CMD.NAVFAC FAR EAST.N40084', 'postedDate': datetime.datetime(2024, 3, 7, 0, 0), 'type': 'Solicitation', 'baseType': 'Solicitation', 'archiveType': 'auto15', 'archiveDate': datetime.datetime(2024, 4, 13, 0, 0), 'typeOfSetAsideDescription': None, 'typeOfSetAside': None, 'responseDeadLine': datetime.datetime(2024, 3, 29, 11, 0, tzinfo=datetime.timezone(datetime.timedelta(seconds=32400))), 'naicsCode': '238320', 'naicsCodes': ['238320'], 'classificationCode': 'Z2JZ', 'active': True, 'award': None, 'description': 'https://api.sam.gov/prod/opportunities/v1/noticedesc?noticeid=fcab505da8ac40449bcf95e9e25ffec0', 'organizationType': 'OFFICE', 'additionalInfoLink': None, 'uiLink': 'https://sam.gov/opp/fcab505da8ac40449bcf95e9e25ffec0/view', 'office_address_id': 332, 'place_of_performance_id': 226}, {'id': 'fa5fda79f3a44aada3ebbda81bdef4f8', 'title': 'EXU1 Building 900 Renovation, Indian Head, Maryland', 'solicitationNumber': 'N4008024R2397', 'fullParentPathName': 'DEPT OF DEFENSE.DEPT OF THE NAVY.NAVFAC.NAVFAC ATLANTIC CMD.NAVFAC WASHINGTON.NAVFACSYSCOM WASHINGTON', 'fullParentPathCode': '017.1700.NAVFAC.NAVFAC ATLANTIC CMD.NAVFAC WASHINGTON.N40080', 'postedDate': datetime.datetime(2024, 3, 7, 0, 0), 'type': 'Solicitation', 'baseType': 'Solicitation', 'archiveType': 'autocustom', 'archiveDate': datetime.datetime(2025, 3, 7, 0, 0), 'typeOfSetAsideDescription': '8(a) Set-Aside (FAR 19.8)', 'typeOfSetAside': '8A', 'responseDeadLine': datetime.datetime(2024, 3, 25, 14, 0, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), 'naicsCode': '236220', 'naicsCodes': ['236220'], 'classificationCode': 'Y1AA', 'active': True, 'award': None, 'description': 'https://api.sam.gov/prod/opportunities/v1/noticedesc?noticeid=fa5fda79f3a44aada3ebbda81bdef4f8', 'organizationType': 'OFFICE', 'additionalInfoLink': None, 'uiLink': 'https://sam.gov/opp/fa5fda79f3a44aada3ebbda81bdef4f8/view', 'office_address_id': 333, 'place_of_performance_id': None}, {'id': 'f9a97d46461e42f0841d1ee09824de79', 'title': 'DACA675240016100 - United States Army Corps of Engineers (USACE) seeks to lease approximately 1,140 gross square feet of retail space in Ontario, Oregon for an Armed Forces Career Center (Military Recruiting Office)', 'solicitationNumber': 'DACA675240016100', 'fullParentPathName': 'DEPT OF DEFENSE.DEPT OF THE ARMY.USACE.NWD.W071 ENDIST SEATTLE', 'fullParentPathCode': '021.2100.USACE.NWD.W912DW', 'postedDate': datetime.datetime(2024, 3, 7, 0, 0), 'type': 'Solicitation', 'baseType': 'Solicitation', 'archiveType': 'auto15', 'archiveDate': datetime.datetime(2024, 4, 15, 0, 0), 'typeOfSetAsideDescription': None, 'typeOfSetAside': None, 'responseDeadLine': datetime.datetime(2024, 3, 31, 23, 59, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=61200))), 'naicsCode': '53112', 'naicsCodes': ['53112'], 'classificationCode': 'X1AA', 'active': True, 'award': None, 'description': 'https://api.sam.gov/prod/opportunities/v1/noticedesc?noticeid=f9a97d46461e42f0841d1ee09824de79', 'organizationType': 'OFFICE', 'additionalInfoLink': None, 'uiLink': 'https://sam.gov/opp/f9a97d46461e42f0841d1ee09824de79/view', 'office_address_id': 334, 'place_of_performance_id': 227}  ... displaying 10 of 326 total bound parameter sets ...  {'id': '0103dcd1b8bc4ff781c4d1be73bc6a10', 'title': '760-24-1-050-0377 - Leavenworth CMOP BAG, STORAGE/RESERVOIR (VA-24-00037908)', 'solicitationNumber': '36C77024Q0188', 'fullParentPathName': 'VETERANS AFFAIRS, DEPARTMENT OF.VETERANS AFFAIRS, DEPARTMENT OF.NATIONAL CMOP OFFICE (36C770)', 'fullParentPathCode': '036.3600.36C770', 'postedDate': datetime.datetime(2024, 3, 7, 0, 0), 'type': 'Solicitation', 'baseType': 'Presolicitation', 'archiveType': 'autocustom', 'archiveDate': datetime.datetime(2024, 5, 7, 0, 0), 'typeOfSetAsideDescription': 'Total Small Business Set-Aside (FAR 19.5)', 'typeOfSetAside': 'SBA', 'responseDeadLine': datetime.datetime(2024, 3, 14, 16, 0, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=68400))), 'naicsCode': '325413', 'naicsCodes': ['325413'], 'classificationCode': '6515', 'active': True, 'award': None, 'description': 'https://api.sam.gov/prod/opportunities/v1/noticedesc?noticeid=0103dcd1b8bc4ff781c4d1be73bc6a10', 'organizationType': 'OFFICE', 'additionalInfoLink': None, 'uiLink': 'https://sam.gov/opp/0103dcd1b8bc4ff781c4d1be73bc6a10/view', 'office_address_id': 651, 'place_of_performance_id': 442}, {'id': '006ed3ead0b04a77b6910cd5c5e94b1f', 'title': 'F-15 HOUSING,ANTIFRICTION', 'solicitationNumber': 'SPRHA1-24-Q-0459', 'fullParentPathName': 'DEPT OF DEFENSE.DEFENSE LOGISTICS AGENCY.DLA AVIATION.DLA AVIATION OGDEN.DLA  AVIATION AT OGDEN, UT', 'fullParentPathCode': '097.97AS.DLA AVIATION.DLA AV OGDEN.SPRHA1', 'postedDate': datetime.datetime(2024, 3, 7, 0, 0), 'type': 'Solicitation', 'baseType': 'Solicitation', 'archiveType': 'autocustom', 'archiveDate': datetime.datetime(2025, 4, 8, 0, 0), 'typeOfSetAsideDescription': '8(a) Sole Source (FAR 19.8)', 'typeOfSetAside': '8AN', 'responseDeadLine': datetime.datetime(2024, 4, 8, 23, 59, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=64800))), 'naicsCode': '333611', 'naicsCodes': ['333611'], 'classificationCode': '2835', 'active': True, 'award': None, 'description': 'https://api.sam.gov/prod/opportunities/v1/noticedesc?noticeid=006ed3ead0b04a77b6910cd5c5e94b1f', 'organizationType': 'OFFICE', 'additionalInfoLink': None, 'uiLink': 'https://sam.gov/opp/006ed3ead0b04a77b6910cd5c5e94b1f/view', 'office_address_id': 652, 'place_of_performance_id': None})]
(Background on this error at: https://sqlalche.me/e/14/gkpj)