In [6]:
from core import Seed, init

# Call the project's init() hook from `core`; what it sets up is defined there, not in this notebook.
init()

In [7]:
# Load all the companies from Airtable into `companies` using the `airtable` module.
import airtable

companies = airtable.load_into_pandas()
# NOTE(review): the rendered table includes "Created By" / "Last Modified By" names and
# email addresses; consider clearing this output before sharing the notebook.
companies

Unnamed: 0,id,createdTime,fields.extra_json,fields.Name,fields.Website,fields.Status,fields.Created,fields.Last Modified,fields.Created By.id,fields.Created By.email,fields.Created By.name,fields.Last Modified By.id,fields.Last Modified By.email,fields.Last Modified By.name,fields.Key Product Name,domain
8,rec8leUf8FyDrIa8x,2024-09-30T20:37:50.000Z,,Akili Interactive,https://www.akiliinteractive.com/,Approved,2024-09-30T20:37:50.000Z,2024-10-02T18:43:58.000Z,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,EndeavorRx,akiliinteractive.com
15,recC1pqGYRxzcmoFs,2024-09-30T20:37:09.000Z,,Synthesize Bio,https://www.synthesize.bio/,Approved,2024-09-30T20:37:09.000Z,2024-09-30T20:41:02.000Z,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,,synthesize.bio
19,recE8NhvREVz0cNIa,2024-09-30T20:34:11.000Z,,98point6,98point6.com,Approved,2024-09-30T20:34:11.000Z,2024-09-30T20:34:20.000Z,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,,98point6.com
23,recFDpfbo41ICr8O8,2024-09-30T20:35:54.000Z,,Kevala,kevala.care,Approved,2024-09-30T20:35:54.000Z,2024-09-30T20:40:58.000Z,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,,kevala.care
47,recSxlXd9ZOPUr8dL,2024-09-30T20:34:55.000Z,,Imagine Pediatrics,imaginepediatrics.org,Approved,2024-09-30T20:34:55.000Z,2024-09-30T20:40:53.000Z,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,,imaginepediatrics.org
51,recU56sRD2MLNN9lz,2024-09-30T20:35:03.000Z,,Pomelo Care,pomelocare.com,Approved,2024-09-30T20:35:03.000Z,2024-09-30T20:40:55.000Z,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,,pomelocare.com
65,recaVe1KAvdyjHkR6,2024-09-30T20:35:22.000Z,,Singularity 6,singularity6.com,Approved,2024-09-30T20:35:22.000Z,2024-09-30T20:40:56.000Z,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,Palia,singularity6.com
87,recwZi2dVuPxfaDnd,2024-09-30T20:36:13.000Z,,Rippling,rippling.com,Approved,2024-09-30T20:36:13.000Z,2024-09-30T20:41:00.000Z,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,usrlUO0c7FWWC9lIZ,keith.trnka@gmail.com,Keith Trnka,,rippling.com


In [8]:
# Convert the companies table into Seed objects (presumably one per row: the displayed
# result keeps the same index labels as `companies`).
company_objects = airtable.pandas_to_seeds(companies)
company_objects

8     (Akili Interactive, EndeavorRx, akiliinteracti...
15     (Synthesize Bio, Synthesize Bio, synthesize.bio)
19                   (98point6, 98point6, 98point6.com)
23                        (Kevala, Kevala, kevala.care)
47    (Imagine Pediatrics, Imagine Pediatrics, imagi...
51           (Pomelo Care, Pomelo Care, pomelocare.com)
65             (Singularity 6, Palia, singularity6.com)
87                   (Rippling, Rippling, rippling.com)
dtype: object

In [9]:
from datetime import datetime, timedelta
import os
from typing import Optional

# Example target used to try out the output path and the freshness check defined in this cell.
target = Seed.init("Imagine Pediatrics", domain="imaginepediatrics.org")
# Folder (relative to the notebook's working directory) holding the per-company JSON files.
data_folder = "../output/data"

# Path of the JSON file for this target.
output_json = f"{data_folder}/{target.as_path_v2()}.json"
# Not the last expression in the cell, so this bare name is not displayed.
output_json

def get_file_age(file_path: str) -> Optional[timedelta]:
    """Return how long ago ``file_path`` was last modified.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The time elapsed since the file's modification time, or ``None`` when
        the file does not exist (or cannot be stat-ed).
    """
    try:
        # Stat once instead of exists() followed by getmtime(): the file could
        # disappear between the two calls. The caught exceptions are the same
        # ones os.path.exists() turns into False.
        modified_ts = os.path.getmtime(file_path)
    except (OSError, ValueError):
        return None

    # Subtract epoch timestamps rather than two naive local datetimes, so a
    # daylight-saving change between the two instants cannot skew the age.
    return timedelta(seconds=datetime.now().timestamp() - modified_ts)
    
def should_rebuild(file_path: str, max_age: timedelta = timedelta(days=7)) -> bool:
    """Decide whether the file at ``file_path`` needs to be (re)generated.

    Args:
        file_path: Path of the cached output file.
        max_age: Maximum age the file may reach before it counts as stale.

    Returns:
        ``True`` when the file is missing or strictly older than ``max_age``,
        ``False`` otherwise.
    """
    file_age = get_file_age(file_path)
    # A missing file (age of None) always needs building.
    return file_age is None or file_age > max_age

# Check whether the example target's JSON is missing or older than the default max_age.
should_rebuild(output_json)


False

In [10]:
# Set the log level
import sys
from loguru import logger
logger.remove()
logger.add(sys.stderr, level="INFO")

import unified

for target in company_objects:
    output_json = f"{data_folder}/{target.as_path_v2()}.json"

    if not should_rebuild(output_json):
        logger.info(f"Skipping {output_json} as it is up to date.")
    else:
        logger.info(f"Building {output_json}...")

        # TODO: Catch exceptions and keep running
        unified_result = await unified.run(
            target, 
            # TODO: Allow some customization of these parameters
            num_reddit_threads=10, 
            max_glassdoor_review_pages=5, 
            max_glassdoor_job_pages=0,
            max_news_articles=20,
            )
        
        with open(output_json, 'w') as json_file:
            json_file.write(unified_result.model_dump_json(indent=2))



[32m2024-10-02 12:24:36.576[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m13[0m - [1mSkipping ../output/data/Akili_Interactive_EndeavorRx.json as it is up to date.[0m
[32m2024-10-02 12:24:36.577[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m13[0m - [1mSkipping ../output/data/Synthesize_Bio.json as it is up to date.[0m
[32m2024-10-02 12:24:36.577[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m13[0m - [1mSkipping ../output/data/98point6.json as it is up to date.[0m
[32m2024-10-02 12:24:36.578[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m13[0m - [1mSkipping ../output/data/Kevala.json as it is up to date.[0m
[32m2024-10-02 12:24:36.578[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m13[0m - [1mSkipping ../output/data/Imagine_Pediatrics.json as it is up to date.[0m
[32m2024-10-02 12:24:36.579[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m15[0m - [1mBui