In [None]:
from difflib import SequenceMatcher as SM

def best_match(qmap, school_name, zipcode):
    """Find the entry under ``zipcode`` whose name is most similar to ``school_name``.

    Args:
        qmap: Mapping of zip code -> iterable of dicts, each carrying at
            least a ``"school_name"`` key.
        school_name: The name to compare against every candidate.
        zipcode: Key selecting which candidates in ``qmap`` to consider.

    Returns:
        ``None`` when ``zipcode`` is not a key of ``qmap``. Otherwise a dict
        ``{"best_match": <entry or None>, "ratio": <score>}`` where the score
        is the ``SequenceMatcher`` ratio of the winning entry. If no candidate
        scores above 0, ``best_match`` is ``None`` and ``ratio`` is ``0``.
    """
    if zipcode not in qmap:
        return None

    winner = {"best_match": None, "ratio": 0}
    for candidate in qmap[zipcode]:
        score = SM(None, school_name, candidate["school_name"]).ratio()
        # Only a strictly higher score replaces the current winner, so the
        # earliest candidate is kept on ties.
        if score <= winner["ratio"]:
            continue
        winner = {"best_match": candidate, "ratio": score}
    return winner


In [None]:
from dotenv import load_dotenv
import psycopg2
import os
import json

# Load variables from a .env file (if present) so the DB_* settings below
# can be read from the environment.
load_dotenv()

db_password = os.getenv('DB_PASSWORD')
db_user = os.getenv('DB_USER')
db_name = os.getenv('DB_NAME')

connection = psycopg2.connect(
    dbname=db_name,
    user=db_user,
    password=db_password,
    host="localhost",
    port="5432"
)

# try/finally guarantees the connection is released even if the query
# fails; the cursor context manager closes the cursor when the block exits.
try:
    with connection.cursor() as cursor:
        # SFUSD = '68478'
        cursor.execute("SELECT school_code, school_name, zip_code from entities where county_code = '38';")
        results = cursor.fetchall()
finally:
    connection.close()

# Group the fetched rows by zip code:
#   zip_code -> [{"school_code": ..., "school_name": ...}, ...]
qmap = {}
for scode, sname, szip in results:
    qmap.setdefault(szip, []).append({"school_code": scode, "school_name": sname})


#### read the json file
# Load the school list from schoolList.json (decoded as UTF-8). The loop below
# iterates over it and indexes each item by string keys.
with open("schoolList.json", "r",encoding="utf-8") as file:
    data = json.load(file)

# NOTE(review): json_strings is not used in the lines visible here —
# presumably filled further down; confirm or remove.
json_strings = []
for school in data:
    # Placeholder that is only replaced when a postal code is found below.
    # NOTE(review): `zip` shadows the builtin zip() for the rest of the module.
    zip = f"no zip for {school['schoolLabel']}"
    if "geolocations" in school:
        # Only the first geolocation entry is consulted.
        # NOTE(review): an empty "geolocations" list would raise IndexError here.
        location = school["geolocations"][0]["addressDetails"]
        schoolCode = school["schoolCode"] if "schoolCode" in school else "<missing>"
        if "PostalCode" in location:
            # Keep only the part before the first "-" (presumably dropping a
            # ZIP+4 suffix so the value matches the zip keys in qmap — confirm).
            zip = location["PostalCode"].split("-")[0]
            # Fuzzy-match the label against the DB schools sharing this zip;
            # best is None when the zip is not a key of qmap.
            best = best_match(qmap, school["schoolLabel"], zip)
            print(f"school: {school['schoolLabel']} ({schoolCode}), zip: {zip}, best: {best}")