In [1]:
# built-in libraries
import csv
import datetime
import urllib
import urllib.request

# install with pip:
from pyquery import PyQuery as PQ
from titlecase import titlecase

In [2]:
# Remote CSV to fetch; a later cell downloads it and saves it under the
# name that follows the last "/" in this URL.
hostfile = "ftp://dbprftp.state.fl.us/pub/llweb/5fdinspi.csv"
# Only rows whose "county" column matches one of these names are kept
# when the downloaded file is read.
countieswanted = ["Alachua"]

In [3]:
"""
The latter. I want to be able to output a series like “Panera Bread, 123 Main St., Gainesville, had a follow-up inspection on May 21. Inspector found 6 violations, 1 of them was rated as high. High Priority - Potentially hazardous (time/temperature control for safety) food cold held at greater than 41 degrees Fahrenheit. Cut tomatoes 52°, Manager moved out of the cooler at 11:30, Manager will start using time as a public health control. Reach-in cooler at the cook line: beef 55-56°, cut cabbage 56°, Employee over the items to walk-in cooler. Corrective action taken. Follow-up required.”
"""

'\nThe latter. I want to be able to output a series like “Panera Bread, 123 Main St., Gainesville, had a follow-up inspection on May 21. Inspector found 6 violations, 1 of them was rated as high. High Priority - Potentially hazardous (time/temperature control for safety) food cold held at greater than 41 degrees Fahrenheit. Cut tomatoes 52°, Manager moved out of the cooler at 11:30, Manager will start using time as a public health control. Reach-in cooler at the cook line: beef 55-56°, cut cabbage 56°, Employee over the items to walk-in cooler. Corrective action taken. Follow-up required.”\n'

In [4]:
# Column names, in file order, handed to csv.DictReader as explicit
# fieldnames when the downloaded inspection file is read.
# The 58 consecutive violation columns (v1 .. v58) are generated rather
# than typed out by hand.
fieldnames = (
    [
        "district", "countynumber", "county", "licensetypecode",
        "licenseno", "business", "address", "city", "zipcode",
        "inspectionno", "visitno", "inspclass", "insptype",
        "inspdisposition", "inspdate_bad", "oldcrit",
        "oldnoncrit", "viototal", "viohigh", "viointermediate",
        "viobasic", "pda",
    ]
    + ["v" + str(slot) for slot in range(1, 59)]
    + ["licenseid", "inspvisitid"]
)
# Local copy is named after the remote file: everything after the last slash.
localfilename = hostfile[hostfile.rfind("/") + 1:]

# Fetch the complete payload first, inside a context manager so the
# connection is closed deterministically. Only then open the destination,
# so a failed download cannot leave an empty or truncated local file behind.
with urllib.request.urlopen(hostfile) as response:
    payload = response.read()

with open(localfilename, "wb") as f:   # Save as binary. Reopened later as text.
    f.write(payload)

In [5]:
# Lookup table built from insptypes.csv: the value of each row's
# "inspdisposition" column maps to that row's "text" column. cleanrow()
# uses it to turn a row's disposition into the sentence placed in the
# print narrative.
# newline="" is how the csv module asks files to be opened, so that line
# endings inside quoted fields are handled by the csv reader itself.
with open("insptypes.csv", "r", newline="") as csvfile:
    insptypedict = {
        row["inspdisposition"]: row["text"]
        for row in csv.DictReader(csvfile)
    }

In [6]:
# extras = ["fulladdy", "peopleaddy", "inspdate", "deets", "zplace", "zinsp", "latlong", "zfolo", "region", "insertdate", "recentinspections"]
def _count_phrase(count, noun):
    """Return "<count> <noun>", adding a plural "s" unless the count is exactly 1.

    `count` may be an int or the raw CSV string. It is rendered with str()
    unchanged; only the comparison against "1" ignores surrounding whitespace.
    """
    rendered = str(count)
    if rendered.strip() == "1":
        return rendered + " " + noun
    return rendered + " " + noun + "s"


def cleanrow(row):
    """Add derived, human-readable fields to one inspection row.

    The row dict is modified in place and also returned. Keys added:

    * "fulladdy"       -- address, city, county and ZIP joined into one string.
    * "peopleaddy"     -- title-cased "address, city".
    * "inspdate"       -- datetime parsed from "inspdate_bad" (MM/DD/YYYY).
    * "insppeople"     -- "routine", "licensing" or "complaint", chosen by the
                          first of those words found in "insptype" (checked in
                          that order); "unknown" if none is present.
    * "printnarrative" -- one-sentence summary of the inspection.

    Reads the module-level `insptypedict` and calls `get_big_timestamp`.

    Raises:
        KeyError: if an expected column is missing from the row, or the
            row's "inspdisposition" is not a key of `insptypedict`.
        ValueError: if "inspdate_bad" is not in MM/DD/YYYY form.
    """
    row["fulladdy"] = (
        row["address"] + ", " + row["city"] + ", "
        + row["county"] + " County, FL " + row["zipcode"]
    )
    row["peopleaddy"] = titlecase(row["address"] + ", " + row["city"])
    row["inspdate"] = datetime.datetime.strptime(row["inspdate_bad"], "%m/%d/%Y")

    # First matching keyword wins, in the same priority order as before.
    row["insppeople"] = "unknown"
    for keyword in ("Routine", "Licensing", "Complaint"):
        if keyword in row["insptype"]:
            row["insppeople"] = keyword.lower()
            break

    # Build the print narrative. Counts are pluralized properly so a single
    # violation reads "1 total violation" / "1 high-priority violation"
    # instead of "1 ... violations".
    row["printnarrative"] = "".join([
        titlecase(row["business"]), ", ",
        row["peopleaddy"], ", had a ", row["insppeople"], " inspection ",
        get_big_timestamp(row["inspdate"]), ": ",
        insptypedict[row["inspdisposition"]],
        " ", _count_phrase(row["viototal"], "total violation"), ", with ",
        _count_phrase(row["viohigh"], "high-priority violation"), ".",
    ])
    return row

In [8]:
def get_big_timestamp(date_object=None, include_weekday=True,
                      include_year=False, include_time=False):
    """Format a date as a readable stamp such as "Thursday, Jan. 18".

    Month naming: September is written "Sept."; January, February, August,
    October, November and December use the three-letter abbreviation plus a
    period; March through July are spelled out in full. The day of the
    month carries no leading zero.

    Args:
        date_object: a datetime (or date) to format. Any falsy value,
            including the default None, means "now".
        include_weekday: prefix the stamp with the weekday name and a comma.
        include_year: append ", YYYY".
        include_time: append ", at H:MM a.m./p.m." (12-hour clock, no
            leading zero on the hour).

    Returns:
        The formatted string. With the default flags the result is
        "<Weekday>, <Month> <day>" for every month, September included.
    """
    if not date_object:
        date_object = datetime.datetime.now()

    month = date_object.month
    if month == 9:
        month_label = "Sept."   # strftime's %b would give "Sep"
    elif month < 3 or month > 7:
        month_label = date_object.strftime("%b.")
    else:
        month_label = date_object.strftime("%B")

    stamp = ""
    if include_weekday:
        stamp += date_object.strftime("%A, ")
    # The year is controlled only by include_year. September used to append
    # it unconditionally, unlike every other month.
    stamp += month_label + " " + str(date_object.day)
    if include_year:
        stamp += date_object.strftime(", %Y")
    if include_time:
        clock = date_object.strftime("%I:%M %p").lstrip("0")
        stamp += ", at " + clock.replace("AM", "a.m.").replace("PM", "p.m.")
    return stamp

In [11]:
# Read the downloaded inspection file, keep only rows for the wanted
# counties, and enrich each kept row with cleanrow().
# Column names come from `fieldnames`, so the first line of the file is
# treated as data rather than as a header.
# newline="" is how the csv module asks files to be opened, so that line
# endings inside quoted fields are handled by the csv reader itself.
# TODO: later, check whether we already have each inspection before adding it.
with open(localfilename, "r", encoding="ISO-8859-1", newline="") as csvfile:
    reader = csv.DictReader(csvfile, fieldnames=fieldnames)
    rawrows = [cleanrow(row) for row in reader if row["county"] in countieswanted]

In [12]:
# Print the narrative sentence for every kept inspection row, one per
# line, in the order the rows were read.
narratives = (entry["printnarrative"] for entry in rawrows)
for narrative in narratives:
    print(narrative)

Double Envelop 71099, 2500 Ne 39 Ave, Gainesville, had a routine inspection Thursday, Jan. 18: Met standards; earlier problems corrected. 2 total violations, with 0 high-priority violations.
Double Envelop 71099, 2500 Ne 39 Ave, Gainesville, had a routine inspection Thursday, Jan. 11: Follow-up needed: Administrative complaint recommended. 3 total violations, with 0 high-priority violations.
Pfg, 4041 Ne 54 Ave, Gainesville, had a routine inspection Monday, Nov. 27: Met standards. 3 total violations, with 0 high-priority violations.
Bear Archery 72353, 4600 Sw 41st Blvd, Gainesville, had a routine inspection Wednesday, Dec. 13: Met standards. 0 total violations, with 0 high-priority violations.
Hca Putnam Comm Hosp 2, 611 Zeagler Dr, Palatka, had a routine inspection Wednesday, March 28: Met standards; earlier problems corrected. 0 total violations, with 0 high-priority violations.
Hca Putnam Comm Hosp 2, 611 Zeagler Dr, Palatka, had a routine inspection Wednesday, March 21: Time exten

World of Beer, 140 Sw 128th St Suite B, Newberry, had a routine inspection Tuesday, Dec. 5: Met standards. 10 total violations, with 3 high-priority violations.
World of Beer, 140 Sw 128th St Suite B, Newberry, had a routine inspection Wednesday, April 25: Met standards. 3 total violations, with 0 high-priority violations.
China Express of Gainesville, 6250 Nw 23rd St # 4 & 5, Gainesville, had a routine inspection Wednesday, Nov. 22: Met standards. 6 total violations, with 1 high-priority violations.
China Express of Gainesville, 6250 Nw 23rd St # 4 & 5, Gainesville, had a routine inspection Wednesday, March 7: Met standards; earlier problems corrected. 3 total violations, with 0 high-priority violations.
Pizza in the Hood, 14212 Nw 154th Ave, Alachua, had a routine inspection Thursday, July 13: Time extension given to correct earlier problems. 1 total violations, with 0 high-priority violations.
Pizza in the Hood, 14212 Nw 154th Ave, Alachua, had a complaint inspection Friday, Jan. 26