In [1]:
# install with pip:
from pyquery import PyQuery as PQ
from titlecase import titlecase

# built-in libraries
import csv
import datetime
import urllib
import urllib.request

In [2]:
# URL of the inspection CSV; the download cell fetches it and saves it under
# the URL's final path segment.
hostfile = "ftp://dbprftp.state.fl.us/pub/llweb/5fdinspi.csv"
# Only rows whose "county" column exactly matches one of these names are kept.
countieswanted = ["Marion"]

In [4]:
# Column names handed to csv.DictReader when the downloaded file is parsed,
# in column order: the descriptive columns, then the numbered "v1".."v58"
# columns, then the two trailing ID columns.
fieldnames = (
    [
        "district", "countynumber", "county", "licensetypecode",
        "licenseno", "business", "address", "city", "zipcode",
        "inspectionno", "visitno", "inspclass", "insptype",
        "inspdisposition", "inspdate_bad", "oldcrit",
        "oldnoncrit", "viototal", "viohigh", "viointermediate",
        "viobasic", "pda",
    ]
    + ["v{}".format(number) for number in range(1, 59)]
    + ["licenseid", "inspvisitid"]
)
# The local copy is named after everything following the last slash in the URL.
localfilename = hostfile[hostfile.rfind("/") + 1:]
# Open the remote file *before* the local one: if the connection fails, the
# previously downloaded copy is not truncated. The `with` also guarantees the
# network response is closed (the original never closed it).
with urllib.request.urlopen(hostfile) as response:
    with open(localfilename, "wb") as f:   # Save as binary. Reopen later as text.
        # Stream in fixed-size chunks instead of holding the whole file in memory.
        for chunk in iter(lambda: response.read(64 * 1024), b""):
            f.write(chunk)

In [5]:
# Lookup table built from insptypes.csv: the "inspdisposition" column is the
# key and the "text" column is the value used in the printed narrative.
insptypedict = {}
with open("insptypes.csv", "r") as csvfile:
    insptypedict.update(
        (entry["inspdisposition"], entry["text"])
        for entry in csv.DictReader(csvfile)
    )

In [14]:
# extras = ["fulladdy", "peopleaddy", "inspdate", "deets", "zplace", "zinsp", "latlong", "zfolo", "region", "insertdate", "recentinspections"]
def cleanrow(row):
    """Add derived, display-ready fields to one inspection record.

    The record is modified in place and also returned. Keys added:

    * ``fulladdy``       -- address, city, county and ZIP joined into one string
    * ``peopleaddy``     -- title-cased "address, city"
    * ``inspdate``       -- ``inspdate_bad`` parsed as a ``datetime``
    * ``insppeople``     -- "routine", "licensing", "complaint" or "unknown",
      chosen by which keyword appears in ``insptype``
    * ``printnarrative`` -- one-sentence summary of the inspection

    Args:
        row: dict for one CSV row, keyed by the column names in ``fieldnames``.

    Returns:
        The same ``row`` dict, with the keys above added.

    Raises:
        ValueError: if ``inspdate_bad`` is not in ``MM/DD/YYYY`` form.
    """
    row["fulladdy"] = row["address"] + ", " + row["city"] + ", " + row["county"] + " County, FL " + row["zipcode"]
    row["peopleaddy"] = titlecase(row["address"] + ", " + row["city"])
    row["inspdate"] = datetime.datetime.strptime(row["inspdate_bad"], "%m/%d/%Y")

    # First keyword found in the inspection type wins; order matches the
    # original if/elif chain.
    row["insppeople"] = "unknown"
    for keyword in ("Routine", "Licensing", "Complaint"):
        if keyword in row["insptype"]:
            row["insppeople"] = keyword.lower()
            break

    # insptypedict is only read here, so no `global` declaration is needed.
    # A disposition missing from the lookup table falls back to the raw text
    # from the data file instead of raising KeyError and aborting the load.
    disposition = row["inspdisposition"]
    outcome = insptypedict.get(disposition, disposition)

    # Build the print narrative.
    row["printnarrative"] = (
        titlecase(row["business"]) + ", "
        + row["peopleaddy"] + ", had a " + row["insppeople"] + " inspection "
        + get_big_timestamp(row["inspdate"]) + ": "
        + outcome
        + " " + str(row["viototal"]) + " total violations, with "
        + str(row["viohigh"]) + " high-priority violations."
    )
    return row

In [15]:
def get_big_timestamp(date_object=None):
    """Format a date as e.g. "Friday, Jan. 19, 2018" or "Monday, March 26, 2018".

    Month rules implemented below: September is written "Sept."; January,
    February and August through December use the three-letter abbreviation
    followed by a period; March through July are spelled out in full. The day
    of the month has no leading zero.

    Args:
        date_object: the ``datetime`` to format. If omitted (or falsy), the
            current local time is used.

    Returns:
        The formatted string: weekday, month, day, year.
    """
    if not date_object:
        date_object = datetime.datetime.now()

    if date_object.month == 9:
        month_text = "Sept."
    elif date_object.month < 3 or date_object.month > 7:
        month_text = date_object.strftime("%b.")
    else:
        month_text = date_object.strftime("%B")

    # Remove the line below if you don't want "Wednesday" or similar in the string.
    stamp = date_object.strftime("%A, ")
    stamp += month_text + " " + str(date_object.day)
    # The year is appended exactly once for every month. The original September
    # branch also added it, producing "Sept. 5, 2018, 2018".
    stamp += date_object.strftime(", %Y")
    # Uncomment below if you want the time.
    # stamp += ", at "
    # stamp += date_object.strftime("%I:%M %p").lstrip("0").replace("AM", "a.m.").replace("PM", "p.m.")
    return stamp

In [16]:
# Parse the downloaded file and keep the cleaned-up rows for the wanted counties.
rawrows = []
with open(localfilename, "r", encoding="ISO-8859-1") as csvfile:
    reader = csv.DictReader(csvfile, fieldnames=fieldnames)
    # And later let's check if we already have this inspection ...
    rawrows.extend(
        cleanrow(record)
        for record in reader
        if record['county'] in countieswanted
    )
    reader = None

In [18]:
# Print the generated narrative of every kept inspection, one per line.
for inspection in rawrows:
    print(inspection["printnarrative"])

Paradise Treats, 5656 E Silver Springs Blvd, Silver Springs, had a routine inspection Monday, March 26, 2018: Met standards. 7 total violations, with 0 high-priority violations.
Braised Onion, 754 Ne 25 Ave, Ocala, had a routine inspection Friday, Jan. 19, 2018: Met standards. 8 total violations, with 3 high-priority violations.
Braised Onion, 754 Ne 25 Ave, Ocala, had a routine inspection Wednesday, July 19, 2017: Met standards. 5 total violations, with 4 high-priority violations.
Fat Boys B B Q, 4132 Ne Silver Spgs Blvd, Ocala, had a routine inspection Thursday, Jan. 11, 2018: Met standards. 4 total violations, with 0 high-priority violations.
Fat Boys B B Q, 4132 Ne Silver Spgs Blvd, Ocala, had a routine inspection Tuesday, July 11, 2017: Met standards. 7 total violations, with 4 high-priority violations.
Richard's Place, 316 E Silver Spgs Blvd, Ocala, had a routine inspection Wednesday, March 21, 2018: Met standards. Violations from earlier administrative complaint have been correc

In [11]:
# Scratch example: sorting tuples by one of their fields.
student_tuples = [('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]
sorted(student_tuples, key=lambda record: record[2])   # ascending by the third field (age)

[('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]