In [11]:
import pandas as pd
from sodapy import Socrata
import numpy as np
import json
from datetime import datetime, timedelta
import dask.dataframe as dd
import dask.array as da
from shapely import wkt
import warnings
import dask.config

# Turn off dask's "convert-string" dataframe option so string columns are not
# auto-converted (presumably keeps them as plain Python objects, which the
# `.apply(lambda x: x.split(...))` calls in later cells rely on — TODO confirm).
dask.config.config['dataframe']['convert-string'] = False
# NOTE(review): this silences *every* warning for the whole kernel, including
# dask meta-inference warnings and NumPy empty-mean warnings.
warnings.filterwarnings("ignore")

# Lazily reads networks.csv from the working directory. It is not referenced by
# any of the cells visible here.
networkData = dd.read_csv("networks.csv")

In [16]:
def getClient(source,token):
    """Build a Socrata API client.

    Parameters
    ----------
    source : str
        Domain of the Socrata portal to talk to.
    token : str
        Application token passed through to ``Socrata``.

    Returns
    -------
    Socrata
        A client configured with a 100-second request timeout.
    """
    return Socrata(source, token, timeout=100)

def fetchData(borough, currentDateTime, data = "i4gi-tjb9",timeDelta = 48):
    """Download traffic records for one borough over a trailing time window.

    The window runs from ``currentDateTime - timeDelta`` hours up to
    ``currentDateTime`` and is matched against the dataset's ``DATA_AS_OF``
    field. Every page of the result set is downloaded; the previous
    implementation requested a single page of 5000 rows and never advanced its
    offset, so larger windows were silently truncated.

    Parameters
    ----------
    borough : str
        Value used for the ``borough`` equality filter.
    currentDateTime : datetime.datetime
        End of the time window.
    data : str, optional
        Socrata dataset identifier.
    timeDelta : int or float, optional
        Length of the window in hours.

    Returns
    -------
    dask.dataframe.DataFrame
        All matching records, split into 5 partitions. When nothing matches,
        the frame is built from an empty record list.
    """
    pageSize = 5000

    windowEnd = currentDateTime.isoformat()
    windowStart = (currentDateTime - timedelta(hours=timeDelta)).isoformat()
    whereClause = f"DATA_AS_OF between '{windowStart}' and '{windowEnd}'"

    # NOTE(review): the application token is a hard-coded placeholder; it should
    # come from configuration / an environment variable rather than the source.
    client = getClient("data.cityofnewyork.us","<API>")

    records = []
    offset = 0
    try:
        while True:
            page = client.get(data,
                              borough = borough,
                              limit = pageSize,
                              offset = offset,
                              where = whereClause,
                              # A fixed ordering keeps limit/offset paging stable,
                              # so rows are neither skipped nor duplicated.
                              order = ":id")
            records.extend(page)
            # A short (or empty) page means the result set is exhausted.
            if len(page) < pageSize:
                break
            offset += pageSize
    finally:
        # Release the underlying HTTP session even if a request fails.
        client.close()

    pdDf = pd.DataFrame.from_records(records)
    results_df = dd.from_pandas(pdDf, npartitions = 5)
    return results_df

def cleanData(data):
    """Return ``data`` without the columns the traffic analysis does not use.

    Parameters
    ----------
    data : DataFrame
        Frame containing at least the seven columns listed below.

    Returns
    -------
    DataFrame
        A new frame with those columns removed; all other columns are kept.
    """
    unusedColumns = (
        "id",
        "link_id",
        "encoded_poly_line_lvls",
        "owner",
        "transcom_id",
        "borough",
        "link_name",
    )
    trimmed = data.drop(columns = list(unusedColumns))
    return trimmed

def getExplodedData(trafficDf):
    """Expand each record into one row per coordinate pair, with a WKT point.

    ``link_points`` is split on spaces, the final token is discarded, and the
    frame is exploded so that each remaining ``"a,b"`` token gets its own row.
    A ``pointWKT`` column holding ``Point(b a)`` is added, and rows without a
    usable point are dropped.

    Differences from the previous implementation:

    * Missing ``link_points`` values, and records that explode to an empty
      list (which become NaN), no longer raise ``AttributeError``; those rows
      are dropped.
    * Tokens whose two parts are not both parseable as numbers are dropped
      instead of producing WKT text that fails to parse later.
    * Both ``apply`` calls declare their output as ``(name, dtype)`` meta.
    * The input frame is not modified; new columns are added with ``assign``.

    Parameters
    ----------
    trafficDf : dask.dataframe.DataFrame
        Frame with a ``link_points`` column of space-separated ``"a,b"`` tokens.

    Returns
    -------
    dask.dataframe.DataFrame
        Exploded frame with an extra ``pointWKT`` column and no null points.
    """
    def _splitPoints(rawPoints):
        # Non-string input (None / NaN) contributes no points.
        if not isinstance(rawPoints, str):
            return []
        # The last token is discarded, as before — presumably an empty or
        # truncated trailing entry; TODO confirm against the feed.
        return rawPoints.split(" ")[:-1]

    def _toPointWKT(pair):
        # After explode, empty lists show up as NaN rather than strings.
        if not isinstance(pair, str):
            return np.nan
        parts = pair.split(",")
        if len(parts) != 2:
            return np.nan
        try:
            float(parts[0])
            float(parts[1])
        except ValueError:
            return np.nan
        # Components are swapped: the second part goes first in the WKT text.
        return f'Point({parts[1]} {parts[0]})'

    trafficDf = trafficDf.assign(
        link_points = trafficDf['link_points'].apply(_splitPoints, meta = ('link_points', 'object'))
    )
    trafficDf = trafficDf.explode('link_points')
    trafficDf = trafficDf.assign(
        pointWKT = trafficDf['link_points'].apply(_toPointWKT, meta = ('pointWKT', 'object'))
    )
    trafficDf = trafficDf.dropna(subset=['pointWKT'])
    return trafficDf

def getInsidePolyPoints(trafficDf, polygon):
    """Keep only the rows whose ``pointWKT`` lies inside ``polygon``.

    An ``insidePoly`` column is written onto ``trafficDf`` (the passed-in frame
    is modified by the item assignment) and the rows where it equals ``True``
    are returned.

    Parameters
    ----------
    trafficDf : dask.dataframe.DataFrame
        Frame with a ``pointWKT`` column of WKT point strings.
    polygon : shapely geometry
        Geometry whose ``contains`` method decides membership.

    Returns
    -------
    dask.dataframe.DataFrame
        The filtered frame, including the ``insidePoly`` column.
    """
    def _isInside(pointText):
        # Parse the WKT text for every row and test it against the polygon.
        return polygon.contains(wkt.loads(pointText))

    trafficDf['insidePoly'] = trafficDf['pointWKT'].apply(_isInside, meta=bool)
    insideMask = trafficDf['insidePoly'] == True
    return trafficDf[insideMask]

def getAvgSpeed(trafficDf):
    """Return the mean of the ``speed`` column as a float.

    The column is materialised once, coerced to numbers and averaged.
    Values that cannot be parsed as numbers, and missing values, are treated
    as NaN and left out of the average. Previously a non-numeric value raised
    in ``astype(float)``, a single missing value made the whole result NaN,
    and the already-computed NumPy array was passed back through dask.

    Parameters
    ----------
    trafficDf : dask.dataframe.DataFrame
        Frame with a ``speed`` column (strings or numbers).

    Returns
    -------
    float
        Mean speed, or NaN when there are no rows or no numeric values.
    """
    rawSpeeds = trafficDf['speed'].values.compute()
    if len(rawSpeeds) == 0:
        return np.nan
    numericSpeeds = pd.to_numeric(pd.Series(rawSpeeds), errors='coerce').astype(float)
    # Series.mean skips NaN by default and yields NaN if nothing is left.
    return numericSpeeds.mean()

def getAvgTrafficInZipcode(borough, polyWKT):
    """Average speed of a borough's traffic points that fall inside a polygon.

    Records are fetched for the window ending at the current local time, then
    cleaned, exploded into individual points, filtered to the polygon and
    averaged.

    Parameters
    ----------
    borough : str
        Borough name forwarded to ``fetchData``.
    polyWKT : str
        WKT text of the polygon to filter by.

    Returns
    -------
    float or int
        The value returned by ``getAvgSpeed``, or ``-1`` when the fetch
        returned no rows.
    """
    rawTraffic = fetchData(borough, datetime.now())
    # Nothing fetched: report the sentinel instead of running the pipeline.
    if len(rawTraffic) == 0:
        return -1

    explodedTraffic = getExplodedData(cleanData(rawTraffic))
    zonePolygon = wkt.loads(polyWKT)
    trafficInZone = getInsidePolyPoints(explodedTraffic, zonePolygon)
    return getAvgSpeed(trafficInZone)

def liveTrafficSpeed(polyWKT):
    """Print the current average traffic speed inside ``polyWKT``.

    The borough filter is fixed to ``"Brooklyn"``; the result of
    ``getAvgTrafficInZipcode`` is printed and nothing is returned.
    """
    avgSpeed = getAvgTrafficInZipcode("Brooklyn", polyWKT)
    print(avgSpeed)

# Example run on a small rectangle (x from -74.0056 to -73.9919, y from 40.7236 to 40.7277).
# NOTE(review): liveTrafficSpeed always filters on borough "Brooklyn"; this
# rectangle looks like it may lie outside that borough, which would leave no
# points after the polygon filter and could explain the `nan` printed below — confirm.
liveTrafficSpeed("POLYGON ((-74.00561213493349 40.72359177953539, -74.00561213493349 40.727657164155815, -73.9919435977936 40.727657164155815, -73.9919435977936 40.72359177953539, -74.00561213493349 40.72359177953539))")
# trafficDf.head(20)

nan
