In [2]:
import json
import requests
import pandas as pd
import geopandas as gpd
import math

In [5]:
def fetch_data():
    """Download every feature of the Water Main Breaks layer.

    The ArcGIS REST ``query`` endpoint only returns one page of features per
    request, so the layer is read page by page with ``resultOffset``.  Paging
    continues for as long as the service marks a response with
    ``exceededTransferLimit`` (its signal that more rows are available), and
    the offset advances by the number of features actually received, so a
    server-side cap lower than the requested page size is handled correctly.

    Note: ``maxRecordCount`` is the per-request limit of the service, not the
    number of rows in the layer, so it cannot be used to derive the number of
    pages.

    Returns
    -------
    gpd.GeoDataFrame
        All features of the layer.  An empty GeoDataFrame is returned when
        the service reports no features.

    Raises
    ------
    requests.HTTPError
        If the service answers with a non-success HTTP status.
    RuntimeError
        If the service answers with an ArcGIS error payload.
    """
    base_url = "https://services1.arcgis.com/qAo1OsXi67t7XgmS/arcgis/rest/services/Water_Main_Breaks/FeatureServer/0/query"
    page_size = 2000
    params = {
        "outFields": "*",
        "where": "1=1",
        "f": "geojson",
        "resultOffset": 0,
        "resultRecordCount": page_size,
    }

    all_features = []
    while True:
        response = requests.get(base_url, params=params, timeout=60)
        response.raise_for_status()
        data = response.json()

        # ArcGIS reports query failures inside the payload, usually together
        # with an HTTP 200 status, so the body has to be inspected as well.
        if "error" in data:
            raise RuntimeError(f"ArcGIS query failed: {data['error']}")

        page_features = data.get("features") or []
        all_features.extend(page_features)

        # The flag is looked up both under `properties` and at the top level
        # so either placement of `exceededTransferLimit` is honoured.
        more_available = bool(
            (data.get("properties") or {}).get("exceededTransferLimit")
            or data.get("exceededTransferLimit")
        )
        if not page_features or not more_available:
            break

        # Advance by what was actually returned, not by the requested size.
        params["resultOffset"] += len(page_features)

    if not all_features:
        # `from_features` cannot attach a CRS when there is no geometry.
        return gpd.GeoDataFrame()

    # GeoJSON coordinates are WGS84 longitude/latitude.
    return gpd.GeoDataFrame.from_features(all_features, crs="EPSG:4326")

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import urllib.parse
import requests

def _objectid_range_query_url(url_feature_server, fid_colname, id_start, id_end):
    '''Build the GeoJSON query URL for object IDs in [id_start, id_end].'''
    readable_query_string = (f'{fid_colname}>={id_start} '
                             f'and {fid_colname}<={id_end}')

    # Encoding from readable text to URL. The leading part of the query is
    # fixed and never changes between requests.
    return (url_feature_server
            + 'query?f=geojson&outFields=*&where='
            + urllib.parse.quote(readable_query_string))


def query_arcgis_feature_server(url_feature_server=''):
    '''
    This function downloads all of the features available on a given ArcGIS
    feature server. The function is written to bypass the limitations imposed
    by the online service, such as only returning up to 1,000 or 2,000
    features at a time.

    Parameters
    ----------
    url_feature_server : string
        String containing the URL of the service API you want to query. It
        should end in a forward slash and look something like this:
        'https://services.arcgis.com/P3ePLMYs2RVChkJx/arcgis/rest/services/USA_Counties/FeatureServer/0/'
        A missing trailing slash is added automatically.

    Returns
    -------
    geodata_final : gpd.GeoDataFrame
        This is a GeoDataFrame that contains all of the features from the
        Feature Server, sorted by object ID. After calling this function, the
        `geodata_final` object can be used to store the data on disk in
        several different formats including, but not limited to, Shapefile
        (.shp), GeoJSON (.geojson), GeoPackage (.gpkg), or PostGIS.
        See https://geopandas.org/en/stable/docs/user_guide/io.html#writing-spatial-data
        for more details.
        An empty GeoDataFrame is returned when no URL is given or when the
        layer reports no object IDs.

    Raises
    ------
    RuntimeError
        If the service keeps answering with an error even when a single
        feature is requested.

    Notes
    -----
    Missing or duplicated object IDs in the result are reported with
    printed warnings; they do not raise.
    '''
    if url_feature_server == '':
        return gpd.GeoDataFrame()

    # Fixing last character in case the URL provided didn't end in a
    # forward slash
    if url_feature_server[-1] != '/':
        url_feature_server = url_feature_server + '/'

    # Getting the layer definitions. This contains important info such as the
    # name of the column used as feature_ids/object_ids, among other things.
    layer_def = requests.get(url_feature_server + '?f=pjson').json()

    # The `objectIdField` is the column name used for the
    # feature_ids/object_ids
    fid_colname = layer_def['objectIdField']

    # The `maxRecordCount` tells us the maximum number of records this REST
    # API service can return at once. The code below performs multiple calls
    # to the API, each one being short enough never to go beyond this limit.
    record_count_max = layer_def['maxRecordCount']

    # Part of the URL that specifically requests only the object IDs
    url_query_get_ids = (f'query?f=geojson&returnIdsOnly=true'
                         f'&where={fid_colname}+is+not+null')

    # Getting all the object IDs
    service_request = requests.get(url_feature_server + url_query_get_ids)
    object_ids = service_request.json()['properties']['objectIds']

    # An empty layer has nothing to page through; without this guard the
    # block-size probe below would index into an empty array.
    if object_ids is None or len(object_ids) == 0:
        return gpd.GeoDataFrame()
    all_objectids = np.sort(object_ids)

    # This variable will store all the parts of the multiple queries. These
    # parts will, at the end, be concatenated into one large GeoDataFrame.
    geodata_parts = []

    # Identifying the largest query size allowed per request. This will
    # dictate how many queries will need to be made. We start the search at
    # the max record count, but that generates errors sometimes - the query
    # might time out because it's too big. If the test query fails, we halve
    # the query size until the test query goes through.
    block_size = max(1, min(record_count_max, len(all_objectids)))
    while True:
        url_comb = _objectid_range_query_url(url_feature_server,
                                             fid_colname,
                                             all_objectids[0],
                                             all_objectids[block_size - 1])
        url_get = requests.get(url_comb)

        # A reply that is not JSON at all is treated the same way as an
        # explicit error payload: the block was too big.
        try:
            query_failed = 'error' in url_get.json()
        except ValueError:
            query_failed = True

        if not query_failed:
            # The successful probe doubles as the first block of data.
            geodata_parts.append(gpd.read_file(url_get.text))
            break

        # Strictly shrink the block; give up once a single-feature request
        # still fails instead of retrying the same request forever.
        if block_size <= 1:
            raise RuntimeError('The feature server returned an error even '
                               'for a single-feature query: '
                               f'{url_comb}')
        block_size = max(1, block_size // 2)

    # Performing the actual query to the API multiple times. This skips the
    # first few rows/features in the data because those rows were already
    # captured by the probe query above.
    for i in range(block_size, len(all_objectids), block_size):
        # Finding the limits of each block
        sub_list = all_objectids[i:i + block_size]
        url_comb = _objectid_range_query_url(url_feature_server,
                                             fid_colname,
                                             sub_list[0],
                                             sub_list[-1])

        # Actually performing the query and storing its results in a
        # GeoDataFrame
        geodata_part = gpd.read_file(url_comb, driver='GeoJSON')

        # Appending the result to `geodata_parts`
        if geodata_part.shape[0] > 0:
            geodata_parts.append(geodata_part)

    # Concatenating all of the query parts into one large GeoDataFrame
    geodata_final = (pd.concat(geodata_parts,
                               ignore_index=True)
                     .sort_values(by=fid_colname)
                     .reset_index(drop=True))

    # Checking if any object ID is missing
    ids_queried = set(geodata_final[fid_colname])
    for this_id in all_objectids:
        if this_id not in ids_queried:
            print('WARNING! The following ObjectID is missing from the final '
                  f'GeoDataFrame: ObjectID={this_id}')

    # Checking if any object ID is included twice
    id_counts = geodata_final[fid_colname].value_counts()
    repeated_ids = id_counts[id_counts > 1].sort_index()
    for this_id, n_times in repeated_ids.items():
        print('WARNING! The following ObjectID is included multiple times in '
              f'the final GeoDataFrame: ObjectID={this_id}\tOccurrences={n_times}')

    return geodata_final

In [2]:
# Download the full Water Main Breaks layer into `df`, then show its
# (rows, columns) shape followed by the frame itself.
url = 'https://services1.arcgis.com/qAo1OsXi67t7XgmS/arcgis/rest/services/Water_Main_Breaks/FeatureServer/0/'
df = query_arcgis_feature_server(url)
print(df.shape)
df

(2766, 51)


Unnamed: 0,OBJECTID,WATBREAKINCIDENTID,INCIDENT_DATE,BREAK_TYPE,ROAD_CLOSED,SIDEWALK_CLOSED,HOUR_IMPACTED,UNITS_IMPACTED,CW_SERVICE_REQUEST,STATUS,...,STREET,ASSETID,ASSET_DEPTH,FROST_DEPTH,ASSET_SIZE,ASSET_YEAR_INSTALLED,ASSET_MATERIAL,ASSET_EXISTS,GLOBALID,geometry
0,1,2252,1512141300000,MAIN,Partially Closed,Open,12-16 hours,47,123456.0,REPAIR COMPLETED,...,LANCASTER ST W,134292,1.6,0.30,450.0,1937,CI,Y,3521d297-1a2e-4e7b-a071-fc53ed87e965,POINT (-80.48400 43.46294)
1,7874,1311,985564800000,SERVICE,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,CLOVERDALE CRES,4101323,,,13.0,1965,XXX,Y,72445d62-16a8-43c1-9733-56b06015b077,POINT (-80.51507 43.42274)
2,7875,1325,1157500800000,SERVICE,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,WREN CRES,4099987,,,25.0,1967,XXX,Y,3bdc8931-31c0-4090-a07a-a6847781dd97,POINT (-80.43981 43.44507)
3,7876,1328,1157932800000,SERVICE,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,GREENBROOK DR,4642530,,,25.0,1964,PVC,Y,f75ad0b1-5b2a-4125-8ad5-2b9a037debd7,POINT (-80.51086 43.42648)
4,7877,1308,948931200000,SERVICE,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,MONTGOMERY RD,4100648,,,25.0,1967,XXX,Y,5a3c5d03-0899-4899-95e7-278bc5cbb682,POINT (-80.45752 43.44320)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2761,65602,147996,1672238134000,MAIN,Closed,Open,4-8 hours,0-50,,CANCELLED,...,ROSEMOUNT DR,33460,,,150.0,1961,CI,Y,380c6269-a2fa-440f-ae18-999ff3dadec9,POINT (-80.45760 43.46043)
2762,65921,148014,1672330061000,MAIN,Open,Open,4-8 hours,0-50,,CANCELLED,...,ROSEMOUNT DR,33460,,0.50,150.0,1961,CI,Y,6db006d3-c68b-4e59-8ff9-d9740ce24163,POINT (-80.45757 43.46043)
2763,66241,148034,1672562257000,MAIN,Closed,Open,4-8 hours,0-50,,REPAIR COMPLETED,...,CORNELL AVE,8060,,0.15,150.0,1958,CI,Y,d35e2e39-6365-41e9-9060-19bfa9c2e409,POINT (-80.46366 43.43254)
2764,66561,148054,1672820515000,MAIN,Closed,Open,4-8 hours,0-50,,REPAIR COMPLETED,...,KINGSTON CRES,22200,,,150.0,1966,CI,Y,386aeaf2-4a11-462f-8af2-5dde5abd15f5,POINT (-80.43994 43.45677)


In [3]:
# Confirm which class the downloaded breaks frame is.
type(df)

geopandas.geodataframe.GeoDataFrame

In [4]:
# Download the full Water Mains layer into `mains` (this rebinds `url`),
# then show its (rows, columns) shape followed by the frame itself.
url = 'https://services1.arcgis.com/qAo1OsXi67t7XgmS/arcgis/rest/services/Water_Mains/FeatureServer/0/'
mains = query_arcgis_feature_server(url)
print(mains.shape)
mains

(15905, 29)


Unnamed: 0,OBJECTID,WATMAINID,STATUS,PRESSURE_ZONE,ROADSEGMENTID,MAP_LABEL,CATEGORY,PIPE_SIZE,MATERIAL,LINED,...,REL_CLEANING_AREA,REL_CLEANING_SUBAREA,UNDERSIZED,SHALLOW_MAIN,CONDITION_SCORE,OVERSIZED,CLEANED,GlobalID,Shape__Length,geometry
0,77753,10080,ACTIVE,KIT 6,5457,34.2m 450mm DI,TREATED,450,DI,NO,...,6,8,N,N,8.50,N,N,ac0f1a53-5695-4017-a66b-0da22fdc4a1a,34.210563,"LINESTRING (-80.54034 43.41771, -80.54036 43.4..."
1,77754,76299,ACTIVE,KIT 4,6390,.4m 300mm DI,TREATED,300,DI,NO,...,1,4,N,N,8.50,N,N,d27251fc-659a-4eff-b857-c70fba7f171e,0.355118,"LINESTRING (-80.50294 43.42350, -80.50294 43.4..."
2,77755,10110,ACTIVE,KIT 6,5393,67.9m 450mm DI,TREATED,450,DI,NO,...,6,8,N,N,6.10,N,Y,61131b80-62f5-4690-bef6-954a1a17bb35,67.852910,"LINESTRING (-80.53911 43.41910, -80.53914 43.4..."
3,77756,82566,ACTIVE,KIT 6,5393,7m 450mm DI,TREATED,450,DI,NO,...,6,8,N,N,4.58,N,Y,b096bc5e-7ad4-4b8d-a63a-f4ad0df3d825,7.039328,"LINESTRING (-80.53956 43.41858, -80.53959 43.4..."
4,77757,82568,ACTIVE,KIT 6,5394,15m 450mm DI,TREATED,450,DI,NO,...,6,8,N,N,8.50,N,Y,182aebfc-a616-49a0-ae2a-76af99b0445d,14.956131,"LINESTRING (-80.53998 43.41811, -80.54009 43.4..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15900,230400,149536,ACTIVE,KIT 2W,102860,123.1m 300mm PVC,TREATED,300,PVC,NO,...,5,44,N,N,-1.00,N,N,cce1f00e-2d0a-496a-b3b8-7ca35e42bf86,123.098600,"LINESTRING (-80.44274 43.36841, -80.44276 43.3..."
15901,230689,148640,ACTIVE,KIT 5,604949,8.9m 0mm PVC,TREATED,0,PVC,NO,...,6,0,N,N,-1.00,N,N,eefc102d-75ab-4cc5-b515-29acf3384a39,8.915553,"LINESTRING (-80.49008 43.37974, -80.48999 43.3..."
15902,230690,148644,ACTIVE,KIT 5,21652,3m 0mm PVC,TREATED,0,PVC,NO,...,6,0,N,N,-1.00,N,N,6ea9ebc4-3e30-45b2-affc-ed6de052328f,2.988428,"LINESTRING (-80.49239 43.37950, -80.49239 43.3..."
15903,230692,149554,ACTIVE,KIT 4,23038,17.8m 0mm DI,TREATED,0,DI,NO,...,5,23,N,N,8.50,N,N,14b4b437-ee61-4d16-aa1e-22a4d679a4a4,17.803460,"LINESTRING (-80.42659 43.43547, -80.42656 43.4..."


In [7]:
# Rename the mains column `GlobalID` to `GLOBALID` so it is spelled the same
# way as the corresponding column of the breaks frame. `mains` is rebound to
# the renamed copy.
mains = mains.rename(columns={'GlobalID': 'GLOBALID'})

In [8]:
# Left-merge the breaks (`df`) with `mains` on GLOBALID, keeping every break
# row, and show 7 random rows of the result.
# NOTE(review): in the recorded sample every mains-side column is empty, so
# GLOBALID does not appear to link the two layers -- confirm before relying
# on this join.
merged_df = df.merge(mains, on='GLOBALID', how='left')
merged_df.sample(7)

Unnamed: 0,OBJECTID_x,WATBREAKINCIDENTID,INCIDENT_DATE,BREAK_TYPE,ROAD_CLOSED,SIDEWALK_CLOSED,HOUR_IMPACTED,UNITS_IMPACTED,CW_SERVICE_REQUEST,STATUS_x,...,CRITICALITY,REL_CLEANING_AREA,REL_CLEANING_SUBAREA,UNDERSIZED,SHALLOW_MAIN,CONDITION_SCORE,OVERSIZED,CLEANED,Shape__Length,geometry_y
226,8211,1433,1262044800000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,,,,,,,,,,
2691,51522,145394,1644994243000,MAIN,Open,Open,4-8 hours,0-50,,REPAIR COMPLETED,...,,,,,,,,,,
2643,44162,144194,1638271095000,MAIN,Closed,Closed,16-20 hours,200-10000,,REPAIR COMPLETED,...,,,,,,,,,,
575,8560,277,879638400000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,,,,,,,,,,
919,8904,1995,1402963200000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,,,,,,,,,,
1734,9719,515,918086400000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,,,,,,,,,,
351,8336,173,1102550400000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,,,,,,,,,,


In [30]:
# Outer-merge `mains` with the breaks (`df`), matching mains.WATMAINID against
# df.ASSETID and keeping unmatched rows from both sides.
# NOTE(review): the recorded output for `merged` shows columns such as
# `WATMAINID_x`, `latitude` and `longitude` that are not listed for either
# frame elsewhere in this notebook, so it was presumably produced from a
# different kernel state -- re-run from a fresh kernel to confirm.
merged = mains.merge(df, left_on='WATMAINID', right_on='ASSETID', how='outer')

In [31]:
# (rows, columns) of the outer merge stored in `merged`.
merged.shape

(17473, 110)

In [32]:
# Inspect 25 random rows of `merged` (unseeded, so rows differ on each run).
merged.sample(25)

Unnamed: 0,OBJECTID_x,WATMAINID_x,STATUS,PRESSURE_ZONE_x,ROADSEGMENTID,MAP_LABEL_x,CATEGORY_x,PIPE_SIZE_x,MATERIAL_x,LINED_x,...,UNDERSIZED_y,SHALLOW_MAIN_y,CONDITION_SCORE_y,OVERSIZED_y,CLEANED_y,GlobalID_y,Shape__Length_y,geometry_y,latitude,longitude
16179,199060.0,143304.0,ACTIVE,KIT 4,22319.0,61.9m 150mm PVC,TREATED,150.0,PVC,NO,...,,,,,,,,,,
368,78130.0,82954.0,ACTIVE,KIT 4,20700.0,7.7m 150mm DI,TREATED,150.0,DI,NO,...,,,,,,,,,,
11851,90313.0,23760.0,ACTIVE,KIT 4,10584.0,85.5m 150mm CI,TREATED,150.0,CI,NO,...,,,,,,,,,,
6677,84904.0,80006.0,ACTIVE,KIT 4,12649.0,91.5m 300mm DI,TREATED,300.0,DI,NO,...,,,,,,,,,,
2762,80664.0,35280.0,ACTIVE,KIT 4,6585.0,230.6m 150mm CI,TREATED,150.0,CI,NO,...,,,,,,,,,43.427706,-80.492606
3850,81821.0,86486.0,ACTIVE,KIT 4,15743.0,5.5m 300mm CI,TREATED,300.0,CI,NO,...,,,,,,,,,,
14091,136306.0,97826.0,ACTIVE,KIT 4,103263.0,2.7m 200mm PVC,TREATED,200.0,PVC,NO,...,,,,,,,,,,
11596,89979.0,42530.0,ACTIVE,KIT 4,6508.0,260.9m 150mm CI,TREATED,150.0,CI,NO,...,,,,,,,,,43.430088,-80.508147
1301,79157.0,68909.0,ACTIVE,KIT 5,600546.0,33m 150mm PVC,TREATED,150.0,PVC,NO,...,,,,,,,,,,
16448,203243.0,145162.0,ACTIVE,KIT 6,105529.0,96.5m 300mm PVC,TREATED,300.0,PVC,NO,...,,,,,,,,,,


In [9]:
# Which column names appear in both frames? These are the candidates for a
# merge key.
set(df.columns).intersection(set(mains.columns))

{'GLOBALID', 'OBJECTID', 'ROADSEGMENTID', 'STATUS', 'geometry'}

In [10]:
# List every column of the mains frame.
mains.columns

Index(['OBJECTID', 'WATMAINID', 'STATUS', 'PRESSURE_ZONE', 'ROADSEGMENTID',
       'MAP_LABEL', 'CATEGORY', 'PIPE_SIZE', 'MATERIAL', 'LINED', 'LINED_DATE',
       'LINED_MATERIAL', 'INSTALLATION_DATE', 'ACQUISITION', 'CONSULTANT',
       'OWNERSHIP', 'BRIDGE_MAIN', 'BRIDGE_DETAILS', 'CRITICALITY',
       'REL_CLEANING_AREA', 'REL_CLEANING_SUBAREA', 'UNDERSIZED',
       'SHALLOW_MAIN', 'CONDITION_SCORE', 'OVERSIZED', 'CLEANED', 'GLOBALID',
       'Shape__Length', 'geometry'],
      dtype='object')

In [11]:
# List every column of the breaks frame.
df.columns

Index(['OBJECTID', 'WATBREAKINCIDENTID', 'INCIDENT_DATE', 'BREAK_TYPE',
       'ROAD_CLOSED', 'SIDEWALK_CLOSED', 'HOUR_IMPACTED', 'UNITS_IMPACTED',
       'CW_SERVICE_REQUEST', 'STATUS', 'STATUS_DATE', 'WORKORDER',
       'RETURN_TO_NORMAL', 'BREAK_NATURE', 'BREAK_APPARENT_CAUSE',
       'REPAIR_TYPE', 'NEW_SECTION_LENGTH', 'MAINTENANCE_DESC',
       'VALVES_CLOSED', 'VALVES_OPENED', 'HYDRANTS_CALLED_OUT',
       'HYDRANTS_CALLED_BACK_IN', 'POSITIVE_PRESSURE_MAINTANED',
       'AIR_GAP_MAINTANED', 'DISINFECTED', 'MECHANICAL_REMOVAL',
       'FLUSHING_EXCAVATION', 'HIGHER_VELOCITY_FLUSHING', 'ANODE_INSTALLED',
       'BREAK_CATEGORIZATION', 'BACTERIA_TESTING_DATE',
       'HEALTH_DEPT_NOTIFICATION', 'MOECC_SAC_NOTIFICATION',
       'SAC_REFERENCE_NO', 'LOCAL_MOE_OFFICE', 'BWA_DWA', 'BWA_DWA_DECLARED',
       'PROCEEDURES_FOLLOWED', 'RECORD_CHANGE_REQD', 'ROADSEGMENTID',
       'CIVIC_NUMBER', 'STREET', 'ASSETID', 'ASSET_DEPTH', 'FROST_DEPTH',
       'ASSET_SIZE', 'ASSET_YEAR_INSTALLED',

In [12]:
# Left-merge the breaks (`df`) with `mains` on ROADSEGMENTID. This rebinds
# `merged_df`, replacing the earlier GLOBALID-based merge.
# NOTE(review): the recorded result has 10,720 rows versus 2,766 rows in
# `df`, so a road segment matches several mains and each break is repeated
# once per matching main -- this is not a one-to-one break-to-main link.
merged_df = df.merge(mains, on='ROADSEGMENTID', how='left')

In [13]:
# (rows, columns) of the ROADSEGMENTID merge stored in `merged_df`.
merged_df.shape

(10720, 79)

In [15]:
# Inspect 25 random rows of `merged_df` (unseeded, so rows differ on each run).
merged_df.sample(25)

Unnamed: 0,OBJECTID_x,WATBREAKINCIDENTID,INCIDENT_DATE,BREAK_TYPE,ROAD_CLOSED,SIDEWALK_CLOSED,HOUR_IMPACTED,UNITS_IMPACTED,CW_SERVICE_REQUEST,STATUS_x,...,REL_CLEANING_AREA,REL_CLEANING_SUBAREA,UNDERSIZED,SHALLOW_MAIN,CONDITION_SCORE,OVERSIZED,CLEANED,GLOBALID_y,Shape__Length,geometry_y
1181,8311,351,910224000000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,1,2,N,N,7.75,N,N,ce5647b5-1ff0-4be3-8b88-aeb677ee718f,67.507818,"LINESTRING (-80.50383 43.42856, -80.50384 43.4..."
5725,9448,1269,912643200000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,2,20,N,N,9.85,N,N,81cb9c05-5e28-4e71-914c-63b8d11721c0,2.553896,"LINESTRING (-80.48180 43.44923, -80.48181 43.4..."
6232,9579,1646,662688000000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,6,43,N,N,5.25,N,Y,7c10c475-5740-4d8a-b1fa-e06f6471eb2b,231.952209,"LINESTRING (-80.52895 43.44073, -80.52904 43.4..."
4637,9178,1709,1323129600000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,1,20,N,N,5.35,N,N,c4552871-9841-4394-8f25-5c4929a0a5c4,64.715436,"LINESTRING (-80.50634 43.43625, -80.50630 43.4..."
8887,10253,2417,1546387200000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,2,10,N,N,7.85,N,N,7b3142ee-df10-4c2c-84c7-802f73db5aef,92.145456,"LINESTRING (-80.46043 43.44567, -80.46054 43.4..."
9771,22082,193397,1581453091000,MAIN,Partially Closed,Closed,4-8 hours,0-50,,REPAIR COMPLETED,...,1,31,N,N,5.75,N,N,79e86362-3c3f-413c-9121-b573b5a5213f,189.873043,"LINESTRING (-80.52720 43.45004, -80.52714 43.4..."
2903,8737,994,965260800000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,4,13,N,N,8.5,N,N,9e30096e-fe41-4129-a6ac-b93fc4837fe4,6.718685,"LINESTRING (-80.45348 43.46749, -80.45347 43.4..."
5947,9501,972,1038268800000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,6,20,N,N,6.35,N,Y,eec7046a-f31d-472d-8c76-5b4dd2a4d66e,376.093027,"LINESTRING (-80.51636 43.42254, -80.51640 43.4..."
8386,10134,1473,1269216000000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,1,0,N,N,9.35,N,N,77f6f34d-247b-43a3-b920-40e92292483c,20.985373,"LINESTRING (-80.53282 43.44639, -80.53294 43.4..."
7827,9976,1109,1173052800000,MAIN,Open,Open,8-12 hours,,,REPAIR COMPLETED,...,1,4,N,N,5.45,N,N,839e48c1-3ad1-4d4e-b2a7-1b52f90534eb,63.786966,"LINESTRING (-80.50916 43.42396, -80.50922 43.4..."


In [16]:
# Per-column dtypes and non-null counts of `merged_df`.
# NOTE(review): the recorded output reports a plain pandas DataFrame rather
# than a GeoDataFrame.
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10720 entries, 0 to 10719
Data columns (total 79 columns):
 #   Column                       Non-Null Count  Dtype   
---  ------                       --------------  -----   
 0   OBJECTID_x                   10720 non-null  int64   
 1   WATBREAKINCIDENTID           10720 non-null  int64   
 2   INCIDENT_DATE                10720 non-null  int64   
 3   BREAK_TYPE                   10720 non-null  object  
 4   ROAD_CLOSED                  10720 non-null  object  
 5   SIDEWALK_CLOSED              10720 non-null  object  
 6   HOUR_IMPACTED                10720 non-null  object  
 7   UNITS_IMPACTED               1123 non-null   object  
 8   CW_SERVICE_REQUEST           75 non-null     float64 
 9   STATUS_x                     10720 non-null  object  
 10  STATUS_DATE                  10099 non-null  float64 
 11  WORKORDER                    6095 non-null   float64 
 12  RETURN_TO_NORMAL             382 non-null    float64 
 13  B