In [1]:
# Weather API Key
# api_key uniquely belongs to Sean
api_key_loc = './../weather_apikey.txt'

f = open(api_key_loc, "r")
key = f.readline()
f.close()

In [2]:
import pandas as pd
import numpy as np
import datetime
import json
import requests

data = pd.read_csv('PositiveTrainingData.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11455 entries, 0 to 11454
Data columns (total 18 columns):
Start_Time           11455 non-null object
Start_Lat            11455 non-null float64
Start_Lng            11455 non-null float64
Zipcode              11455 non-null int64
Temperature(F)       11455 non-null float64
Humidity(%)          11455 non-null float64
Visibility(mi)       11455 non-null float64
Wind_Speed(mph)      11455 non-null float64
Weather_Condition    11455 non-null object
Year                 11455 non-null int64
Month                11455 non-null int64
Day                  11455 non-null int64
Hour                 11455 non-null int64
Weekday              11455 non-null int64
Day_of_Year          11455 non-null int64
Cluster              11455 non-null int64
Cluster_Lat          11455 non-null float64
Cluster_Lng          11455 non-null float64
dtypes: float64(8), int64(8), object(2)
memory usage: 1.6+ MB


In [3]:
cluster_by_size = data['Cluster'].value_counts()
cluster_by_size.shape

(138,)

In [4]:
def get_weather_info(lat, lng, year, month, day, hr):
    dt = datetime.date(year, month, day)
    str_hr = ""
    if(hr < 10):
        str_hr = "0"+str(hr)
    else:
        str_hr = str(hr)
    tm = dt.strftime("%Y-%m-%d") + "T" + str_hr + ":00:01"  # get the exact hour's forecast

    url = "https://api.darksky.net/forecast/" + key + \
    "/" + str(lat) + "," + str(lng) + "," + tm + \
    "?exclude=minutely,flags,daily,alerts,hourly"
    
    response = requests.get(url)
    data = response.json()
    return data

In [5]:
def get_exact_month_day(year, day_of_year):
    month_days = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    leap_days = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    leap_year = (year % 4 == 0)
    
    if(day_of_year < 1 or (leap_year and day_of_year > 366) or ((leap_year==False) and day_of_year > 365)): return 0, 0
    
    month = 1
    
    if(day_of_year <= 31): 
        return month, day_of_year
    else:
        if(leap_year != True):
            while(True):
                if(day_of_year > month_days[month-1]):
                    day_of_year -= month_days[month-1]
                    month += 1
                else:
                    break
        else:
            while(True):
                if(day_of_year > leap_days[month-1]):
                    day_of_year -= leap_days[month-1]
                    month += 1
                else:
                    break
    
    return month, day_of_year

In [6]:
import random

accident_features = ['Temperature(F)', 'Humidity(%)', 'Visibility(mi)', 'Wind_Speed(mph)',\
                     'Year', 'Month', 'Day', 'Day_of_Year', 'Hour',\
                     'Cluster', 'Cluster_Lat', 'Cluster_Lng']

non_accident = pd.DataFrame(columns=accident_features)

##################### set up #########################
random.seed() #system time as a seed
min_yr = data['Year'].min()
max_yr = data['Year'].max()

f = open('weather_info.txt', 'w')

cluster_num = 0
while(cluster_num < cluster_by_size.shape[0]):
    # generate smaples cluster by cluster
    cluster_size = cluster_by_size[cluster_num] # e.g.) cluster 0's size = cluster_by_size[0]
    i = 1
    cluster_lat = data[data['Cluster'] == cluster_num]['Cluster_Lat'].unique()[0]
    cluster_lng = data[data['Cluster'] == cluster_num]['Cluster_Lng'].unique()[0]
    print("Cluster # = ", cluster_num)
    print("Cluster Density =", cluster_size)
    print("Cluster Lat = ", cluster_lat)
    print("Cluster Lng = ", cluster_lng)
    
    while (i < cluster_size*3): # generate x3 negative samples for positive samples
        _day = 0
        _hour = random.randint(0,23)
        _year = random.randint(min_yr, max_yr)
        if(_year == 2016):
            _day = random.randint(83,366) # leap year
        elif(_year == 2019):
            _day = random.randint(1, 89)
        else:
            _day = random.randint(1,365)
        
        is_accident = data.loc[(data['Cluster'] == cluster_num) &\
                              (data['Day_of_Year'] == _day) &\
                              (data['Hour'] == _hour) &\
                              (data['Year'] == _year)]
        
        if (is_accident.empty): # no duplice in a cluster at a given a specific time
            _month, _m_day = get_exact_month_day(_year, _day)
            
            print(_year, _month, _m_day, _hour)
            weather_data = get_weather_info(cluster_lat, cluster_lng, _year, _month, _m_day, _hour)
            
            f.write(str(weather_data))
            f.write('\n')
            
            try:
                _temperature = weather_data['currently']['temperature']
                _humidity = weather_data['currently']['humidity'] * 100
                _visibility = weather_data['currently']['visibility']
                _windspeed = weather_data['currently']['windSpeed']
            except KeyError:
                pass
            
            no_accident = pd.DataFrame([[_temperature, _humidity, _visibility, _windspeed,\
                                       _year, _month, _m_day, _day, _hour,\
                                       cluster_num, cluster_lat, cluster_lng]],\
                                      columns = accident_features)
            non_accident = non_accident.append(no_accident, ignore_index = True)
            i += 1
        else:
            print("duplicate found in cluster:", cluster_num, "on ", _day, "th day at ", _hour, "th hour")
    
    print("Cluster # = ", cluster_num, " finished")
    cluster_num += 1 # go to the next cluster

f.close()

Cluster # =  0
Cluster Density = 140
Cluster Lat =  37.250607489051106
Cluster Lng =  -121.91147545985399
2018 6 9 17
2019 3 4 5
2016 8 20 8
2018 10 14 13
2017 4 30 10
2019 2 28 3
2017 3 28 16
2016 9 27 18
2016 7 8 9
2018 1 8 10
2017 12 10 23
2019 3 29 7
2016 9 16 7
2016 9 10 14
2016 8 26 15
2019 1 2 3
2017 8 1 2
2017 12 29 11
2019 2 6 7
2019 3 2 15
2017 6 2 19
2019 1 5 12
2018 9 22 0
2017 11 19 13
2018 6 2 9
2016 7 21 6
2018 1 1 5
2018 5 6 17
2017 8 26 12
2018 8 4 8
2017 12 4 23
2017 7 21 21
2016 7 3 7
2018 9 2 17
2019 2 4 19
2019 1 7 23
2017 7 30 14
2016 8 13 20
2019 2 11 17
2019 2 2 14
2018 6 10 0
2019 2 2 22
2017 3 16 9
2017 12 9 3
2019 3 3 10
2017 1 17 4
2019 3 19 11
2016 9 12 23
2019 2 18 19
2017 3 5 8
2016 9 12 19
2017 10 17 11
2019 2 11 13
2019 3 1 8
2019 1 1 22
2016 11 28 11
2019 2 13 18
2017 10 6 7
2016 11 6 15
2017 3 18 17
2016 9 4 15
2016 4 13 3
2018 5 6 3
2017 11 27 12
2019 3 26 23
2019 1 25 1
2016 6 8 15
duplicate found in cluster: 0 on  142 th day at  17 th hour
2016 10 

2019 1 15 4
2019 2 11 3
2016 8 7 18
2017 11 15 6
2017 11 12 22
2017 8 1 7
2019 1 30 4
2017 8 9 4
2018 11 21 6
2019 1 19 15
2016 8 28 20
2019 1 25 23
2016 5 14 3
2016 6 22 9
2016 7 6 18
2016 3 26 2
2017 9 29 18
2019 3 7 1
2016 9 17 7
2018 10 19 21
2019 1 13 22
2016 10 4 16
2017 12 6 14
2019 3 22 22
2019 1 10 17
2019 3 23 19
2018 7 29 17
2016 4 19 6
2019 2 10 6
2016 6 2 4
2016 12 19 7
2017 8 12 5
2018 2 7 23
2018 1 20 2
2018 8 19 21
2017 3 22 4
2016 7 11 3
2019 3 13 12
2016 11 5 3
2016 7 25 4
2017 12 10 18
2017 3 7 3
2017 9 25 13
2016 4 21 8
2016 7 25 19
2016 10 21 3
2018 3 7 17
2019 3 3 8
2016 10 25 8
2016 9 12 13
2017 8 31 11
2018 2 22 9
2019 1 5 0
2017 3 5 0
2018 5 9 5
2018 2 4 17
2017 4 23 20
2018 6 27 16
2017 1 25 15
2018 11 1 17
2018 12 15 10
2016 11 27 11
2017 4 6 9
2017 7 20 13
2017 9 23 4
2019 3 4 1
2019 3 1 6
2017 2 10 9
2016 8 27 8
2016 11 9 13
2017 8 20 6
2017 2 25 17
2019 3 14 21
2019 1 15 1
2016 6 17 12
2017 7 30 17
2018 3 17 6
2019 3 27 4
2019 1 14 4
2017 6 21 11
2017 12 2

2019 2 1 19
2016 8 14 3
2019 3 22 7
2018 11 13 8
2016 6 6 16
2017 8 15 2
2019 3 21 13
2017 9 26 0
2019 1 2 12
2016 11 13 0
2018 12 17 6
2016 9 8 23
2018 12 24 7
2018 6 5 18
2019 3 21 2
2019 1 4 11
2017 1 31 9
2018 2 27 2
2016 8 12 2
2018 2 1 9
2016 7 10 18
2018 5 4 11
2019 1 15 20
2019 1 2 7
2016 7 17 20
2017 7 31 18
2016 12 15 16
2017 6 1 2
2017 8 8 15
2018 4 28 2
2016 6 14 2
duplicate found in cluster: 4 on  96 th day at  3 th hour
2018 1 5 9
2017 8 27 16
2019 2 23 15
2017 10 23 0
2018 3 11 5
2017 6 7 3
2017 5 5 8
2016 8 13 7
2019 3 23 0
2016 4 15 20
2019 3 10 1
2018 12 31 2
2018 1 3 7
2019 2 20 15
2019 2 4 16
2019 3 22 21
2018 12 31 15
2016 6 2 19
2017 2 18 4
2016 5 7 13
2019 1 3 4
2017 4 14 7
2019 1 18 3
2019 1 13 10
2016 12 5 13
2019 3 20 23
2017 10 27 15
2016 10 24 16
2016 4 6 18
2018 6 21 0
2017 6 22 16
2018 5 7 3
2018 6 26 21
2019 3 8 1
2018 12 21 12
2016 5 28 2
2017 7 28 12
2016 8 12 9
2018 2 24 2
2016 12 13 22
2019 1 6 2
2018 2 13 17
2019 1 21 3
2016 11 18 8
2019 2 3 1
2018 7

2016 5 18 7
2017 11 3 19
2016 5 15 18
2017 7 25 4
2019 2 17 4
2019 2 19 11
2016 4 24 21
2017 4 1 21
2019 2 7 10
2018 4 22 10
2017 4 17 20
2019 1 6 21
2017 8 3 16
2018 7 1 8
2019 3 21 8
2019 3 21 6
2016 8 5 22
2017 3 8 22
2017 8 29 21
2016 11 6 4
2016 9 7 9
2019 1 16 0
2016 5 23 22
2019 1 6 9
2018 3 26 2
2019 2 24 4
2018 6 18 3
2018 10 26 2
2017 11 13 8
2019 2 13 12
2016 9 25 1
2019 3 15 18
2018 5 30 14
2017 8 5 13
2019 2 21 8
2019 2 13 7
2019 2 1 19
2016 6 30 9
2016 10 19 5
2019 3 25 19
2018 2 18 3
2018 11 11 8
2018 5 18 19
2019 3 24 17
2017 8 19 2
2019 1 14 13
2019 1 21 13
2018 5 16 14
2018 3 10 19
2017 12 8 14
2016 7 28 16
2019 3 18 6
2016 12 27 5
2019 2 15 13
2019 3 3 12
2019 1 8 23
2018 3 15 16
2017 3 13 8
2017 12 4 7
2017 7 25 0
2018 2 25 10
2016 6 27 22
2016 10 29 5
2017 5 5 16
2016 3 23 18
2019 3 27 2
2017 12 30 16
Cluster # =  5  finished
Cluster # =  6
Cluster Density = 115
Cluster Lat =  37.35201072340426
Cluster Lng =  -121.83809152127662
2018 6 24 1
2018 4 5 4
2016 5 18 7
2

2019 1 9 18
2019 2 17 10
2019 3 25 4
2019 1 1 23
2018 12 15 15
2019 3 18 10
2019 2 22 11
2017 12 6 20
2016 7 3 15
2016 7 12 4
2016 8 22 18
2019 1 1 18
2017 4 20 4
2017 4 17 0
2016 6 1 23
2016 8 30 22
2019 2 8 23
2016 5 9 10
2018 5 2 12
2017 12 26 5
2019 1 23 17
2016 11 8 15
2019 3 25 1
2016 7 17 12
2019 1 18 14
2018 8 22 5
2019 3 23 8
2016 12 21 19
2019 1 10 12
2016 9 6 10
2017 9 1 15
2018 9 25 22
2016 9 20 7
2017 5 6 15
2017 9 17 15
2017 8 11 13
2017 3 29 2
2018 3 8 7
2016 11 26 20
2018 6 4 17
2019 3 2 16
2018 10 5 15
2017 9 7 1
2018 12 21 10
2019 3 12 10
2018 4 20 14
2017 12 15 9
2017 11 23 13
2019 3 19 9
2019 3 3 5
2017 6 29 2
2017 8 12 13
2019 2 27 17
2017 1 29 14
2016 11 13 4
2016 10 11 0
2016 7 11 18
2017 11 6 14
2016 11 15 2
2019 1 14 19
2018 2 27 10
2017 9 30 22
2017 5 27 23
2019 3 21 10
2017 9 19 15
2017 1 24 1
2018 6 30 13
2018 3 14 22
2016 9 16 2
2019 2 21 22
2018 7 27 22
2019 2 20 23
2018 12 13 6
2019 3 22 11
2018 9 28 16
2017 5 19 16
2019 1 1 22
2016 12 12 6
2017 7 26 21
2

2017 8 2 17
2017 12 2 10
2016 10 18 13
2016 10 11 21
2019 3 30 13
2016 9 30 6
2018 4 16 8
2016 5 19 8
2017 1 25 16
2017 11 10 3
2017 2 9 0
2018 4 28 13
2018 11 15 8
2018 3 25 5
2018 6 17 8
2017 2 9 8
2018 12 13 16
2019 3 18 10
2016 11 12 14
2018 7 20 10
2019 2 1 23
2017 8 11 16
2017 3 4 18
2018 5 24 7
2018 3 29 0
2017 7 9 20
2019 3 5 16
2016 7 23 3
2018 4 25 14
2017 11 29 13
2018 5 15 8
2019 3 17 5
2019 1 4 4
2016 8 26 8
2018 6 22 17
2019 1 14 20
2016 7 16 0
2016 11 28 9
2019 2 2 10
2016 10 15 8
2017 7 5 7
2018 7 15 21
2019 1 21 8
2018 8 23 6
2017 12 25 9
2016 12 30 11
2017 6 6 23
2019 3 18 10
2016 4 26 3
2017 9 16 4
2017 7 12 8
2019 1 11 18
2019 3 27 23
2018 1 7 10
2017 12 28 8
2019 2 8 20
2019 1 11 5
2016 7 24 11
2019 3 16 14
2019 1 30 7
2017 4 18 7
2017 2 26 22
2019 1 13 11
2017 11 13 5
2016 6 3 12
2019 1 24 6
2019 1 11 5
duplicate found in cluster: 11 on  120 th day at  10 th hour
2017 9 25 0
2016 3 26 3
2016 5 18 10
2017 1 13 23
2018 1 3 18
2016 8 5 7
2017 5 31 3
2019 1 20 9
2017 

2018 9 11 20
2016 5 16 16
2017 3 19 22
2016 4 26 7
2019 1 18 14
2016 8 17 22
2016 9 23 4
2016 10 22 12
2017 6 20 0
2018 9 28 13
2017 2 4 16
2017 8 31 20
2016 8 26 11
2016 8 11 2
2019 2 5 5
2018 7 5 17
2016 12 19 9
2019 1 19 4
2017 9 29 18
2019 2 26 5
2019 2 8 21
2016 9 13 4
2019 2 17 10
2019 3 17 5
2017 11 17 11
2018 9 16 10
2018 1 17 3
2016 10 12 20
2018 6 21 20
2017 8 22 8
2018 7 16 9
2016 7 4 18
2018 11 30 4
2019 2 1 6
2019 1 14 11
2016 5 20 0
2017 8 28 11
2019 1 13 20
2016 10 13 8
2018 11 6 16
2016 7 31 5
2017 5 10 14
2019 2 12 18
2018 1 10 20
2016 10 28 11
2019 2 24 21
2017 3 2 9
2018 3 21 0
2016 12 15 16
2016 8 4 1
2016 10 13 13
2018 11 14 10
2019 2 7 21
2016 11 8 9
2017 11 23 15
2018 7 19 15
2016 11 24 16
2019 2 8 16
2017 2 27 11
2017 10 22 12
2017 1 30 19
2018 9 1 22
2018 6 2 4
2018 4 29 0
2019 1 23 11
2018 9 18 2
2016 7 16 4
2018 12 13 8
2019 3 6 3
2019 2 5 15
2016 7 14 10
2016 10 6 11
2016 5 26 20
2017 10 9 13
2016 10 23 21
2016 6 16 6
2016 4 8 20
2017 3 12 17
2018 7 7 20
201

2019 3 16 14
2017 12 31 0
2017 9 21 8
2019 2 4 14
2019 1 31 4
2016 9 12 5
2019 1 15 3
2016 11 27 10
2019 2 2 19
2016 9 29 21
2018 1 30 23
2018 8 5 8
2018 5 16 1
2017 2 8 12
2016 8 2 7
2019 2 11 4
2016 4 29 17
2016 8 30 9
2017 8 31 7
2016 10 25 1
2017 2 27 18
2019 2 22 0
2019 2 27 14
2016 6 8 20
2018 11 27 9
2016 6 10 19
2019 3 12 14
2016 9 27 21
2017 4 25 0
2019 1 7 21
2016 11 9 13
2018 4 15 21
2019 3 22 20
2017 5 3 3
2017 3 1 15
2019 1 22 22
2016 8 11 3
2018 2 24 15
2017 2 16 0
2017 8 5 9
2017 8 31 3
2018 6 3 22
2017 1 13 20
2019 1 10 12
2016 9 16 10
2019 3 10 23
2019 3 8 17
2018 8 9 20
2018 12 22 9
2019 2 9 11
2018 4 5 6
2016 10 2 3
2019 3 15 6
2017 8 21 7
2018 9 20 8
2019 2 16 15
2019 3 1 1
2019 3 9 13
2018 3 1 23
2016 12 2 6
2017 4 24 5
2018 12 7 7
2019 1 1 1
2016 9 2 2
2017 12 9 21
2018 7 10 15
2019 3 6 22
2019 1 11 14
2017 8 27 1
2017 10 11 22
2016 11 1 21
2019 2 26 18
2018 5 4 12
2019 2 26 23
2017 11 3 23
2018 9 11 6
2018 5 6 13
2017 8 4 2
2016 4 14 14
2017 6 9 10
2019 2 10 6
20

2016 12 19 3
2019 2 21 12
2018 2 10 14
2017 2 16 22
2019 2 8 7
2019 2 16 14
2019 3 3 20
2019 3 19 19
2018 5 28 16
2018 1 5 23
2017 6 24 23
2018 1 18 10
2018 2 3 23
2019 1 13 15
2018 12 5 10
2018 2 28 2
2018 7 25 21
2018 5 29 17
2019 2 26 0
2016 3 26 9
2019 2 1 2
2017 10 28 3
2019 1 30 18
2018 12 28 7
2016 10 30 12
2018 7 9 7
2017 2 12 12
2018 4 9 13
2017 12 12 2
2016 8 13 20
2018 10 9 19
2016 10 10 1
2018 3 30 14
2019 3 13 18
2017 2 18 13
2018 10 19 19
2019 1 29 5
2017 7 24 3
2017 8 31 11
2016 4 8 13
2016 11 12 6
2017 5 10 18
2019 2 6 1
2018 5 9 12
2016 10 28 7
2016 6 16 4
2019 2 3 1
2019 3 18 19
2016 12 25 5
2016 7 27 1
2018 11 4 19
2019 1 17 20
2019 3 11 10
2019 1 26 16
2018 9 8 11
2019 3 18 4
2016 7 3 5
2016 7 20 3
2019 2 16 0
2017 9 18 2
2016 4 6 0
2016 5 20 11
2018 7 26 19
duplicate found in cluster: 18 on  252 th day at  9 th hour
2016 4 6 5
2018 2 19 18
2017 10 18 15
2018 6 19 20
2019 3 5 4
2017 8 28 1
2019 2 14 14
2018 12 31 21
2019 3 6 17
2018 1 28 14
2017 7 26 18
2017 1 20 10

2018 5 26 21
2019 3 14 20
2017 3 29 12
2017 11 24 23
2016 6 24 0
2016 4 25 20
2017 8 31 8
2016 5 6 2
2017 3 3 16
duplicate found in cluster: 18 on  265 th day at  18 th hour
2016 4 29 3
2016 8 17 6
2018 9 29 9
2018 12 17 1
Cluster # =  18  finished
Cluster # =  19
Cluster Density = 25
Cluster Lat =  37.37199116666667
Cluster Lng =  -121.91776645833332
2019 3 15 6
2018 7 27 0
2017 4 25 18
2016 4 23 14
2019 3 24 23
2017 4 30 21
2018 7 18 23
2017 8 5 3
2017 6 26 14
2018 3 12 7
2018 2 11 11
2016 4 19 1
2018 3 12 20
2019 1 28 2
2019 1 8 14
2017 1 13 16
2019 2 28 8
2019 2 17 23
2017 8 23 20
2018 3 6 3
2017 2 22 14
2018 11 28 23
2019 3 16 18
2017 9 9 12
2018 4 3 9
2016 8 22 7
2017 9 5 12
2019 1 21 20
2016 7 15 14
2018 12 14 2
2019 1 6 20
2016 7 11 5
2018 4 15 22
2017 7 12 5
2017 3 19 9
2017 2 2 20
2016 11 19 5
2016 5 15 18
2017 4 2 4
2018 11 15 3
2018 9 2 15
2018 4 23 19
2018 12 30 3
2018 11 8 7
2018 12 11 16
2017 9 15 4
2019 3 3 17
2019 2 23 16
2017 5 13 21
Cluster # =  19  finished
Cluster 

2016 9 12 17
duplicate found in cluster: 20 on  125 th day at  19 th hour
2017 7 13 20
2019 1 28 14
2017 4 9 15
2016 11 14 11
2018 5 4 4
2019 3 4 21
2019 1 28 17
2019 3 6 15
2019 1 18 23
2016 8 2 14
2017 1 6 7
2019 2 1 17
2016 12 2 19
2016 12 28 11
2019 2 24 16
2016 4 9 20
2016 11 14 19
2018 4 13 2
2017 11 10 9
2018 5 29 1
2019 3 14 11
2019 2 12 13
2018 2 22 13
2019 3 29 22
2016 11 25 14
2019 3 7 18
2017 7 2 4
2019 3 2 21
2016 9 4 2
2017 10 25 8
2017 10 22 4
2018 11 8 0
2017 8 6 14
2018 6 14 19
2019 1 2 1
2019 2 26 23
2017 9 28 5
2016 12 15 15
2019 1 25 15
2017 6 4 23
2019 2 7 10
2018 4 27 6
2019 2 16 18
2018 6 25 11
2019 3 1 3
2016 5 19 11
2016 12 4 17
2017 9 3 16
2016 11 9 12
2016 5 30 10
2016 10 14 21
2019 1 31 1
2018 6 16 22
2016 5 13 8
2018 12 24 23
2019 3 16 7
2017 12 6 11
2017 5 5 9
2017 11 14 23
2016 8 8 16
2017 11 13 0
2019 3 1 15
2017 3 28 1
2016 4 15 8
2018 10 5 8
2017 12 30 10
2018 5 13 4
2019 1 13 15
2016 7 14 18
2017 12 31 0
2019 2 4 13
2019 3 6 0
2018 12 27 7
2017 8 10 8

2017 9 1 15
2017 12 4 9
2018 3 4 4
2017 3 24 5
2018 11 9 14
2018 7 23 13
2017 8 10 15
2017 6 14 8
2019 2 16 4
2016 11 27 4
2017 6 29 18
2019 2 21 15
2019 2 11 11
2016 7 28 0
2019 2 14 15
2019 1 27 2
2017 3 10 20
2016 7 18 4
2017 7 9 9
2019 2 10 3
2018 3 21 13
2019 3 30 22
2018 7 10 2
2019 3 24 6
2016 11 12 15
2017 5 30 15
2016 12 25 19
2016 8 16 10
2018 5 21 19
2017 3 12 23
2017 5 8 17
duplicate found in cluster: 22 on  213 th day at  7 th hour
2019 2 22 13
2019 2 1 18
2019 1 2 21
2018 9 19 19
2019 2 4 15
2019 1 26 13
2017 9 29 23
2018 8 7 11
2018 3 16 23
2018 7 31 3
2018 1 30 2
2018 8 19 1
Cluster # =  22  finished
Cluster # =  23
Cluster Density = 110
Cluster Lat =  37.320639514563105
Cluster Lng =  -121.99548312621367
2019 3 29 22
2018 10 25 13
2018 4 23 19
2016 7 17 4
2016 9 6 23
2019 1 4 11
2016 6 21 12
2017 7 25 5
2019 2 1 16
2018 9 15 21
2018 5 26 8
2019 2 16 13
2018 4 12 6
2017 7 19 11
2018 2 5 9
2017 2 18 7
2016 8 30 19
2019 2 26 11
2019 1 27 16
2017 5 28 13
2018 2 18 5
2016 7

2016 12 16 23
2017 6 17 10
2016 5 29 20
2016 5 10 7
2019 3 5 0
2019 2 7 6
2018 4 21 17
2019 1 15 15
2018 1 19 7
2017 8 22 19
2018 6 4 16
2017 6 12 1
2019 3 9 20
2019 2 7 13
2016 10 2 7
2017 5 29 4
2017 6 16 9
2016 4 28 1
2019 2 2 14
2019 2 14 19
2018 3 11 18
2019 1 30 19
2019 3 9 13
2016 10 20 4
2018 8 16 11
2019 1 27 20
2019 1 6 3
2018 3 2 20
2017 8 11 17
2016 9 22 12
2019 3 29 2
2018 1 8 15
2019 1 24 2
2018 9 1 14
2017 3 12 22
2018 5 25 11
2017 10 9 1
2019 2 23 1
2016 4 26 23
2017 3 28 22
2016 6 8 5
2016 8 2 10
2016 8 18 1
2016 12 22 14
2017 4 11 8
2016 12 4 9
2018 9 28 5
2018 5 27 17
2019 3 29 15
2019 1 16 10
2017 12 21 13
2016 8 10 12
2017 4 25 8
2017 12 12 0
2017 11 23 17
2016 10 20 22
2016 5 22 22
2017 11 27 17
2019 3 30 9
2019 2 11 3
2017 4 5 5
2018 10 7 22
2019 3 6 23
2018 5 22 14
2017 6 15 15
2019 1 13 2
2018 7 12 3
2018 12 26 6
2018 7 27 9
2018 3 26 12
2019 2 1 1
2018 10 14 2
2019 3 29 1
2016 9 22 18
2017 5 2 20
2016 10 12 1
2017 7 8 16
2017 7 6 10
2016 5 17 22
2016 9 21 18
2

2018 5 22 10
2016 8 22 11
2017 4 21 23
2019 3 30 16
2019 1 25 20
2016 5 11 1
2016 5 5 14
2019 1 16 3
2019 3 20 17
2018 11 24 10
2018 11 25 15
2018 12 27 14
2016 12 20 4
2017 3 26 21
2018 7 23 12
2017 4 13 19
2019 3 11 23
2016 9 11 12
2019 2 23 7
2019 2 12 2
2017 11 19 23
2018 7 2 8
2017 7 7 0
2018 2 20 9
2019 1 19 0
2018 2 26 2
2019 2 2 23
2016 11 10 15
2018 6 10 1
2017 1 24 15
2017 7 4 17
2018 4 19 16
2016 8 22 15
2017 5 9 4
2019 2 4 13
2016 7 7 2
2018 12 23 19
2016 12 23 12
2018 11 16 11
2018 3 5 18
2018 10 11 21
2017 5 5 11
2016 9 5 20
2017 3 12 16
2016 7 4 9
2017 12 5 22
2018 12 3 20
2016 5 9 16
2017 3 27 4
2018 8 28 21
2016 10 27 5
2018 12 29 5
2016 5 22 7
2016 5 2 20
2016 4 7 4
2017 4 4 14
2017 5 4 6
2017 6 30 16
2019 2 21 19
2018 9 11 4
2019 1 1 14
2018 7 3 12
2016 3 26 7
2018 2 2 1
2018 2 25 8
2016 6 12 0
2016 5 14 2
2016 12 9 22
2019 1 3 15
2017 7 30 17
2016 11 8 15
2017 5 16 17
2019 2 20 11
2019 2 7 15
2018 7 28 6
2017 11 15 2
2019 1 2 0
2018 12 13 1
2019 2 6 16
2018 1 26 8
2

2019 3 15 12
2018 2 13 15
2018 6 26 16
2016 8 27 7
2018 7 17 12
2018 7 30 11
2019 3 10 18
2018 3 22 17
2019 1 8 1
2019 1 30 9
2017 7 25 7
2016 6 11 18
2016 5 15 1
2016 3 23 16
2019 1 8 19
2016 12 29 23
2016 6 17 17
2017 9 21 6
2017 7 10 12
2017 12 9 21
2018 8 7 21
2016 12 12 22
2016 6 24 11
2019 1 4 22
2017 1 1 9
2018 6 17 0
2017 11 9 10
2018 9 3 22
2019 1 6 13
2018 7 14 20
2016 10 8 19
2016 3 29 17
2017 4 13 18
2018 6 24 16
2017 7 18 15
2017 10 3 5
2018 4 19 16
2019 2 27 21
2019 2 14 11
2019 3 9 15
2017 9 30 16
2017 9 24 23
2016 12 23 11
2018 3 16 18
2017 12 4 23
2019 2 7 3
2018 4 13 10
2019 1 13 22
2016 4 18 4
Cluster # =  29  finished
Cluster # =  30
Cluster Density = 113
Cluster Lat =  37.349283522522526
Cluster Lng =  -121.8611105675676
2018 2 11 4
2016 9 18 2
2018 10 3 15
2017 3 9 22
2018 9 15 10
2019 1 25 17
2016 4 11 12
2017 7 6 0
2016 7 13 7
2018 6 30 19
2018 7 24 9
2016 7 18 16
duplicate found in cluster: 30 on  111 th day at  19 th hour
2018 10 27 6
2017 4 14 15
2018 10 29 1

2018 9 3 7
2019 2 14 8
2019 2 7 10
2019 3 1 10
2018 8 6 6
2019 2 19 8
2018 1 18 9
2017 9 11 13
2017 2 15 8
2018 3 8 19
2018 3 11 7
2019 2 19 11
2017 9 23 8
2017 8 15 12
2019 1 25 14
2019 3 22 6
2019 2 9 9
2016 8 25 12
2016 12 22 6
2016 11 1 15
2018 11 12 19
2018 7 4 21
2016 10 5 0
2016 6 23 14
2018 3 25 19
2018 11 28 5
2016 6 13 21
2019 3 25 1
2017 3 9 7
2017 11 13 10
2018 12 26 1
2017 5 27 8
2019 1 30 22
2018 3 1 23
2017 9 3 9
2018 5 31 20
2017 5 31 16
2018 10 31 9
2016 12 31 13
2016 4 21 7
2017 9 11 10
2018 6 12 2
2017 5 29 5
2017 11 27 15
2019 3 10 23
2017 7 13 0
2018 4 6 9
2019 1 18 5
2017 11 8 23
2016 6 27 0
2019 1 5 1
2019 1 26 11
2019 1 7 9
2019 3 13 8
2016 10 21 5
2017 10 4 11
2019 2 9 12
2016 5 16 18
2016 12 15 16
2019 1 15 22
2018 4 1 13
2018 11 4 5
2017 6 1 11
2019 2 14 2
2019 2 24 13
2016 8 9 19
duplicate found in cluster: 32 on  355 th day at  10 th hour
2019 3 3 1
2017 12 21 6
2019 1 28 12
2016 12 12 6
2018 12 20 8
2018 5 28 20
2019 2 1 22
2018 4 29 20
2018 11 19 14
2016 

2017 5 14 17
2016 5 19 0
2018 9 12 13
2017 7 6 10
2019 1 7 4
2018 1 24 23
2018 12 8 17
2017 1 8 13
2019 2 1 4
2016 10 25 1
2016 6 24 20
2019 1 25 15
2018 7 2 3
2016 4 12 5
2016 7 19 13
2019 3 30 1
2019 2 5 9
2016 12 6 7
2017 11 10 1
2019 1 24 23
2019 3 5 16
2016 5 19 13
2019 1 16 23
2018 7 6 5
2017 6 24 19
2016 11 10 20
2019 1 21 12
2017 11 19 5
2016 12 14 15
2016 7 10 21
2019 1 11 5
2019 1 20 17
2017 8 7 8
2019 1 10 7
2018 6 7 2
2017 3 10 18
2018 4 23 6
2016 9 24 13
2018 12 16 6
2019 1 31 4
2017 7 31 7
2019 3 7 12
2018 4 7 14
2017 6 20 23
2017 6 25 20
2016 7 4 9
2019 2 19 17
2019 1 16 6
2019 2 2 10
2016 8 13 1
2017 10 26 18
2019 3 22 14
2016 5 31 3
2017 12 25 15
2019 2 22 22
2019 2 20 14
2016 5 24 1
2017 2 11 3
2018 4 18 9
2019 2 4 3
2018 1 14 15
2016 12 10 15
2018 5 10 15
2018 9 6 21
2017 3 18 18
2018 9 14 21
2017 4 2 18
duplicate found in cluster: 32 on  39 th day at  9 th hour
2019 2 23 14
2018 6 14 12
2016 4 23 15
2019 3 7 3
2017 1 8 17
2016 8 1 21
2017 7 9 1
2016 7 14 22
2016 12 

2018 4 10 10
2017 12 27 8
2017 7 27 3
2016 4 14 17
2019 1 19 20
2017 5 2 17
2018 1 7 20
2017 10 14 5
2017 4 2 20
2016 3 28 20
2016 6 28 23
2018 4 15 7
2018 11 29 2
2018 1 4 21
2019 2 14 1
2016 9 20 14
2019 3 15 2
2016 7 12 13
2019 2 22 5
2016 11 13 12
2018 6 21 21
2018 3 4 10
2017 10 14 21
2019 1 10 22
2017 2 8 13
2019 1 12 0
2019 1 31 12
2017 5 24 8
2016 11 23 20
2017 7 25 0
2018 7 13 2
2019 1 19 1
2017 10 17 5
2019 2 2 8
2016 8 19 21
2017 12 20 21
2016 4 9 23
2017 5 8 23
2017 8 6 13
2016 6 28 0
2017 8 26 12
2019 1 10 4
2018 9 26 10
2017 11 23 13
2016 6 2 19
2019 3 2 1
2016 8 14 17
2017 4 8 20
2018 6 18 11
2018 8 19 0
2019 3 19 22
2018 12 30 1
2019 1 15 9
2016 9 9 10
2018 2 14 22
2016 4 12 6
2016 10 4 17
2019 2 25 1
2017 7 10 1
2019 3 7 21
2019 2 20 0
2019 1 2 6
2019 3 15 15
2016 8 1 8
2016 10 20 3
2016 4 12 15
2018 12 2 0
2019 3 24 17
2016 10 17 15
2017 5 31 7
2018 9 10 7
2017 12 1 14
2019 1 10 9
2018 12 24 4
2018 4 15 2
2018 8 16 19
2016 3 25 11
2018 4 12 16
2019 2 3 8
2016 8 19 23


2016 10 15 22
2016 4 16 1
2017 2 19 22
2016 9 13 11
2018 8 22 12
2016 9 30 4
2016 8 17 15
2018 11 11 18
2019 3 13 9
2016 8 10 8
2016 12 18 15
2018 8 25 5
2017 7 24 20
2016 12 28 0
2018 2 3 13
2016 12 3 2
2018 11 2 8
2017 8 30 0
2019 2 2 9
2018 7 23 11
2018 11 15 20
2017 1 26 19
2016 12 19 7
2018 3 15 13
2016 8 15 16
2016 12 27 19
2018 9 14 18
2019 2 28 11
2019 3 28 13
2017 3 20 10
2016 6 22 1
2018 9 22 7
2017 10 26 1
2018 1 27 2
2018 7 16 9
2017 12 7 2
2018 7 27 5
2018 11 24 15
2018 8 9 22
2017 5 6 18
2018 8 14 13
2019 2 23 13
2016 9 27 21
2017 5 20 19
2016 5 13 15
2019 1 6 2
2019 3 22 2
2018 9 26 9
2016 10 17 9
2016 5 26 20
2017 2 24 18
2016 4 20 12
2017 3 21 11
2018 2 12 15
2018 1 19 17
2016 10 4 6
2018 11 3 9
2019 3 5 22
2018 10 21 20
2016 9 29 10
2016 7 23 14
2016 11 15 7
2016 7 27 6
2019 3 22 13
2017 11 14 14
2018 8 19 17
2016 4 2 10
2018 11 14 23
2017 3 28 16
2016 11 22 8
2017 4 10 6
2016 11 20 22
2018 3 2 12
2018 8 30 19
2018 10 19 18
2016 11 12 5
2019 2 5 3
2017 6 7 12
2019 3 2

2017 11 27 7
2019 2 2 21
2017 3 6 16
2016 9 10 18
2019 2 12 7
2019 2 8 8
2018 4 5 2
2019 2 6 5
2017 5 3 14
2016 4 10 23
2016 6 27 21
2016 7 19 23
2017 1 12 2
2017 4 13 18
2017 5 1 1
2018 6 11 12
2018 7 5 3
2017 5 30 20
2016 12 20 21
2018 8 22 6
2016 10 31 20
2018 1 29 16
2019 3 1 18
2016 9 8 0
2017 8 21 13
2018 3 14 12
2019 3 11 17
2017 8 4 20
2016 8 12 4
2017 3 11 22
2019 2 3 12
2018 8 24 10
2016 9 25 8
2017 6 6 10
2019 1 13 13
2019 3 29 6
2018 2 26 7
2018 5 14 18
2019 2 27 6
2019 1 5 7
2019 2 21 0
2019 2 6 6
2019 2 4 18
2019 3 25 19
2019 2 15 17
2018 7 29 9
2018 4 24 4
2017 8 16 2
2019 1 8 3
2016 6 17 19
2019 3 17 23
2019 3 5 1
2016 12 12 23
2019 1 23 1
2017 3 3 16
2017 10 9 0
2019 2 12 17
2017 10 5 23
2017 2 6 3
2017 9 26 23
2016 4 1 23
2017 8 2 1
2017 9 21 5
2019 1 22 22
2016 11 21 22
2017 10 13 22
2019 2 4 14
2016 6 17 14
2017 11 17 12
2018 6 21 11
2017 6 25 18
2018 6 16 19
2016 6 11 6
2018 12 1 19
2016 7 30 2
2016 5 21 12
2018 11 13 5
2018 6 15 11
2017 7 23 0
2016 7 21 5
2017 3 1

2017 4 14 10
2018 8 24 9
2017 12 11 6
2019 1 1 15
2018 5 21 0
2016 9 5 1
2018 8 19 23
2019 1 1 21
2016 10 29 23
2018 4 9 20
2017 10 2 3
2016 12 10 22
2016 10 26 23
2018 4 2 0
2016 7 30 20
2016 3 26 19
2017 1 30 10
2017 10 13 5
2018 5 26 4
2016 8 14 21
2016 6 30 18
2017 12 31 4
2018 7 22 1
2018 11 26 11
2018 4 5 10
2016 8 11 13
2018 10 21 10
2018 10 17 9
2018 7 5 10
2017 2 17 7
2018 11 27 18
2017 7 25 6
2018 8 14 9
2016 10 15 17
2017 6 1 4
2018 3 28 20
2016 11 25 12
2016 11 12 6
2018 11 26 6
2018 4 8 5
2019 3 22 1
2019 1 8 3
2018 12 4 19
2018 7 2 0
2016 11 13 14
2018 6 15 0
2019 3 23 2
2018 5 7 12
2019 3 4 18
2018 3 24 0
2019 3 23 17
2016 6 9 17
2018 7 9 19
Cluster # =  47  finished
Cluster # =  48
Cluster Density = 78
Cluster Lat =  37.25073154545454
Cluster Lng =  -121.90598805194807
2019 1 6 20
2018 11 22 1
2017 1 29 5
2019 3 27 16
2016 10 29 23
2016 8 25 20
2017 7 22 5
2017 5 23 2
2019 3 26 15
2019 1 5 10
2018 11 8 11
2016 11 17 22
2018 12 1 19
2018 7 10 21
2016 11 5 14
2016 7 28 9


2016 6 25 2
2016 8 6 13
2017 10 29 12
2016 5 5 8
2017 4 14 16
2018 11 22 8
2018 6 16 23
2019 2 6 5
2017 11 27 9
2019 3 26 10
2019 2 20 13
2016 3 30 22
2019 2 26 0
2019 2 25 12
2018 6 16 23
2019 3 22 4
2018 7 19 17
duplicate found in cluster: 52 on  145 th day at  11 th hour
2018 6 8 2
2018 2 28 23
2018 2 3 4
2016 12 22 23
2017 6 8 1
2016 4 21 1
2019 2 25 18
2017 12 24 17
2017 8 1 4
2019 3 17 19
2016 7 9 18
2016 5 10 2
2016 5 2 12
2016 10 26 3
2018 2 19 16
2016 5 12 7
2018 8 22 16
2017 12 4 15
2016 9 6 7
2019 3 12 22
2018 10 28 9
2016 8 9 15
2018 6 10 14
2018 11 11 16
2016 8 14 19
2017 4 18 20
2019 3 19 2
2019 1 26 17
2018 6 28 22
2017 10 20 22
2016 8 29 1
2017 8 13 12
2016 10 30 7
2018 2 24 2
2019 1 4 9
2018 7 30 3
2019 2 11 13
2018 6 15 14
2019 1 10 15
2019 2 7 7
2018 7 8 21
2019 1 7 10
2017 10 20 0
2017 4 14 21
2018 12 11 5
2018 2 16 22
2019 1 14 12
2019 1 17 18
2016 9 27 12
2018 12 6 7
2016 5 13 16
2016 10 12 12
2016 7 26 1
2016 8 20 13
2018 4 22 12
2018 10 15 21
2017 2 26 9
2019 3 

2016 6 14 10
2019 3 13 5
2016 9 17 8
2017 8 11 23
2017 4 17 17
2019 2 27 22
2016 5 10 22
2017 4 11 19
2017 10 9 6
2018 3 6 10
duplicate found in cluster: 52 on  88 th day at  7 th hour
2018 8 29 2
2018 12 17 19
2019 3 24 14
2018 9 28 23
2018 9 10 15
2019 2 21 18
2017 9 2 12
2017 6 2 16
2016 10 4 14
2018 1 31 20
2019 3 20 18
2018 10 6 14
2018 5 13 10
2019 1 1 6
2017 9 7 3
2017 5 31 8
2016 10 22 10
2016 7 15 3
2019 3 17 2
2019 1 26 4
2016 4 9 13
2018 12 30 16
2016 5 20 14
2018 5 25 2
2016 3 29 21
2018 7 5 23
2016 6 22 11
2019 3 26 1
2019 1 1 6
2019 2 6 14
2018 9 9 1
2019 2 25 3
2016 8 26 14
2016 5 25 21
2018 8 8 20
2018 8 25 3
2019 1 26 13
2016 11 13 5
2018 5 21 16
2018 9 16 4
2016 5 7 7
2019 2 12 22
2018 6 29 19
2018 3 4 11
2019 2 28 19
2017 8 29 8
2017 12 3 19
2019 2 7 7
2016 7 31 2
2018 1 4 11
2016 3 30 16
2019 1 15 4
2019 3 27 21
2016 8 16 16
2017 7 20 0
2019 3 28 21
2016 5 27 19
2017 8 19 22
2019 1 25 10
2017 9 18 9
2019 3 4 3
2017 1 25 4
2017 9 23 20
2017 2 15 11
2019 2 27 12
2017 

2019 3 13 8
2016 10 19 1
2017 11 10 13
2018 12 4 1
2016 4 15 23
2018 9 20 22
2017 9 27 5
2016 9 27 12
2017 12 31 19
Cluster # =  57  finished
Cluster # =  58
Cluster Density = 51
Cluster Lat =  37.20959696
Cluster Lng =  -121.72270096000004
2019 2 15 8
2016 12 11 6
2019 3 10 3
2019 3 11 17
2019 2 19 6
2019 3 1 16
2018 11 3 12
2017 8 19 23
2018 3 6 1
2017 3 18 20
2017 10 5 15
2016 12 27 13
2017 10 22 19
2016 8 17 21
2017 12 4 20
2018 10 14 9
2019 2 12 2
2016 8 15 6
2019 1 25 4
2016 4 4 6
2018 3 25 1
2016 8 28 3
2019 1 4 18
2019 2 11 5
2019 2 24 22
2018 9 4 3
2017 9 29 13
2016 8 26 1
2016 10 12 22
2017 5 23 6
2016 3 28 20
2018 1 30 3
2019 3 16 19
2016 3 28 8
2019 1 12 0
2016 6 19 20
2019 3 25 13
2018 11 20 20
2018 3 30 18
2019 2 10 6
2017 4 23 6
2017 3 21 7
2018 7 21 16
2019 1 20 0
2019 1 11 7
2018 12 8 19
2019 1 20 10
2018 5 15 22
2016 11 7 10
2016 9 15 2
2017 9 25 21
2017 6 29 10
2018 10 12 21
2016 4 26 5
2018 4 24 3
2017 9 11 12
2017 9 29 19
2019 2 1 11
2018 5 11 3
2018 8 31 23
2018 1

2019 1 13 10
2016 10 22 17
2017 1 9 9
2019 3 19 8
Cluster # =  62  finished
Cluster # =  63
Cluster Density = 49
Cluster Lat =  37.355935956521726
Cluster Lng =  -121.90948717391308
2017 12 6 22
2016 7 27 6
2017 8 24 15
2018 1 19 2
2016 12 14 22
2017 7 10 16
2018 5 8 13
2017 2 10 20
2017 12 6 0
2016 12 29 14
2016 4 16 16
2018 4 10 3
2018 7 4 5
2018 11 30 22
2018 12 10 1
2016 11 6 5
2018 8 27 9
2017 8 5 2
2017 8 8 12
2018 6 3 7
2016 10 30 15
2018 12 7 16
2019 3 2 5
2016 4 30 8
2016 12 24 21
2017 9 6 23
2016 9 13 11
2017 5 17 3
2016 9 2 2
2017 11 22 8
2018 4 18 3
2018 5 5 18
2018 10 24 23
2016 5 5 2
2016 6 10 20
2016 5 9 13
2017 8 29 6
2018 1 9 22
2019 1 17 2
2016 6 11 3
2016 10 3 19
2016 12 22 5
2019 3 30 18
2019 3 16 14
2019 2 27 20
2016 9 24 20
2019 2 26 7
2018 7 15 0
2017 10 4 15
2019 2 26 11
2018 8 6 7
2016 5 8 7
2016 4 21 16
2018 4 15 22
2019 1 10 23
2018 11 2 14
2016 11 18 22
2019 2 9 4
2017 2 23 8
2019 1 13 7
2017 8 7 4
2019 2 4 8
2017 12 9 10
2019 2 18 17
2017 9 29 22
2017 11 1 

2019 1 14 17
2018 2 17 15
2019 3 6 12
2016 12 11 1
2016 4 5 6
2019 2 24 6
2017 6 3 9
2017 8 6 4
2016 9 22 8
2017 12 8 0
2016 7 26 4
2017 4 16 4
2017 9 21 0
2016 6 19 2
2017 7 11 22
2016 8 4 20
2019 1 5 19
2018 12 30 5
2019 3 29 20
2016 11 16 10
2017 12 19 6
2018 10 13 13
2017 12 31 12
2018 10 25 4
2019 1 8 21
2017 12 26 9
2019 1 3 16
2019 2 17 12
2018 4 11 14
2017 2 15 11
2018 3 20 1
2017 1 14 12
2016 8 12 2
2019 3 9 9
2018 1 30 16
2016 7 27 2
2017 10 19 11
2017 12 9 7
2018 9 25 6
2018 11 15 16
2019 3 24 14
2019 2 12 22
2018 5 14 13
2017 7 2 17
2016 9 7 3
2018 1 11 6
2018 11 20 10
2019 2 1 21
2016 6 30 8
2017 12 27 12
2016 11 25 1
2019 2 9 8
2016 7 19 8
2018 1 28 11
2019 2 8 11
2018 9 16 15
2016 12 31 4
2018 1 3 9
2018 9 1 0
2017 2 14 21
2017 6 19 12
2019 2 21 22
2018 10 31 1
2019 1 2 6
2019 2 6 21
2016 5 5 23
2016 10 31 4
2017 8 25 13
2016 7 24 9
2018 12 18 11
2018 1 23 23
2016 5 26 20
2016 10 25 17
2019 3 25 0
2016 5 15 8
Cluster # =  67  finished
Cluster # =  68
Cluster Density = 10

2017 4 5 9
2016 4 4 21
2018 3 20 13
2019 3 12 21
2017 7 13 15
2016 12 12 14
2018 11 20 20
2018 1 25 19
2016 9 30 21
2018 1 19 19
2018 11 17 3
2019 3 2 4
2016 7 29 16
2018 1 19 22
2019 1 10 9
2019 2 20 5
2017 3 19 8
2019 1 12 18
duplicate found in cluster: 70 on  82 th day at  8 th hour
2017 9 28 18
2019 1 16 13
2019 2 23 11
2016 10 4 10
2019 1 24 7
2017 4 19 5
2017 4 13 19
2019 3 20 19
2019 1 8 18
2019 3 2 10
2016 8 25 6
2019 2 24 12
2019 3 28 3
2016 11 19 2
2016 11 17 18
2019 3 9 7
2016 7 8 22
2019 1 30 5
2019 1 15 9
2019 1 20 12
2018 10 15 18
2018 3 16 18
2018 12 31 16
2017 5 28 18
2016 9 12 9
2018 2 12 13
2018 12 16 14
2016 6 22 3
2019 3 2 23
2019 3 24 17
2016 4 17 19
2016 8 15 3
2018 6 8 21
2019 1 18 19
2016 8 4 23
2016 10 16 21
2017 6 17 21
2019 2 23 19
2018 2 1 8
2016 9 16 21
2019 2 1 17
2019 2 1 18
2016 7 9 18
2016 8 26 22
2019 3 30 6
2018 10 17 12
2016 6 15 19
2017 7 15 3
2016 4 27 1
2016 9 7 23
2019 1 16 18
2017 4 3 6
2018 5 4 3
2017 1 28 11
2019 1 25 0
2018 10 22 2
2018 12 18

2018 10 9 5
2018 10 31 15
2017 11 23 10
2016 11 12 23
2019 2 11 20
2017 1 19 11
2016 12 21 12
2016 4 21 10
2018 11 16 17
2018 5 17 2
2019 3 18 5
2019 1 7 10
2018 11 3 2
2019 2 18 12
2019 3 11 16
2019 3 15 19
2016 12 15 1
2019 3 12 20
2018 12 28 21
2016 10 26 18
2016 10 30 0
2016 11 16 2
2016 6 6 15
2019 2 24 9
2016 7 12 17
2019 3 2 7
2016 12 2 14
2016 6 23 17
2019 2 23 9
2019 2 15 13
2016 7 30 16
2017 2 21 21
2016 10 12 1
2017 2 12 6
2016 4 7 10
2017 2 1 10
2016 10 3 9
2018 7 7 21
2017 4 8 20
2019 3 11 10
2019 2 4 12
2018 9 24 18
2016 4 17 6
2017 9 5 4
2016 4 25 18
2016 4 14 11
2019 1 24 13
2018 11 4 21
2018 8 5 19
2016 6 7 2
2016 10 21 10
2018 1 3 14
2019 2 1 8
2017 2 25 13
2016 4 21 7
2017 10 4 16
2018 3 3 10
2016 5 7 19
2017 9 2 6
2017 1 27 16
2018 2 22 23
2018 5 2 12
2017 10 23 0
2019 3 30 17
2016 12 26 3
2016 10 9 0
2019 3 29 7
2019 3 5 11
2017 9 18 12
2017 4 9 8
2019 1 15 23
2017 2 3 1
2016 6 12 11
2018 3 2 6
2019 1 30 8
2017 8 7 14
2016 12 15 11
2019 1 27 20
2019 3 14 20
2016 11

2017 12 5 7
2019 2 1 2
2017 9 24 9
2017 7 22 6
2018 7 10 9
2017 8 8 7
2017 11 29 15
2017 5 12 20
2016 11 21 6
2018 4 5 9
2017 2 11 19
2018 12 25 7
2019 1 2 2
2018 8 4 20
2017 9 10 10
2018 1 11 12
2019 1 17 10
2019 2 8 2
2017 8 15 11
2017 8 24 22
2017 4 28 12
2016 7 26 3
2016 7 24 10
2016 11 7 22
2017 1 11 12
2016 10 16 11
2016 11 19 13
2016 11 22 3
2017 8 4 6
2016 5 10 21
2018 10 8 16
2019 2 26 20
2019 2 22 8
2017 3 21 12
2016 9 17 12
2017 9 10 11
2016 12 5 13
2019 3 30 15
2018 11 4 7
2018 10 21 15
2017 10 29 0
2018 2 7 12
2016 6 27 7
2018 1 10 9
2016 4 4 12
2018 1 17 3
2018 7 2 20
2017 11 24 20
2016 10 25 13
2017 2 18 0
2017 4 2 8
2016 4 16 5
2019 3 17 2
2017 1 13 19
2018 10 21 21
2017 4 3 16
2016 5 1 22
2017 1 7 21
2016 10 2 18
2019 2 17 1
2017 7 17 14
2018 1 28 18
2016 6 22 9
2017 1 24 1
2018 9 20 15
2016 7 21 5
2019 1 14 21
2016 9 20 19
2017 7 9 23
2016 9 1 9
2019 2 22 15
2016 9 20 19
2019 1 8 13
2018 7 1 23
2017 7 9 13
2018 1 21 17
2018 11 7 8
2019 2 15 21
2019 1 18 13
2018 1 25 6

2019 3 11 23
2017 2 2 9
2016 6 21 0
2018 2 5 19
2016 6 15 21
2016 6 29 19
2017 5 10 6
2018 12 26 0
2016 10 29 6
2019 2 8 20
2018 1 18 19
2017 11 29 18
2019 2 26 13
2018 12 2 4
2019 1 31 19
2019 1 6 1
2017 1 7 2
2016 5 9 18
2016 10 28 0
2018 9 20 13
2019 1 20 2
2019 2 2 22
2019 3 19 9
2019 3 23 22
2016 9 11 21
2017 9 14 22
2018 11 21 10
2019 1 15 21
2017 6 21 7
2016 7 9 22
2016 10 18 17
2017 5 5 16
2017 6 7 12
2018 7 22 14
2017 9 2 9
2019 2 18 10
2019 2 25 12
2018 12 30 15
2016 5 2 5
2019 3 26 1
2017 11 22 22
2018 6 29 11
2016 10 30 4
2016 3 26 21
2019 3 17 0
2018 4 19 10
2019 1 7 10
2017 4 8 22
2018 6 9 12
2017 9 29 13
2017 3 25 6
2019 3 26 6
2016 8 31 6
2019 2 5 2
2017 8 2 4
2017 10 21 13
2018 3 4 23
2019 3 20 13
2018 2 28 2
2018 5 11 2
2018 12 1 19
2016 11 23 8
2018 11 10 3
2018 6 13 19
2016 8 12 8
2019 1 9 16
2018 8 31 23
2018 7 1 14
2018 7 8 13
2018 3 28 3
2019 3 30 14
2018 11 3 18
2018 3 7 16
2018 8 16 10
2018 2 14 0
2016 12 2 20
2016 5 8 18
2018 7 25 21
2017 7 10 5
2019 3 21 18
2

2019 1 14 7
2016 4 9 15
2018 5 20 5
2016 9 30 11
2017 5 9 10
2016 11 14 21
2016 12 4 12
2016 12 5 22
2016 9 20 9
2017 3 9 11
2017 2 20 8
2019 1 3 18
2018 4 27 5
2018 7 8 17
2017 5 10 13
2017 7 31 18
2017 2 28 16
2019 3 10 13
2017 4 7 12
2017 6 17 5
2019 3 2 23
2019 2 26 18
2018 4 1 10
2018 4 6 10
2018 10 15 2
2018 2 15 8
2019 2 5 14
2016 9 3 13
2017 2 15 10
2018 1 15 20
2019 3 30 15
2018 7 15 4
2019 1 5 13
2016 11 14 14
2018 11 19 5
2018 7 9 15
2016 12 23 6
2019 3 24 16
2018 7 31 4
2017 4 19 12
2018 3 13 23
2018 7 11 14
2018 10 1 14
2017 9 28 6
2016 11 19 8
2017 1 13 8
2018 7 28 6
2019 1 18 3
2019 1 10 12
Cluster # =  87  finished
Cluster # =  88
Cluster Density = 31
Cluster Lat =  37.329385161290325
Cluster Lng =  -121.84182038709677
2018 12 17 18
2019 2 15 16
2019 3 23 23
2019 3 29 12
2018 2 28 3
2018 10 23 8
2019 1 15 7
2018 6 30 6
2016 12 28 20
2019 1 14 23
2019 3 24 7
2018 2 14 7
2018 6 27 15
2016 8 23 14
2017 7 20 22
2017 8 18 2
2017 7 3 6
2017 8 7 0
2018 11 19 15
2017 2 16 2
201

2018 12 18 2
2018 3 28 0
2016 12 9 11
2019 1 28 22
2019 3 16 13
2019 2 23 23
2018 8 3 3
2019 1 9 7
2019 2 7 2
2017 7 21 11
2016 6 2 14
2016 6 30 8
2017 3 18 6
2017 3 23 17
2019 2 16 9
2017 7 30 0
2018 2 22 19
2019 2 5 6
2017 10 8 5
2017 4 28 14
2018 7 16 6
2017 11 20 6
2018 12 10 15
2019 1 12 20
Cluster # =  93  finished
Cluster # =  94
Cluster Density = 83
Cluster Lat =  37.2737277179487
Cluster Lng =  -121.86274721794877
2017 1 30 12
2018 7 20 2
2016 6 16 17
2018 4 19 2
2019 2 5 10
2019 1 27 5
duplicate found in cluster: 94 on  5 th day at  3 th hour
2016 12 23 8
2018 12 10 1
2016 3 23 1
2018 9 16 4
2018 1 24 16
2019 3 13 11
2018 4 25 13
2016 8 3 6
2018 9 2 6
2019 1 2 21
2019 1 17 3
2016 12 17 22
2016 7 14 15
2016 12 21 4
2016 9 26 17
2017 6 24 15
2017 4 25 8
2019 2 21 7
2016 12 25 15
2018 8 28 18
2019 2 5 6
2019 2 24 23
2016 4 17 19
2016 8 31 20
2018 6 21 23
2017 7 15 18
2018 6 8 13
2019 2 26 23
2019 3 15 13
2016 8 25 10
2019 3 10 17
2016 4 20 1
2019 2 1 15
2016 12 30 4
2019 3 2 17


2018 8 12 20
2017 3 30 22
2017 1 8 1
2018 1 19 12
2019 2 3 0
2017 9 12 20
2018 12 10 22
2017 5 15 23
2019 2 27 12
2017 1 12 9
2019 2 4 12
2016 11 4 12
2016 11 15 23
2019 1 20 17
2016 10 22 23
2016 9 30 14
2016 10 6 7
2018 11 23 10
2019 1 21 18
2018 2 4 3
2019 1 7 5
2019 3 16 8
2017 9 12 21
2018 12 28 22
2016 5 13 11
2019 3 13 20
2017 7 24 11
2018 9 27 2
2017 8 9 8
2016 11 22 7
2016 12 18 19
2018 2 7 11
2017 12 30 4
2016 7 28 12
2019 2 4 8
2017 6 12 1
2018 3 20 14
2017 10 29 19
2016 12 14 6
2017 3 20 20
2017 6 11 22
2018 11 8 20
2019 1 31 22
2018 2 17 18
2018 3 20 19
2017 3 3 11
2019 3 29 2
2016 10 26 7
2016 6 2 5
2017 7 14 9
2019 1 19 3
2019 2 24 0
2019 2 14 4
2018 9 9 7
2019 1 30 17
2019 1 7 11
2018 9 19 3
2017 5 18 23
2018 8 2 10
2019 2 24 9
2018 9 29 5
2019 3 22 3
2017 9 2 2
2016 10 20 18
2019 1 28 20
2017 10 16 10
2018 6 7 4
2018 10 19 5
2019 1 8 9
2016 4 1 7
2016 3 31 12
2017 4 3 7
2016 10 11 4
2018 3 19 12
2019 2 3 0
2016 8 17 15
2016 4 22 19
2016 7 10 15
2016 9 14 8
2017 3 1 11


2016 10 29 21
2018 11 27 5
2018 6 5 5
2017 4 27 23
2018 7 2 19
2018 6 16 2
2019 2 15 6
2019 1 6 13
2017 12 15 8
2016 12 1 9
2017 9 16 10
2018 11 12 17
2017 12 19 23
2018 4 9 10
2017 9 2 18
2016 8 16 4
2016 4 2 13
2016 9 20 1
2017 11 29 23
2017 11 4 20
2018 2 21 10
2017 6 19 0
2018 1 14 15
2018 4 26 5
2016 7 10 11
2018 10 17 18
2018 9 13 14
2018 12 4 6
2016 7 17 21
2017 4 18 2
2019 2 17 16
2017 12 26 15
2018 12 15 22
2019 2 12 11
2017 12 8 4
2018 12 9 5
2016 5 13 16
2019 1 27 12
2018 2 27 13
2018 4 1 3
2017 9 17 13
2017 7 5 12
2019 2 25 7
2017 4 24 14
2016 8 12 21
2018 2 18 9
2018 5 2 18
2017 7 19 23
2018 8 22 3
2017 5 30 16
2019 1 21 14
2018 6 2 3
2017 9 29 23
2017 6 12 19
2019 2 6 3
2018 3 29 10
2017 2 2 22
2017 2 2 4
2017 7 18 11
2016 12 14 14
2018 7 6 11
2016 11 27 3
2019 3 13 11
2017 11 19 3
2018 3 10 13
2019 3 5 20
2019 1 10 4
2019 2 23 6
2017 7 4 19
2016 5 26 1
2018 12 2 13
2018 3 15 4
2018 8 10 4
2018 6 23 15
2019 1 20 7
2016 10 7 2
2018 8 6 5
2016 10 4 20
2016 5 7 5
2018 2 17 6

2017 12 29 17
2017 7 31 16
2019 1 15 13
2016 8 22 0
2018 6 1 12
2016 5 9 20
2017 8 8 20
2017 12 19 12
2019 2 23 22
2017 2 20 15
2018 7 28 13
2018 11 1 6
2019 1 7 8
2019 3 30 23
2016 4 10 12
2019 3 13 16
2019 3 24 12
2017 10 14 9
2017 11 17 23
2017 11 4 8
2017 11 2 22
2018 11 26 9
2017 10 15 5
2016 5 25 3
2019 2 3 12
2016 9 12 14
2017 1 4 4
2017 7 16 5
2016 6 17 21
2018 3 12 14
2019 1 16 7
2019 3 7 3
2019 2 21 5
2016 11 16 6
2019 1 3 13
2016 10 3 2
2016 11 1 9
2018 12 11 13
2018 3 30 5
2016 12 11 10
2016 10 15 21
2019 2 8 6
2016 10 19 18
2017 12 3 0
2016 6 25 8
2017 9 9 14
2018 10 24 19
2018 7 28 12
2018 10 27 1
2016 10 16 22
2019 1 21 18
2018 6 7 15
2019 3 9 7
2018 1 4 21
2016 9 22 8
2019 3 1 10
2019 2 3 14
2017 12 9 22
2019 3 10 6
2019 1 29 20
2016 12 1 13
2018 8 18 12
2019 1 9 23
2018 8 15 8
2017 6 3 9
2016 11 21 1
2019 1 11 10
2018 7 28 3
2017 4 20 1
2018 5 20 6
2017 7 20 9
2016 4 11 15
2016 8 20 18
2016 5 17 18
2019 3 25 6
2019 3 9 9
2018 3 17 4
2017 5 5 11
2016 6 10 10
2016 5 12 2

2017 3 24 16
2019 1 4 21
2017 11 10 11
2019 1 8 4
2016 3 29 17
2019 2 22 6
2019 3 10 18
2018 1 3 0
2018 9 16 1
2018 9 4 16
2019 3 25 8
2017 3 7 6
2017 4 12 21
2018 8 25 15
2016 5 11 6
2019 1 27 8
2018 12 26 7
2019 2 8 13
2016 4 9 8
2018 4 17 14
2019 2 10 16
2016 7 18 0
2016 6 10 2
2018 7 25 8
2017 1 31 10
2019 1 13 4
2019 1 5 16
2018 10 28 15
2016 6 15 12
2018 6 6 10
2018 10 7 1
2019 2 12 15
2018 7 10 3
2016 10 1 7
2018 1 30 17
2018 3 23 21
2018 2 2 17
2016 8 17 5
Cluster # =  114  finished
Cluster # =  115
Cluster Density = 62
Cluster Lat =  37.33248509836066
Cluster Lng =  -121.8449356229508
2019 1 30 17
2019 2 9 17
2019 3 7 3
2018 1 17 2
2018 3 30 11
2018 9 2 15
2019 3 14 1
2016 9 12 13
2017 11 26 19
2016 6 10 20
2017 2 1 8
2018 6 20 17
2016 8 1 9
2018 3 17 1
2018 2 22 6
2018 5 7 7
2018 12 26 6
2018 7 9 15
2019 1 29 15
2016 3 28 3
2019 3 9 0
2017 6 22 5
2017 7 26 8
2017 10 2 11
2017 4 23 19
2019 3 22 20
2016 8 4 20
2016 9 8 18
2017 9 14 14
2018 5 12 4
2019 2 26 12
2017 12 25 23
2018

2019 3 19 23
2016 12 6 10
2019 2 17 17
2019 1 23 22
2018 9 15 17
2017 1 4 19
2016 4 29 20
2019 2 3 9
2017 10 12 16
2016 8 7 10
2019 3 19 22
2016 10 31 2
2016 12 10 0
2016 7 20 11
2019 1 31 19
2016 4 4 6
2016 5 25 8
2016 9 9 12
2019 1 3 0
2019 3 25 22
2017 9 7 8
2018 3 11 2
Cluster # =  120  finished
Cluster # =  121
Cluster Density = 53
Cluster Lat =  37.345092775
Cluster Lng =  -121.9233888
2017 9 13 5
2016 4 16 10
2018 6 28 2
2019 2 17 8
2019 2 20 5
2019 2 27 15
2016 9 24 1
2017 9 24 17
2019 2 24 9
2018 6 4 21
2016 9 8 16
2016 6 2 22
2017 7 29 16
2017 12 21 7
2017 3 17 5
2017 1 4 12
2017 3 7 10
2016 7 21 13
2017 2 9 10
2016 4 20 10
2017 6 12 20
2017 5 19 18
2016 12 2 20
2017 10 15 15
2019 1 29 10
2019 2 15 2
2017 10 9 2
2018 2 17 18
2016 12 22 1
2019 2 1 1
2016 12 6 19
2017 5 10 20
2019 2 28 21
2016 12 9 19
2019 1 9 22
2017 11 10 12
2017 8 26 16
2017 10 18 19
2016 6 15 3
2019 1 13 15
2017 9 16 21
2018 9 3 13
2016 8 13 0
2016 10 23 8
2019 3 18 10
2019 3 26 18
2019 2 21 9
2019 3 17 2
2

2018 9 7 19
2017 10 14 16
2018 2 22 17
2018 10 20 1
2016 7 18 4
2019 3 9 15
2019 1 11 9
2019 1 10 21
2017 11 8 12
2018 12 5 23
2016 12 28 8
Cluster # =  129  finished
Cluster # =  130
Cluster Density = 35
Cluster Lat =  37.34289346875
Cluster Lng =  -121.901296
2016 4 11 8
2016 7 20 9
2017 11 2 22
2018 6 18 4
2017 8 10 0
2016 11 17 15
2018 4 2 2
2017 10 3 9
2019 2 7 0
2016 11 3 12
2017 9 18 6
2019 2 8 22
2017 3 24 23
2018 3 31 11
2017 6 6 14
2016 5 6 19
2018 12 4 10
2017 4 14 1
2017 12 20 5
2018 10 12 19
2016 12 28 1
2018 1 15 3
2018 6 9 23
2017 1 9 15
2019 2 22 12
2018 10 3 18
2017 5 7 6
2018 1 17 22
2017 12 14 1
2017 10 12 17
2018 8 14 11
2018 8 2 5
2018 8 22 11
2016 7 12 9
2018 2 24 3
2016 9 23 12
2016 4 21 9
2016 7 30 3
2016 7 11 23
2019 1 15 21
2016 12 8 20
2016 7 17 18
2016 10 18 9
2016 9 23 23
2018 5 9 8
2016 4 24 12
2018 3 2 17
2017 8 8 1
2018 2 8 14
2019 1 12 11
2019 2 3 1
2017 9 7 9
2016 11 4 15
2019 2 10 6
2017 8 1 20
2017 7 16 1
2018 4 10 21
2018 4 17 17
2016 4 14 23
2017 2

In [7]:
non_accident

Unnamed: 0,Temperature(F),Humidity(%),Visibility(mi),Wind_Speed(mph),Year,Month,Day,Day_of_Year,Hour,Cluster,Cluster_Lat,Cluster_Lng
0,66.23,37.0,9.997,8.50,2018,6,9,160,17,0,37.250607,-121.911475
1,48.76,77.0,10.000,2.00,2019,3,4,63,5,0,37.250607,-121.911475
2,59.76,86.0,8.337,0.32,2016,8,20,233,8,0,37.250607,-121.911475
3,71.38,33.0,9.997,5.26,2018,10,14,287,13,0,37.250607,-121.911475
4,66.71,52.0,9.997,0.78,2017,4,30,120,10,0,37.250607,-121.911475
...,...,...,...,...,...,...,...,...,...,...,...,...
22767,50.80,49.0,10.000,3.33,2019,2,7,38,18,137,37.327972,-121.875633
22768,62.99,35.0,9.997,2.60,2018,3,5,64,16,137,37.327972,-121.875633
22769,45.52,86.0,9.997,1.61,2016,12,23,358,20,137,37.327972,-121.875633
22770,63.77,34.0,9.997,2.84,2018,10,13,286,3,137,37.327972,-121.875633


In [8]:
data.shape

(11455, 18)

In [9]:
data['Accident'] = np.ones(data.shape[0]).tolist()

In [10]:
non_accident['Accident'] = np.zeros(non_accident.shape[0]).tolist()

In [11]:
data = data.append(non_accident)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


In [12]:
data

Unnamed: 0,Accident,Cluster,Cluster_Lat,Cluster_Lng,Day,Day_of_Year,Hour,Humidity(%),Month,Start_Lat,Start_Lng,Start_Time,Temperature(F),Visibility(mi),Weather_Condition,Weekday,Wind_Speed(mph),Year,Zipcode
0,1.0,0,37.250607,-121.911475,21,173,10,41.0,6,37.250729,-121.910713,2016-06-21 10:16:26,75.20,10.000,Clear,1.0,5.80,2016,95118.0
1,1.0,1,37.328977,-121.870144,21,173,10,53.0,6,37.328312,-121.871811,2016-06-21 10:17:17,73.00,10.000,Partly Cloudy,1.0,4.60,2016,95112.0
2,1.0,2,37.382332,-121.904376,21,173,11,27.0,6,37.382370,-121.904358,2016-06-21 11:30:58,84.00,10.000,Partly Cloudy,1.0,8.10,2016,95131.0
3,1.0,3,37.401425,-121.908851,21,173,12,24.0,6,37.401531,-121.908859,2016-06-21 12:53:58,87.10,10.000,Partly Cloudy,1.0,9.20,2016,95131.0
4,1.0,4,37.339455,-121.852372,21,173,14,16.0,6,37.339455,-121.852043,2016-06-21 14:34:20,93.20,10.000,Clear,1.0,15.00,2016,95122.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22767,0.0,137,37.327972,-121.875633,7,38,18,49.0,2,,,,50.80,10.000,,,3.33,2019,
22768,0.0,137,37.327972,-121.875633,5,64,16,35.0,3,,,,62.99,9.997,,,2.60,2018,
22769,0.0,137,37.327972,-121.875633,23,358,20,86.0,12,,,,45.52,9.997,,,1.61,2016,
22770,0.0,137,37.327972,-121.875633,13,286,3,34.0,10,,,,63.77,9.997,,,2.84,2018,


In [13]:
data.to_csv('TrainingData.csv', index = None, header=True) #Don't forget to add '.csv' at the end of the path