This notebook takes geolocation data (addresses or latitude/longitude pairs) and dates, and plots the locations as a density (heat) map.

Future work:
- Add optional date-range filtering, so only records within the chosen dates are plotted (if applicable).
- Remove rows from files that have lat/lng columns but contain neither a location/address nor complete lat/lng data.

In [263]:
import folium
from geopy.geocoders import Nominatim
import numpy as np
from folium import plugins
import pandas as pd
#import geopandas
#import geojsoncontour

In [438]:
def get_midpoint(x):
    """Return the value halfway between the smallest and largest entry of x.

    x: numpy array of numerical values
    """
    lowest = x.min()
    highest = x.max()
    return (lowest + highest) / 2

def make_map(locations, colors, name='Test'):
    """Create and return a folium map built with default settings.

    locations, colors, name: accepted but not used by the current body
    """
    unused_geocoder = Nominatim(user_agent='Idaho')  # built here but never consulted
    base_map = folium.Map()
    return base_map
    
def get_geocode_api(location):
    """Gets the latitude and longitude of location.

    Relies on a module-level ``geolocator`` object that must already exist
    when this function is called.

    location: geographical location or address
    returns: (latitude, longitude) tuple
    raises: ValueError if the geocoder returns no match for location
    """
    place = geolocator.geocode(location)
    if place is None:
        # No match: fail with a message naming the location instead of an
        # AttributeError on None further down.
        raise ValueError(f"Could not geocode location: {location!r}")
    return (place.latitude, place.longitude)

def is_same_as_previous(l, idx):
    """Tell whether the item at position idx equals the item just before it.

    The first item (idx 0) has no predecessor, so the answer is False.
    """
    if idx == 0:
        return False
    before, current = l[idx - 1], l[idx]
    return before == current

def get_geocodes(df, addr_h='Address', lat_h='Lattitude', lng_h='Longitude', repeat_addresses=False):
    """Gets the latitude and longitude of all locations and returns a df with lat, lng values.

    df: dataframe holding the locations/addresses; the caller's dataframe is not modified
    addr_h: column header for locations/addresses
    lat_h, lng_h: column headers the latitudes/longitudes are written to
    repeat_addresses: Bool, if True the rows are sorted by address and an address equal
        to the one before it reuses that result instead of calling the API again
        - May be much faster than using the API multiple times, depending on addr_h
          column length and number of repeats etc
    returns: a new dataframe; sorted by addr_h with a fresh 0..n-1 index when
        repeat_addresses is True, otherwise in the original row order
    """
    if repeat_addresses:
        # Sorting puts identical addresses next to each other, so each one is looked up once.
        df = df.sort_values(by=[addr_h], ignore_index=True)
    else:
        df = df.copy()  # leave the caller's dataframe untouched

    # Iterate over the values themselves so the result does not depend on the
    # dataframe's index labels being 0..n-1.
    coords = []
    previous = None
    for address in df[addr_h]:
        if repeat_addresses and coords and address == previous:
            coords.append(coords[-1])
        else:
            coords.append(get_geocode_api(address))
        previous = address

    df[lat_h] = [lat for lat, _ in coords]
    df[lng_h] = [lng for _, lng in coords]
    return df

In [440]:
df = pd.DataFrame({'Address':['Rockland, ID','Rockland, ID','Pocatello, ID']*10,})

In [444]:
df = get_geocodes(df,'Address',repeat_addresses=False)


In [None]:
update_data()

In [382]:
def fill_missing_lat_lngs(df, addr_h="Address", lat_h='Lattitude', lng_h='Longitude'):
    """Fills in missing latitude and longitude values in an existing df.

    A row is geocoded from its own address when its latitude or longitude is
    missing. Rows that also have no address are left unchanged, since there is
    nothing to look up. df is modified in place and also returned.

    df: dataframe with location, lat, lng data
    addr_h,lat_h,lng_h: the dataframe header for the location, latitude and longitude data respectively
    """
    addr_pos = df.columns.get_loc(addr_h)
    coord_pos = [df.columns.get_loc(lat_h), df.columns.get_loc(lng_h)]
    needs_lookup = df[[lat_h, lng_h]].isna().any(axis=1).to_numpy()
    # Work by position so an index with gaps (e.g. after rows were dropped) is handled.
    for row, missing in enumerate(needs_lookup):
        if not missing:
            continue
        address = df.iloc[row, addr_pos]
        if pd.isna(address):
            continue
        # Geocode this row's address value, not the column header.
        df.iloc[row, coord_pos] = get_geocode_api(address)
    return df

In [383]:
def remove_bad_data(df, addr_h, lat_h, lng_h):
    """Keeps only the rows that carry an address, a complete lat/lng pair, or both.

    df: dataframe with location, lat, lng data
    addr_h,lat_h,lng_h: the dataframe header for the location, latitude and longitude data respectively
    """
    has_addr = df[addr_h].notna()
    has_lat_lng = df[lat_h].notna() & df[lng_h].notna()
    keep = has_addr | has_lat_lng
    return df.loc[keep]

def get_clean_data(f, addr_h='Address', lat_h='Lattitude', lng_h='Longitude', repeat_addresses=False):
    """Reads data from a csv file and returns it with latitude and longitude filled in.

    If the file already has both the lat_h and lng_h columns, rows with neither an
    address nor a complete lat/lng pair are dropped and the remaining gaps are
    geocoded. Otherwise rows without an address are dropped and every address is
    geocoded. Nothing is written back to disk.

    f: csv file path
    addr_h: column header for addresses in csv
    lat_h/lng_h: column header for latitudes/longitudes in csv
    repeat_addresses: forwarded to get_geocodes when all addresses are geocoded
    returns: the cleaned dataframe
    """
    df = pd.read_csv(f)
    if lat_h in df.columns and lng_h in df.columns:
        df = remove_bad_data(df, addr_h, lat_h, lng_h)
        # Renumber the rows so row access by 0..n-1 still lines up after rows were dropped.
        df = df.reset_index(drop=True)
        df = fill_missing_lat_lngs(df, addr_h, lat_h, lng_h)
    else:
        # dropna returns a new dataframe, so the result has to be kept.
        df = df.dropna(how='any', subset=[addr_h]).reset_index(drop=True)
        df = get_geocodes(df, addr_h, lat_h, lng_h, repeat_addresses=repeat_addresses)
    return df
    

In [495]:
ls

 Volume in drive C has no label.
 Volume Serial Number is 64C0-5E99

 Directory of C:\Users\scott\OneDrive\Desktop\Python

01/22/2022  11:56 AM    <DIR>          .
01/22/2022  11:56 AM    <DIR>          ..
01/21/2022  03:32 PM    <DIR>          .ipynb_checkpoints
01/22/2022  11:56 AM            48,466 Data Map App.ipynb
07/09/2021  01:16 PM    <DIR>          MachineLearning
02/25/2020  08:07 PM             3,241 Marcus_Game.py
01/21/2022  12:37 PM             1,666 Untitled.ipynb
01/21/2022  04:35 PM             1,615 Untitled1.ipynb
               4 File(s)         54,988 bytes
               4 Dir(s)  15,049,924,608 bytes free


In [490]:
get_clean_data('')

In [488]:
df

Unnamed: 0,Address,Lattitude,Longitude,0,2
0,,42.573247,,0.0,0.0
1,,42.573247,-112.877213,,
2,"Pocatello, ID",,,,
3,"Rockland, ID",42.573247,-112.877213,,
4,"Rockland, ID",42.573247,-112.877213,,
5,"Pocatello, ID",42.862029,-112.450627,,
6,"Rockland, ID",42.573247,-112.877213,,
7,"Rockland, ID",42.573247,-112.877213,,
8,"Pocatello, ID",42.862029,-112.450627,,
9,"Rockland, ID",42.573247,-112.877213,,


In [491]:
# Column headers passed to remove_bad_data below
addr_h='Address'
lat_h='Lattitude'
lng_h= 'Longitude'
# Show df without the rows that have neither an address nor a complete lat/lng pair
remove_bad_data(df, addr_h, lat_h, lng_h)

Unnamed: 0,Address,Lattitude,Longitude,0,2
1,,42.573247,-112.877213,,
2,"Pocatello, ID",,,,
3,"Rockland, ID",42.573247,-112.877213,,
4,"Rockland, ID",42.573247,-112.877213,,
5,"Pocatello, ID",42.862029,-112.450627,,
6,"Rockland, ID",42.573247,-112.877213,,
7,"Rockland, ID",42.573247,-112.877213,,
8,"Pocatello, ID",42.862029,-112.450627,,
9,"Rockland, ID",42.573247,-112.877213,,
10,"Rockland, ID",42.573247,-112.877213,,


In [None]:
# Blank out values in the first three rows (presumably to exercise the cleaning helpers)
df.loc[0,['Address','Longitude']]=pd.NA  # row 0: address and longitude set to NA
df.loc[1,['Address']]=pd.NA  # row 1: only the address set to NA
df.loc[2,['Lattitude','Longitude']]=pd.NA  # row 2: both coordinates set to NA, address kept

In [456]:
df

Unnamed: 0,Address,Lattitude,Longitude,0,2
0,,42.573247,,0.0,0.0
1,,42.573247,-112.877213,,
2,"Pocatello, ID",,,,
3,"Rockland, ID",42.573247,-112.877213,,
4,"Rockland, ID",42.573247,-112.877213,,
5,"Pocatello, ID",42.862029,-112.450627,,
6,"Rockland, ID",42.573247,-112.877213,,
7,"Rockland, ID",42.573247,-112.877213,,
8,"Pocatello, ID",42.862029,-112.450627,,
9,"Rockland, ID",42.573247,-112.877213,,


In [455]:
remove_bad_data(df, 'Address','Longitude','Lattitude')

ValueError: Cannot index with multidimensional key

In [261]:
# Inputs for a geocoding run with many repeated addresses
places = ['Pocatello, ID'] * 10000
# Column headers; the bare `name =` lines were syntax errors, so they now hold
# the same header names the helper functions use as defaults.
addr_h = 'Address'
lat_h = 'Lattitude'
lng_h = 'Longitude'
repeat_addresses = True



In [388]:
get_geocode_api('Rockland, ID')

(42.5732468, -112.8772126)

In [None]:
geolocator = Nominatim(user_agent='Idaho')
# get_geocodes takes a dataframe and returns a dataframe, so wrap the address
# list first and pull the coordinate columns out afterwards.
geocoded = get_geocodes(pd.DataFrame({'Address': places}), repeat_addresses=True)
lat = geocoded['Lattitude'].to_numpy()
lng = geocoded['Longitude'].to_numpy()
# Centre the map on the midpoint of the geocoded coordinates
m = folium.Map(location=(get_midpoint(lat), get_midpoint(lng)),zoom_start=7,)

In [298]:
# 100 random points scattered (normal, standard deviation 0.1) around the first geocoded location.
# One consistent name is used throughout; the original mixed rnd_* and rand_*, which raised a NameError.
rand_lats = lat[0]+np.random.normal(0,0.1,100)
rand_lngs = lng[0]+np.random.normal(0,0.1,100)
# One (lat, lng) row per point
data = np.array((rand_lats,rand_lngs)).T
places = [f'{point_lat} {point_lng}' for point_lat, point_lng in zip(rand_lats, rand_lngs)]

In [300]:
rand_lngs

array([-112.31560644, -112.27932313, -112.39830368, -112.25283088,
       -112.17338793, -112.191538  , -112.1081798 , -112.23579248,
       -112.23742519, -112.30869004, -112.27125161, -112.19318381,
       -112.22921428, -112.36696804, -112.08249196, -112.153463  ,
       -112.38883693, -112.21777906, -112.41530132, -112.32159571,
       -112.24333131, -112.28371215, -112.27133378, -112.20523939,
       -112.2815152 , -112.20395638, -112.30040517, -112.04808052,
       -112.32275488, -112.11711102, -112.15608135, -112.17625765,
       -112.47169213, -112.23125624, -112.29893625, -112.37086351,
       -112.29457983, -112.21178764, -112.27304374, -112.2080191 ,
       -112.29434368, -112.1119244 , -112.37081624, -112.13650966,
       -112.30898535, -112.42955935, -112.17068678, -112.20919317,
       -112.37368613, -112.14869304, -112.05178599, -112.16738828,
       -112.30665945, -112.29309026, -112.18741378, -112.26262165,
       -112.13598946, -112.32168368, -112.26786833, -112.19742

In [None]:
# Put the heat map inside its own named layer, then add that layer to the map.
# Chaining .add_to(group).add_to(m) attached the HeatMap itself to m (add_to
# returns the element being added), so the FeatureGroup never reached the map.
heat_layer = folium.FeatureGroup(name='Heat Map')
plugins.HeatMap(data,radius=15, blur = 10).add_to(heat_layer)
heat_layer.add_to(m)
#folium.LayerControl().add_to(m)


In [None]:
"""for i in range(len(rand_lats)):
    folium.Marker(location=list(data[i]), 
                    popup=f"<strong>{i}</strong>", 
                    tooltip=f'{places[i]}', 
                    icon=folium.Icon(color='green')).add_to(folium.FeatureGroup(name='Markers')).add_to(m)"""

In [15]:
m.save('map.html')