This code takes geolocation data and dates. The data is then plotted as a density plot.

Future work:
Add in date ranges to include (if applicable)
Remove data in files where a lat/lng column is present but doesn't have a location or lat/lng data

In [48]:
import folium
from geopy.geocoders import Nominatim
import numpy as np
from folium import plugins
import pandas as pd
#import geopandas
#import geojsoncontour

In [111]:
#Data loading functions
def get_geocode_api(location):
    """Gets the latitude and longitude of location
    location: geographical location or address
    returns: (latitude, longitude) tuple for the match returned by the geocoder
    raises: ValueError if the geocoder returns no match for location

    Uses the module-level `geolocator` object, which must be created before
    this function is called."""
    place = geolocator.geocode(location)
    if place is None:
        # No match came back; fail with a message naming the offending input
        # instead of an AttributeError on None.
        raise ValueError(f'Could not geocode location: {location!r}')
    return (place.latitude, place.longitude)

def is_same_as_previous(l, idx):
    """Tells whether the item at idx equals the item just before it
    l: indexable collection of items
    idx: position in l to compare with position idx-1
    returns: False for idx 0 (nothing precedes it), else the result of the comparison"""
    if idx != 0:
        return (l[idx-1] == l[idx])
    # The first item has no predecessor to compare against
    return False

def get_geocodes(df, addr_h = 'Address',lat_h = 'Lattitude',lng_h = 'Longitude', repeat_addresses = False):
    """Gets the latitude and longitude of all locations and returns df with those values
    df: dataframe with an address column; every row must have an address
    addr_h,lat_h,lng_h: the dataframe headers for the location, latitude and longitude data respectively
    repeat_addresses: Bool, if True the rows are sorted by address and a row whose address
        equals the previous row's reuses that row's coordinates instead of calling the API again
        - May be much faster than using the API multiple times, depending on the number of repeats
        - The returned dataframe is then sorted by address with a fresh 0..n-1 index
    returns: the dataframe with lat_h and lng_h filled in for every row"""

    if repeat_addresses:
        # Sorting puts identical addresses on adjacent rows so one look-back finds repeats
        df = df.sort_values(by=[addr_h], ignore_index=True)

    # Create the output columns up front so the row-wise assignment below
    # never has to enlarge the frame.
    for col in (lat_h, lng_h):
        if col not in df.columns:
            df[col] = np.nan

    prev_addr = None
    prev_coords = None
    # Iterate over the real index labels: the frame may have had rows dropped,
    # so positions 0..n-1 are not guaranteed to be valid labels.
    for label in df.index:
        addr = df.loc[label, addr_h]
        if repeat_addresses and prev_coords is not None and addr == prev_addr:
            coords = prev_coords
        else:
            coords = get_geocode_api(addr)
        df.loc[label, [lat_h, lng_h]] = coords
        prev_addr, prev_coords = addr, coords
    return(df)

def fill_missing_lat_lngs(df, addr_h="Address",lat_h='Lattitude',lng_h='Longitude'):
    """Fills in missing latitude and longitude values in an existing df
    df: dataframe with location, lat, lng data (modified in place and returned);
        rows missing a lat or lng value must have an address to geocode
    addr_h,lat_h,lng_h: the dataframe header for the location, latitude and longitude data respectively
    returns: the dataframe with every missing lat/lng pair replaced by geocoded values"""
    missing = df[lat_h].isna() | df[lng_h].isna()
    # Walk the actual index labels of the incomplete rows; the index may have
    # gaps after earlier filtering, so positional counters cannot be used with .loc.
    for label in df.index[missing.to_numpy()]:
        # Geocode the row's address value (not the column header name)
        df.loc[label, [lat_h, lng_h]] = get_geocode_api(df.loc[label, addr_h])
    return(df)

def remove_bad_data(df, addr_h, lat_h, lng_h):
    """Keeps only the rows that can be located: those with an address, or with both latitude and longitude
    df: dataframe with location, lat, lng data
    addr_h,lat_h,lng_h: the dataframe header for the location, latitude and longitude data respectively
    returns: the rows of df that have an address or a complete lat/lng pair"""
    has_addr = df[addr_h].notna()
    has_lat_lng = df[lat_h].notna() & df[lng_h].notna()
    # A row is usable when either source of position information is present
    return(df.loc[has_addr | has_lat_lng])

def get_data_and_clean(f, addr_h='Address',lat_h='Lattitude',lng_h='Longitude', repeat_addresses = False, reload_gis=False):
    """Reads in data from a csv file and fills in missing latitude and longitude data.
    f: csv file path
    addr_h,lat_h,lng_h: the csv header for the location, latitude and longitude data respectively
    repeat_addresses: Bool, passed on to get_geocodes when every row is geocoded
    reload_gis: Bool, if True every address is geocoded again even when the file already
        has coordinates; rows with no address are dropped
    returns: dataframe with a fresh 0..n-1 index
    """
    df = pd.read_csv(f)
    # Both coordinate columns must be present to reuse the stored coordinates
    has_coord_columns = (lat_h in df.columns) and (lng_h in df.columns)
    if has_coord_columns and not reload_gis:
        # Renumber after filtering so row labels are contiguous again
        df = remove_bad_data(df, addr_h, lat_h, lng_h).reset_index(drop=True)
        df = fill_missing_lat_lngs(df, addr_h, lat_h, lng_h)
    else:
        df = df.dropna(subset = [addr_h]).reset_index(drop=True)
        df = get_geocodes(df, addr_h, lat_h, lng_h, repeat_addresses = repeat_addresses)
    return(df)
    

In [224]:
#Plotting Functions
def get_midpoint(x):
    """Returns the value halfway between the smallest and largest entries
    x: array/series of numerical values
    """
    lowest = x.min()
    highest = x.max()
    return((lowest + highest) / 2)

def get_name(df, addr_h, lat_h, lng_h, idx):
    """Returns a display name for one row: its address, or a lat/lng string when the address is missing
    df: dataframe with the data
    addr_h,lat_h,lng_h: string headers for the address, latitude and longitude columns
    idx: row label of the pandas dataframe (ie for .loc)"""
    address = df.loc[idx, addr_h]
    if pd.isna(address):
        # No address stored for this row, so describe it by its coordinates
        return(f'Lat: {df.loc[idx,lat_h]:.4f} Lng: {df.loc[idx,lng_h]:.4f}')
    return(address)

def make_heatmap(df, lat_h, lng_h):
    """Builds a heat map of the points in a dataframe
    df: dataframe with data
    lat_h,lng_h: the dataframe header for the latitude and longitude data respectively
    returns: folium map centred on the midpoint of the data, with the heat map in a 'Heat Map' feature group
    """
    centre = (get_midpoint(df[lat_h]), get_midpoint(df[lng_h]))
    m = folium.Map(location=centre, zoom_start=7)
    # add_to() returns the element being added, not its new parent, so the
    # group is attached to the map in a separate step.
    heat_layer = folium.FeatureGroup(name='Heat Map')
    plugins.HeatMap(df[[lat_h,lng_h]], radius=15, blur=10).add_to(heat_layer)
    heat_layer.add_to(m)
    return(m)

def make_pinmap(df, addr_h, lat_h, lng_h):
    """Builds a map with one pin for each row of a dataframe
    df: dataframe with data
        NOTE: if df has no addr_h column, one holding lat/lng strings is added to df in place
    addr_h,lat_h,lng_h: the dataframe header for the location, latitude and longitude data respectively
    returns: folium map centred on the midpoint of the data, with all pins in one 'Markers' feature group
    """
    centre = (get_midpoint(df[lat_h]), get_midpoint(df[lng_h]))
    m = folium.Map(location=centre, zoom_start=7)
    if addr_h not in df.columns:
        # Build the fallback names from the column values directly so this
        # works whatever the index labels are.
        df[addr_h] = [f'Lat: {lat:.4f} Lng: {lng:.4f}' for lat, lng in zip(df[lat_h], df[lng_h])]
    # One shared group for every pin. add_to() returns the element being added,
    # so the group is attached to the map separately after the loop.
    markers = folium.FeatureGroup(name='Markers')
    for i in df.index:
        name = get_name(df, addr_h, lat_h, lng_h, i)
        folium.Marker(location=list(df.loc[i,[lat_h,lng_h]]),
                      popup=f"<strong>{i}</strong>",
                      tooltip=f'{name}',
                      icon=folium.Icon(color='green')).add_to(markers)
    markers.add_to(m)
    return(m)

def make_html(f_out, m):
    """Writes a map out as an HTML file
    f_out: destination file path
    m: map object to write; its save method is called with f_out"""
    m.save(f_out)

In [113]:
#Auxiliary Functions
def generate_fake_data(location, n_points=100, spread=0.1, lat_h='Lattitude', lng_h='Longitude'):
    """Generates data randomly around a specific location
    location: address of any location, geocoded to get the centre point
    n_points: number of random points to generate
    spread: standard deviation, in degrees, of the normal noise added to the centre's lat and lng
    lat_h,lng_h: column headers to use for the latitude and longitude data respectively
    returns: dataframe with latitudes and longitudes of n_points points"""
    lat, lng = get_geocode_api(location)
    rnd_lats = lat + np.random.normal(0, spread, n_points)
    rnd_lngs = lng + np.random.normal(0, spread, n_points)
    return(pd.DataFrame({lat_h: rnd_lats, lng_h: rnd_lngs}))

def replace_blank_names(df, addr_h='Address',lat_h='Lattitude',lng_h='Longitude'):
    """Replaces every missing (NaN) address with a latitude/longitude string
    df: dataframe with data (modified in place and returned)
    addr_h,lat_h,lng_h: the dataframe header for the location, latitude and longitude data respectively
    returns: dataframe with updated addresses
    """
    blank = df[addr_h].isna()
    if blank.any():
        # Write back into the column named by addr_h, which need not be 'Address'
        df.loc[blank, addr_h] = [
            f'Lat: {lat:.4f} Lng: {lng:.4f}'
            for lat, lng in zip(df.loc[blank, lat_h], df.loc[blank, lng_h])
        ]
    return(df)

In [210]:
# Column headers used in the data
addr_h = 'Address'
lat_h = 'Lattitude'
lng_h = 'Longitude'

# Input file options
file = 'Test Data/Test_data.csv'
repeat_addresses = True
reload_gis = False  # If True re-finds the GIS coordinates, drops rows with no address

# Fake data is generated ONLY when this is not None (the csv file is then not read)
fake_data_location = 'Nampa, Idaho'

In [211]:
# Both branches below can call the geocoding API through get_geocode_api,
# which reads the global `geolocator`, so it is created here before first use.
geolocator = Nominatim(user_agent='Data_Map_App')

if fake_data_location is not None:
    # Random points around the given location; the csv file is not read
    df=generate_fake_data(fake_data_location)
else:
    df=get_data_and_clean(file,
                          addr_h=addr_h, 
                          lat_h=lat_h, 
                          lng_h=lng_h, 
                          repeat_addresses=repeat_addresses, 
                          reload_gis=reload_gis)

In [212]:
# Geocoder object read as a global by get_geocode_api.
# NOTE(review): it has to exist before any geocoding call is made.
geolocator = Nominatim(user_agent='Data_Map_App')

In [214]:
# Build the heat map; the bare name on the last line displays it as the cell output
m1 = make_heatmap(df, lat_h, lng_h)
m1

In [215]:
# Build the pin map; the bare name on the last line displays it as the cell output
m2 = make_pinmap(df, addr_h, lat_h, lng_h)
m2

In [225]:
# Save both maps as HTML files
make_html('Heatmap.html',m1)
make_html('Pinmap.html',m2)
# make_html(f, m) just calls m.save(f)

In [None]:
# Saves the pin map directly; same effect as make_html('Pinmap.html', m2)
m2.save('Pinmap.html')

In [220]:
ls

 Volume in drive C has no label.
 Volume Serial Number is 64C0-5E99

 Directory of C:\Users\scott\OneDrive\Desktop\Data Science\data_map_app

01/22/2022  03:13 PM    <DIR>          .
01/22/2022  03:13 PM    <DIR>          ..
01/21/2022  01:54 PM             1,928 .gitignore
01/22/2022  12:06 PM    <DIR>          .ipynb_checkpoints
01/22/2022  03:13 PM           261,550 data_map_app.ipynb
01/22/2022  02:53 PM             2,422 environment.yaml
01/22/2022  03:14 PM             7,601 map.html
01/22/2022  12:11 PM             1,320 README.md
01/22/2022  01:59 PM    <DIR>          Test Data
               5 File(s)        274,821 bytes
               4 Dir(s)  14,904,676,352 bytes free


In [216]:
# Display the dataframe. make_pinmap adds the address column in place when df
# does not already have one, which is why generated fake data shows it here.
df

Unnamed: 0,Lattitude,Longitude,Address
0,43.557607,-116.592420,Lat: 43.5576 Lng: -116.5924
1,43.573849,-116.622651,Lat: 43.5738 Lng: -116.6227
2,43.460162,-116.723967,Lat: 43.4602 Lng: -116.7240
3,43.551028,-116.545589,Lat: 43.5510 Lng: -116.5456
4,43.587324,-116.507663,Lat: 43.5873 Lng: -116.5077
...,...,...,...
95,43.504528,-116.669081,Lat: 43.5045 Lng: -116.6691
96,43.603303,-116.590340,Lat: 43.6033 Lng: -116.5903
97,43.480847,-116.429229,Lat: 43.4808 Lng: -116.4292
98,43.468212,-116.561908,Lat: 43.4682 Lng: -116.5619
