In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import folium
import datetime

# https://www.linkedin.com/pulse/visualizing-nyc-bike-data-interactive-animated-maps-folium-toso/

# Load the cleaned survey table, default missing abundance codes to 'N',
# and make sure every longitude is negative (positive values get their sign flipped).
lake_df = pd.read_csv('milfoil_tableB_all_years_cleaned.csv')
lake_df['milfoil_presence'] = lake_df['milfoil_presence'].fillna('N')
raw_longitude = lake_df['longitude']
lake_df['corrected_longitude'] = raw_longitude.mask(raw_longitude > 0, raw_longitude * -1)

In [2]:
# Parse the survey year into a timestamp; with format='%Y' each year becomes January 1 of that year.
year_as_timestamp = pd.to_datetime(lake_df['year'], format='%Y')
lake_df['datetime_year'] = year_as_timestamp
lake_df

Unnamed: 0,id,sample_point,latitude,longitude,milfoil_presence,year,corrected_longitude,datetime_year
0,0,1,42.977652,-75.887120,M,2008,-75.887120,2008-01-01
1,1,2,42.977520,-75.887213,S,2008,-75.887213,2008-01-01
2,2,3,42.977661,-75.887699,M,2008,-75.887699,2008-01-01
3,3,4,42.977674,-75.888337,M,2008,-75.888337,2008-01-01
4,4,5,42.977542,-75.889355,M,2008,-75.889355,2008-01-01
...,...,...,...,...,...,...,...,...
4555,299,300,42.960420,-75.871387,N,2022,-75.871387,2022-01-01
4556,300,301,42.961219,-75.871882,N,2022,-75.871882,2022-01-01
4557,301,302,42.961837,-75.872214,N,2022,-75.872214,2022-01-01
4558,302,303,42.948490,-75.866480,N,2022,-75.866480,2022-01-01


In [3]:
# Abundance codes in increasing order: none, trace, sparse, moderate, dense.
# A code's position in this list is its numeric rank.
milfoil_abundance = ['N', 'T', 'S', 'M', 'D']
rank_by_code = {code: rank for rank, code in enumerate(milfoil_abundance)}

# Swap each known code for its rank; anything not in the list is kept unchanged.
# The column is built as object dtype so the ranks stay plain Python ints.
lake_df['milfoil_number'] = pd.Series(
  [rank_by_code.get(code, code) for code in lake_df['milfoil_presence']],
  index=lake_df.index,
  dtype=object,
)
lake_df

Unnamed: 0,id,sample_point,latitude,longitude,milfoil_presence,year,corrected_longitude,datetime_year,milfoil_number
0,0,1,42.977652,-75.887120,M,2008,-75.887120,2008-01-01,3
1,1,2,42.977520,-75.887213,S,2008,-75.887213,2008-01-01,2
2,2,3,42.977661,-75.887699,M,2008,-75.887699,2008-01-01,3
3,3,4,42.977674,-75.888337,M,2008,-75.888337,2008-01-01,3
4,4,5,42.977542,-75.889355,M,2008,-75.889355,2008-01-01,3
...,...,...,...,...,...,...,...,...,...
4555,299,300,42.960420,-75.871387,N,2022,-75.871387,2022-01-01,0
4556,300,301,42.961219,-75.871882,N,2022,-75.871882,2022-01-01,0
4557,301,302,42.961837,-75.872214,N,2022,-75.872214,2022-01-01,0
4558,302,303,42.948490,-75.866480,N,2022,-75.866480,2022-01-01,0


In [4]:
# Sanity check: list the distinct abundance ranks present after the mapping.
lake_df['milfoil_number'].unique()

array([3, 2, 4, 0, 1], dtype=object)

In [5]:
# delete lines where milfoil number is 0
milfoil_df = lake_df.loc[lake_df.milfoil_number != 0]
milfoil_df.milfoil_number.unique()

array([3, 2, 4, 1], dtype=object)

In [6]:
# Distinct survey years remaining after the filter.
milfoil_df['year'].unique()

array([2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018,
       2019, 2020, 2021, 2022])

In [7]:
# Distinct year timestamps remaining after the filter.
milfoil_df['datetime_year'].unique()

<DatetimeArray>
['2008-01-01 00:00:00', '2009-01-01 00:00:00', '2010-01-01 00:00:00',
 '2011-01-01 00:00:00', '2012-01-01 00:00:00', '2013-01-01 00:00:00',
 '2014-01-01 00:00:00', '2015-01-01 00:00:00', '2016-01-01 00:00:00',
 '2017-01-01 00:00:00', '2018-01-01 00:00:00', '2019-01-01 00:00:00',
 '2020-01-01 00:00:00', '2021-01-01 00:00:00', '2022-01-01 00:00:00']
Length: 15, dtype: datetime64[ns]

In [8]:
# Rows of milfoil_df whose year timestamp is 2021-01-01, and how many there are.
is_2021 = milfoil_df['datetime_year'] == '2021-01-01 00:00:00'
milfoil_2021 = milfoil_df.loc[is_2021]
len(milfoil_2021)

25

In [9]:
# Rows of milfoil_df whose year timestamp is 2022-01-01, and how many there are.
is_2022 = milfoil_df['datetime_year'] == '2022-01-01 00:00:00'
milfoil_2022 = milfoil_df.loc[is_2022]
len(milfoil_2022)

114

In [10]:
# Non-null value count of every column, grouped by year timestamp.
rows_by_year = milfoil_df.groupby('datetime_year')
yearly_row_count = rows_by_year.count()
yearly_row_count

Unnamed: 0_level_0,id,sample_point,latitude,longitude,milfoil_presence,year,corrected_longitude,milfoil_number
datetime_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2008-01-01,281,281,281,281,281,281,281,281
2009-01-01,124,124,124,124,124,124,124,124
2010-01-01,88,88,88,88,88,88,88,88
2011-01-01,223,223,223,223,223,223,223,223
2012-01-01,125,125,125,125,125,125,125,125
2013-01-01,258,258,258,258,258,258,258,258
2014-01-01,206,206,206,206,206,206,206,206
2015-01-01,286,286,286,286,286,286,286,286
2016-01-01,290,290,290,290,290,290,290,290
2017-01-01,166,166,166,166,166,166,166,166


In [14]:
def create_geojson_features(df):
  """Build a list of GeoJSON Point features, one per row of ``df``.

  Each feature carries the row's position, its timestamp as a string, and a
  circle-marker style whose radius is the row's ``milfoil_number``.

  Parameters
  ----------
  df : pandas.DataFrame
    Must provide the columns ``corrected_longitude``, ``latitude``,
    ``datetime_year`` and ``milfoil_number``.

  Returns
  -------
  list of dict
    Feature dicts in row order; an empty list when ``df`` has no rows.
  """
  features = []
  for _, row in df.iterrows():
    feature = {
      'type': 'Feature',
      'geometry': {
        'type': 'Point',
        # GeoJSON positions are ordered [longitude, latitude].
        'coordinates': [row.corrected_longitude, row.latitude]
      },
      'properties': {
        'time': str(row.datetime_year),
        'style': {'color': ''},
        'icon': 'circle',
        'iconstyle': {
          'fillColor': '#008000',
          'fillOpacity': 0.5,
          # NOTE(review): this is the string 'false', not a boolean; kept as-is.
          # Confirm how the map front end interprets it.
          'stroke': 'false',
          # Read the radius from the row being iterated. Looking it up by index
          # label on the whole column returns a Series when labels repeat.
          'radius': row.milfoil_number
        }
      }
    }
    features.append(feature)
  return features

# Build one feature per row of milfoil_df.
start_geojson = create_geojson_features(milfoil_df)
start_geojson[0] # first feature; valid indices run from 0 to len(start_geojson) - 1, each showing a coordinate pair & a 2008-2022 timestamp

{'type': 'Feature',
 'geometry': {'type': 'Point', 'coordinates': [-75.88712, 42.9776517]},
 'properties': {'time': '2008-01-01 00:00:00',
  'style': {'color': ''},
  'icon': 'circle',
  'iconstyle': {'fillColor': '#008000',
   'fillOpacity': 0.5,
   'stroke': 'false',
   'radius': 3}}}

In [15]:
# Total number of features built (one per row of milfoil_df).
len(start_geojson)

2825

In [17]:
# The first four characters of a feature's time string are its year.
first_feature_time = start_geojson[0]['properties']['time']
first_feature_time[:4]

'2008'

In [20]:
# Features whose time string starts with 2021, and how many there are.
pts_2021 = [
  feat for feat in start_geojson
  if feat['properties']['time'][:4] == str(2021)
]
len(pts_2021)

25

In [23]:
# Tally features per survey year in a single pass over start_geojson.
# pts_per_yr keeps its shape: a list of single-entry {year: count} dicts, one per
# year from year_to_count through last_year, including years with no features.
year_to_count = 2008  # first year to report
last_year = 2022      # last year to report (inclusive)

counts_by_year = {str(year): 0 for year in range(year_to_count, last_year + 1)}
for feature in start_geojson:
  # The first four characters of the time string are the year.
  year_key = feature['properties']['time'][:4]
  if year_key in counts_by_year:
    counts_by_year[year_key] += 1

pts_per_yr = [{int(year_key): count} for year_key, count in counts_by_year.items()]
pts_per_yr

[{2008: 281},
 {2009: 124},
 {2010: 88},
 {2011: 223},
 {2012: 125},
 {2013: 258},
 {2014: 206},
 {2015: 286},
 {2016: 290},
 {2017: 166},
 {2018: 284},
 {2019: 117},
 {2020: 238},
 {2021: 25},
 {2022: 114}]

In [12]:
from folium.plugins import TimestampedGeoJson

lake_map = folium.Map(location=[42.950253, -75.870825], zoom_start=13)

# One animation frame per year (period='P1Y'). `duration` is how long a feature stays
# on the map after its own timestamp, so it has to be shorter than the period for a
# frame to show only that year's points. With duration='P1Y' the previous year's
# points were still inside the display window when the next frame was drawn, which is
# the likely reason the frames did not match the static per-year maps in
# milfoil_map.ipynb even though the per-year point counts agree.
# NOTE(review): re-run and compare against milfoil_map.ipynb to confirm.
TimestampedGeoJson(
  start_geojson,
  period='P1Y',
  duration='P11M',
  add_last_point=False,
  transition_time=1000,
  auto_play=False,
).add_to(lake_map)

lake_map