This notebook is a quick look at the sakura (cherry blossom) data that I compiled. I plan on gathering additional data, but I wanted to ensure the data I have compiled so far is usable before moving on to additional data to help make predictions.

In [None]:
import json

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import matplotlib.pyplot as plt

import folium
import branca
from scipy.interpolate import griddata

In [None]:
!pip install geojsoncontour
import geojsoncontour

In [None]:
# Read the first-bloom dates CSV from the Kaggle input directory.
first_bloom_csv = '/kaggle/input/japanese-cherry-blossom-data/sakura_first_bloom_dates.csv'
first_bloom = pd.read_csv(first_bloom_csv)

## Latitude and Longitude Lookups

In [None]:
# Sanity-check the geocoder on the first site before looking up every row.
test = first_bloom.loc[0, 'Site Name']

locator = Nominatim(user_agent="myGeocoder")
location = locator.geocode(test)

# geocode() yields None when nothing matches, so guard before reading attributes.
if location is None:
    print(f"No geocoding result for {test!r}")
else:
    print(location.address)
    print(f"Latitude = {location.latitude}, Longitude = {location.longitude}")

The geocoder looks up latitude and longitude fairly well, but it needs the country appended to each site name to resolve every location correctly.

In [None]:
# Append the country so the geocoder can resolve each site name.
# Only rows that do not already end with the suffix are changed, so re-running
# this cell does not produce "..., Japan, Japan".
country_suffix = ', Japan'
needs_suffix = ~first_bloom['Site Name'].str.endswith(country_suffix, na=False)
first_bloom.loc[needs_suffix, 'Site Name'] = first_bloom.loc[needs_suffix, 'Site Name'] + country_suffix

In [None]:
# Wrap the geocoder so consecutive lookups are spaced at least one second apart.
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)


def _coordinate(place, axis):
    """Return the named attribute of ``place``, or None when ``place`` is falsy."""
    return getattr(place, axis) if place else None


# Look up every site, then split the result into separate coordinate columns.
first_bloom['Location'] = first_bloom['Site Name'].apply(geocode)
first_bloom['Latitude'] = first_bloom['Location'].apply(_coordinate, args=('latitude',))
first_bloom['Longitude'] = first_bloom['Location'].apply(_coordinate, args=('longitude',))

We need to ensure that all of our sites had their Latitudes and Longitudes looked up correctly. 

In [None]:
# Display the sites whose lookup left the latitude empty.
missing_coords = first_bloom['Latitude'].isna()
first_bloom[missing_coords]

# Visualizations

Based on code from here:
https://github.com/python-visualization/folium/issues/958

I've had limited success getting a color map legend for this contour map. I'm still working on the issue, so if anyone has any suggestions, please comment below. 



In [None]:
# Parse the 2020 column into datetimes. A unit-less astype('datetime64') is
# rejected by pandas 2.x; pd.to_datetime is also safe to re-run on a column
# that has already been converted.
first_bloom['2020'] = pd.to_datetime(first_bloom['2020'])

# POSIX timestamp for each bloom date; missing dates (NaT) are passed through.
converted_dates = first_bloom['2020'].apply(
    lambda entry: entry.timestamp() if pd.notna(entry) else entry)

converted_df = pd.DataFrame({
    'Site Name': first_bloom['Site Name'],
    'Dates': converted_dates,
    'Longitude': first_bloom['Longitude'],
    'Latitude': first_bloom['Latitude'],
    # Whole days elapsed since the earliest 2020 bloom date.
    'Deltas': (first_bloom['2020'] - first_bloom['2020'].min()).dt.days,
})

# Drop every row that has a missing value in any of the columns above.
converted_df.dropna(inplace=True)
# Column assignment aligns on the index, so only the surviving rows get a label.
converted_df['Text Dates'] = first_bloom['2020'].dt.strftime("%b %d")

# Map timestamp -> "Mon DD" label.
date_dict = converted_df.set_index('Dates')['Text Dates'].to_dict()

In [None]:
# Synthetic points drawn around the observed sites (experimental scaffolding).
# Each column's spread is its standard deviation, and the generator is seeded
# so a re-run reproduces the same sample.
rng = np.random.default_rng(42)
faux_dates = pd.DataFrame({
    'Longitude': rng.normal(converted_df.Longitude.mean(), converted_df.Longitude.std(), 1000),
    'Latitude': rng.normal(converted_df.Latitude.mean(), converted_df.Latitude.std(), 1000),
    'Dates': rng.normal(converted_df.Deltas.mean(), converted_df.Deltas.std(), 1000)})

# Regular 500 x 500 grid spanning the bounding box of the geocoded sites.
long_arr = np.linspace(converted_df.Longitude.min(), converted_df.Longitude.max(), 500)
lat_arr = np.linspace(converted_df.Latitude.min(), converted_df.Latitude.max(), 500)

long_mesh, lat_mesh = np.meshgrid(long_arr, lat_arr)

# Interpolate the day offsets onto the grid. With the default fill value,
# grid points outside the convex hull of the sites come back as NaN.
date_mesh = griddata((converted_df.Longitude, converted_df.Latitude),
                     converted_df.Deltas, (long_mesh, lat_mesh), method='cubic')

# Explicit figure/axes so the plot carries labels and stands on its own.
contour_fig, contour_ax = plt.subplots()
contourf = contour_ax.contourf(long_mesh, lat_mesh, date_mesh, alpha=0.75)
contour_ax.set(xlabel='Longitude', ylabel='Latitude',
               title='Days after the earliest 2020 first bloom');

In [None]:
# Convert matplotlib contourf to geojson
geojson = geojsoncontour.contourf_to_geojson(
    contourf=contourf,
    min_angle_deg=3.0,
    ndigits=5,
    stroke_width=1,
    fill_opacity=0.5)

# Set up the folium plot, centred on the mean site coordinates
geomap = folium.Map([converted_df.Latitude.mean(), converted_df.Longitude.mean()], zoom_start=5, tiles="cartodbpositron")

# Plot the contour plot on folium, reusing the stroke/fill stored on each feature
folium.GeoJson(
    geojson,
    style_function=lambda x: {
        'color':     x['properties']['stroke'],
        'weight':    x['properties']['stroke-width'],
        'fillColor': x['properties']['fill'],
        'opacity':   0.6,
    }).add_to(geomap)

# TODO: add the colormap legend to the folium map (not wired up yet)
#cm.caption = 'Temperature'
#geomap.add_child(cm)

# One marker per site. Rows without coordinates are skipped because folium
# rejects NaN locations; a plain loop is used since only the side effect of
# add_to() is wanted, not a returned Series.
located_sites = first_bloom.dropna(subset=['Latitude', 'Longitude'])
for _, row in located_sites.iterrows():
    folium.Marker(
        location=[row["Latitude"], row["Longitude"]],
        tooltip=f"{row['Site Name']}<br>{row['2020']}",
    ).add_to(geomap)

geomap

# Troubleshooting
This is just a section where I'm working on changes to my current visualizations. That way I can work on them without interrupting the organization of the main part of the notebook.

Notes:
* Currently trying to get a color map for folium to work and/or recreate the geojson contour map in plotly.


In [None]:
# Histogram of bloom timing, measured in days after the earliest 2020 first bloom.
fig, ax = plt.subplots(1, 1)
# `bins` (the bin edges) is kept at notebook level; the next cell reads it.
n, bins, patches = ax.hist(converted_df['Deltas'], bins=5)
# Rotate the automatically generated tick labels. Calling set_xticklabels()
# without also fixing the tick positions makes Matplotlib warn, and the
# labels can end up attached to the wrong ticks.
ax.tick_params(axis='x', labelrotation=90)
ax.set(xlabel='Days after earliest first bloom', ylabel='Number of sites');

In [None]:
# Translate the histogram bin edges (`bins`, in days) back into calendar dates
# by offsetting the rounded edges from the earliest 2020 bloom date.
print(first_bloom['2020'].min() + np.round(bins) * pd.Timedelta(days=1))

# Latest 2020 bloom date, shown as the cell output.
first_bloom['2020'].max()

In [None]:
# Stepped blue -> green -> yellow colour scale spanning the observed day offsets.
levels = 8
colors = ['blue', 'green', 'yellow']

deltas = converted_df['Deltas']
vmin, vmax = deltas.min(), deltas.max()

# Build the continuous scale first, then quantise it into `levels` steps.
linear_cmap = branca.colormap.LinearColormap(colors, vmin=vmin, vmax=vmax)
cmap = linear_cmap.to_step(levels)
cmap

In [None]:
from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
init_notebook_mode(connected=True)
import plotly.graph_objs as go

In [None]:
# Exploratory: list the attributes available on the object returned by contourf.
dir(contourf)

In [None]:
# ContourSet.collections is deprecated since Matplotlib 3.8 and may be missing
# in newer releases, so fall back to the contour paths when it is unavailable.
contour_parts = getattr(contourf, 'collections', None)
if contour_parts is None:
    contour_parts = contourf.get_paths()

for item in contour_parts:
    print(item)

In [None]:
# `geojson` is a string, so parse it as JSON rather than eval()-ing it:
# eval() executes arbitrary text and fails on JSON literals such as
# true/false/null.
derp_geojson = json.loads(geojson)

features = derp_geojson["features"]
arr_temp = np.ones([len(features), 2])

for i, feature in enumerate(features):
    # Give every GeoJSON feature a sequential id.
    feature["id"] = i

    # Column 0 holds the feature id, column 1 the value stored in the
    # feature's "title" property.
    # NOTE(review): assumes the title converts to float — confirm against the
    # titles this geojsoncontour call actually emits.
    arr_temp[i, 0] = i
    arr_temp[i, 1] = feature["properties"]["title"]

# Transforming array to df
df_contour = pd.DataFrame(arr_temp, columns=["Id", "Date"])

In [None]:

#fig = go.Figure(data = go.Choroplethmapbox( 
#    geojson = derp_geojson,
#    locations =  None
#    ) )


#fig.show()

In [None]:
# Square day-resolution datetime grid: NaT everywhere, with the 2020 bloom
# dates written along the diagonal.
site_count = first_bloom['2020'].size
date_array = np.full((site_count, site_count), np.datetime64('NaT'), dtype='datetime64[D]')
np.fill_diagonal(date_array, first_bloom['2020'])

In [None]:
# Experimental: contour the diagonal date grid over the site coordinates.
# NOTE(review): date_array is datetime64[D] and NaT off the diagonal — confirm
# that contourf accepts this input before relying on the plot.
plt.contourf(first_bloom.Longitude, first_bloom.Latitude,date_array)

In [None]:
import plotly.graph_objects as go

# Build the contour trace from the date grid, then wrap it in a figure.
date_contour = go.Contour(z=date_array)
fig = go.Figure(data=date_contour)

fig.show()