# Where should I move?
One of the biggest factors in deciding where to move is the temperature. This notebook imports historical temperature data for selected locations and charts them together, so I can compare the historical average temperature (in Fahrenheit) of where I live now with that of the places I might move to. Soon, I will add predictions to see how hot it might be in the future.

## import packages

In [1]:
# ! pip install --upgrade pip

# ! pip install pandas
# ! pip install numpy
# ! pip install plotly
# ! pip install scikit-learn
# ! pip install prophet
# ! pip install ipywidgets

# # ! pip install hvplot datashader holoviews panel param colorcet

# ! pip install meteostat

# https://dev.meteostat.net/python/stations.html#example

In [2]:
import pandas as pd
import numpy as np
import plotly
import plotly.express as px
import plotly.io as pio
import sklearn as sk
from collections import defaultdict

import meteostat
import prophet

from prophet import Prophet
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import mean_absolute_error

# Import Meteostat library and dependencies
from datetime import datetime, date
import matplotlib.pyplot as plt
from meteostat import Point, Daily, Monthly
from functools import reduce

## functions

In [3]:
# function: convert Celsius to Fahrenheit

def farenheit(c):
    """Convert a temperature from degrees Celsius to degrees Fahrenheit.

    Only multiplication and addition are used, so ``c`` may be a plain
    number or anything that supports those operators element-wise
    (e.g. a pandas Series or DataFrame).

    Returns ``c * 1.8 + 32`` in the same shape as the input.
    """
    return c * 1.8 + 32

## blog example - prophet

In [4]:
# # Set time period
# start3 = datetime(2021, 1, 1)
# end3 = datetime(2022, 9, 30)

# # Create Point for Bangalore, KARNATAKA
# place3 = Point(12.971599, 77.594566)
# data3 = Daily(place3, start3, end3)
# data3 = data3.fetch()

In [5]:
# The input to Prophet is always a dataframe with two columns: ds and y. 
# The ds (datestamp) column should be of a format expected by Pandas, ideally 
# YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp. 
# The y column must be numeric, and represents the measurement we wish to forecast.

In [6]:
# train = data3.loc[:'2022-08-31']
# test = data3.loc['2022-09-01':]

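# train = train[['tavg']]  # select from the train split; data3[['tavg']] would put the test period back into the training data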
# train = train.reset_index()
# train.columns = ['ds', 'y']

In [7]:
# model = Prophet()
# model.fit(train)
# future = pd.DataFrame(test.index.values)
# future.columns = ['ds']
# forecast = model.predict(future)  # TODO resolved: `m` was never defined; the fitted Prophet object above is `model`

In [8]:
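# # calculate RMSE between expected and predicted values
# # (needs `import math` and `from sklearn.metrics import mean_squared_error`, neither of which is imported above)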
# y_true = test['tavg'].values
# y_pred = forecast['yhat'].values
# rmse = math.sqrt(mean_squared_error(y_true, y_pred))
# print('RMSE:', rmse)

In [9]:
# # plot expected vs actual
# plt.plot(y_true, label='Actual')
# plt.plot(y_pred, label='Predicted')
# plt.ylim(ymax=30, ymin=15)
# plt.legend()
# plt.show()

In [10]:
# Note that the forecasting model we created here is a very simple model. 
# The model can be further improved based on domain knowledge and trying different 
# models such as ARIMA, SARIMA, SARIMAX, etc. 

# places

In [11]:
# points for places (lat, long)
# Each name below is a meteostat Point built from (latitude, longitude).
# The Points are only definitions; which ones are charted is decided by the
# `places` / `names` lists in the next cell.

salem = Point(44.942898, -123.035095)
aberdeen = Point(46.975460, -123.815529)
rapidcity = Point(44.082989, -103.224457)
alameda = Point(37.765340, -122.241870)
# NOTE(review): these coordinates look like Manhattan, New York, not a place
# called Hawthorn Woods — confirm the intended location before using this
# point. It is currently not included in the `places` list.
hawthornwoods = Point(40.741895, -73.989308)


In [12]:
# Single source of truth for what gets charted: label -> Point.
# Deriving both lists from one mapping keeps each label paired with its Point.
# `hawthornwoods` is defined above but is not part of the comparison.
selected_places = {
    'salem': salem,
    'aberdeen': aberdeen,
    'rapidcity': rapidcity,
    'alameda': alameda,
}
places = list(selected_places.values())
names = list(selected_places.keys())

## get temperature averages for selected places

In [13]:
# Set time period: from the start of 2020 up to the moment this cell runs.
start = datetime(2020, 1, 1)
end = datetime.today()

# Not needed by the loop below; kept so any cell that still refers to it works.
placelen = range(len(places))

places_list = []  # one two-column frame (time_index, <name>_tavg) per place
yaxis_names = []  # the <name>_tavg column names, in the same order as `places`

# `names` and `places` are parallel lists, so walk them together.
for placename, place in zip(names, places):
    tavg_col = f"{placename}_tavg"
    # Fetch the daily series and keep only the average temperature; the
    # index is turned into a regular column so it can serve as the merge key.
    d_data_avg = Daily(place, start, end).fetch()[['tavg']].reset_index()
    d_data_avg.columns = ['time_index', tavg_col]
    places_list.append(d_data_avg)
    yaxis_names.append(tavg_col)

# Outer-join on the date so a day reported by any place is kept.
# Missing readings are deliberately left as NaN. Filling them with 0 would be
# read as 0 degrees Celsius, converted to 32 degrees Fahrenheit by the next
# cell, and drawn as if it were a real measurement; NaN shows up as a gap.
places_df = reduce(
    lambda left, right: pd.merge(left, right, on=['time_index'], how='outer'),
    places_list,
)


## convert to Fahrenheit

In [14]:
# Pick the columns to convert by dtype: every float64 column in places_df
# (presumably just the per-place *_tavg readings — TODO confirm time_index
# is never float64).
cels = list(places_df.select_dtypes(include='float64').columns)

# Convert all selected columns in one vectorised call.
# Run this cell only once per fetch: it overwrites places_df in place, so a
# second run would apply the Celsius-to-Fahrenheit formula again.
places_df[cels] = farenheit(places_df[cels])

## plot

In [15]:
# using matplotlib
# places_df.plot(y=yaxis_names, x='time_index')

# plt.show()

In [19]:
# using plotly: one line per place, all sharing the date axis
fig = px.line(
    places_df,
    x="time_index",
    y=yaxis_names,
    title="historical average temperatures",
    template='ggplot2',
).update_layout(xaxis_title="date", yaxis_title="Avg Temp (f)")  # update_layout returns the same figure

fig.show()