In [1]:
import numpy as np
import pandas as pd

from fbprophet import Prophet

In [2]:
def _read_district_file(path):
    """Read one district's comma-separated file, taking the first row as the header."""
    return pd.read_csv(path, sep=",", header=0)


# One raw frame per Berlin district, all read from the DFs/ folder.
Charlottenburg_Wilmersdorf = _read_district_file('DFs/TG_Charlottenburg-Wilmersdorf.txt')
Friedrichshain_Kreuzberg = _read_district_file('DFs/TG_Friedrichshain-Kreuzberg.txt')
Lichtenberg = _read_district_file('DFs/TG_Lichtenberg.txt')
Marzahn_Hellersdorf = _read_district_file('DFs/TG_Marzahn-Hellersdorf.txt')
Mitte = _read_district_file('DFs/TG_Mitte.txt')
NeuKoln = _read_district_file('DFs/TG_NeuKoln.txt')
Pankow = _read_district_file('DFs/TG_Pankow.txt')
Reinickendorf = _read_district_file('DFs/TG_Reinickendorf.txt')
Spandau = _read_district_file('DFs/TG_Spandau.txt')
Steglitz_Zehlendorf = _read_district_file('DFs/TG_Steglitz-Zehlendorf.txt')
Tempelhof_Schoeneberg = _read_district_file('DFs/TG_Tempelhof-Schoeneberg.txt')
Treptow_Koepenick = _read_district_file('DFs/TG_Treptow-Koepenick.txt')

In [3]:
def trans_df(df):
    """Turn a raw district frame into the two-column ``ds`` / ``y`` frame passed to Prophet.

    The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``DATE`` column (cast to ``str`` and parsed with
        ``pd.to_datetime``) and a ``TG`` column in which ``-9999`` marks a
        missing reading.

    Returns
    -------
    pandas.DataFrame
        Columns ``ds`` (parsed ``DATE``) and ``y`` (``TG`` multiplied by 0.1 --
        presumably tenths of a degree converted to degrees; confirm against the
        data source), limited to rows dated 1961-01-01 or later. Rows with a
        missing value in any input column are dropped.
    """
    # Assign the cleaned columns back instead of calling
    # ``df['TG'].replace(..., inplace=True)``: the in-place form acts on a
    # temporary Series, warns on recent pandas and is a silent no-op under
    # Copy-on-Write, which would feed the -9999 sentinel to the model.
    cleaned = df.assign(
        DATE=pd.to_datetime(df['DATE'].astype(str)),
        TG=df['TG'].replace(to_replace=-9999, value=np.nan),
    )
    # Same as before: a missing value in *any* column removes the row.
    cleaned = cleaned.dropna()
    cleaned = cleaned.assign(TG=cleaned['TG'] * 0.1)

    since_1961 = cleaned['DATE'] >= pd.Timestamp('1961-01-01')
    return cleaned.loc[since_1961, ['DATE', 'TG']].rename(columns={"DATE": "ds", "TG": "y"})

In [4]:
# Rebind every district name from its raw frame to the cleaned ``ds`` / ``y``
# frame returned by ``trans_df``.
# NOTE: the raw frames are overwritten, so this cell cannot be re-run on its
# own -- the cleaned frames no longer have the ``DATE`` / ``TG`` columns that
# ``trans_df`` reads. Re-run the loading cell first.
(
    Charlottenburg_Wilmersdorf,
    Friedrichshain_Kreuzberg,
    Lichtenberg,
    Marzahn_Hellersdorf,
    Mitte,
    NeuKoln,
    Pankow,
    Reinickendorf,
    Spandau,
    Steglitz_Zehlendorf,
    Tempelhof_Schoeneberg,
    Treptow_Koepenick,
) = map(
    trans_df,
    (
        Charlottenburg_Wilmersdorf,
        Friedrichshain_Kreuzberg,
        Lichtenberg,
        Marzahn_Hellersdorf,
        Mitte,
        NeuKoln,
        Pankow,
        Reinickendorf,
        Spandau,
        Steglitz_Zehlendorf,
        Tempelhof_Schoeneberg,
        Treptow_Koepenick,
    ),
)

In [5]:
# Disabled scratch prototype of a single-district forecast (Charlottenburg-Wilmersdorf).
# The same steps are wrapped in make_forecast_with_prophet; note that this draft
# keeps rows from 'April 2020' onward, whereas the function uses 'March 2020'.
# m = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False)
# m.fit(Charlottenburg_Wilmersdorf)
# future = m.make_future_dataframe(periods=365)
# forecast = m.predict(future)
# #forecast.head()
# forecast = forecast[forecast['ds'] >= 'April 2020'].copy()
# forecast = forecast[['ds', 'yhat']]
# forecast = forecast.rename(columns={"ds": "date", "yhat": "predicted_tempreature"})
# forecast.head()

In [6]:
def make_forecast_with_prophet(df, periods=365, start='March 2020'):
    """Fit a Prophet model on ``df`` and return its point forecast from ``start`` onward.

    Parameters
    ----------
    df : pandas.DataFrame
        History handed unchanged to ``Prophet.fit`` (the ``ds`` / ``y`` frame
        produced by ``trans_df``).
    periods : int, default 365
        Forwarded to ``Prophet.make_future_dataframe``; the number of periods
        to extend past the end of the history.
    start : str or datetime-like, default 'March 2020'
        Lower bound (inclusive) on ``ds``; earlier prediction rows are dropped.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``date`` (Prophet's ``ds``) and ``predicted_tempreature``
        (Prophet's ``yhat``). The column-name spelling is kept as is because
        later cells and the exported CSV refer to it.
    """
    # Yearly and weekly seasonal components on, daily off.
    m = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False)
    m.fit(df)

    future = m.make_future_dataframe(periods=periods)
    forecast = m.predict(future)

    # Keep only the requested window and the point estimate.
    in_window = forecast['ds'] >= start
    return forecast.loc[in_window, ['ds', 'yhat']].rename(
        columns={"ds": "date", "yhat": "predicted_tempreature"}
    )

In [7]:
# Fit one Prophet model per district and keep its forecast frame.
Charlottenburg_Wilmersdorf_forecast = make_forecast_with_prophet(Charlottenburg_Wilmersdorf)

# KNOWN LIMITATION: the Friedrichshain-Kreuzberg history has missing dates, so
# a forecast fitted on it only reaches 1981 and is unusable here. As a stop-gap
# to complete the project, the Charlottenburg-Wilmersdorf forecast stands in
# for Friedrichshain-Kreuzberg.
# The stand-in is an independent copy of the forecast computed above rather
# than a second Prophet fit on the same Charlottenburg history: refitting on
# identical input only repeats an expensive model fit (assuming Prophet's
# default fit is deterministic for identical input -- confirm).
# TODO: replace with a real forecast once the Friedrichshain-Kreuzberg data is fixed.
Friedrichshain_Kreuzberg_forecast = Charlottenburg_Wilmersdorf_forecast.copy()

Lichtenberg_forecast = make_forecast_with_prophet(Lichtenberg)
Marzahn_Hellersdorf_forecast = make_forecast_with_prophet(Marzahn_Hellersdorf)
Mitte_forecast = make_forecast_with_prophet(Mitte)
NeuKoln_forecast = make_forecast_with_prophet(NeuKoln)
Pankow_forecast = make_forecast_with_prophet(Pankow)
Reinickendorf_forecast = make_forecast_with_prophet(Reinickendorf)
Spandau_forecast = make_forecast_with_prophet(Spandau)
Steglitz_Zehlendorf_forecast = make_forecast_with_prophet(Steglitz_Zehlendorf)
Tempelhof_Schoeneberg_forecast = make_forecast_with_prophet(Tempelhof_Schoeneberg)
Treptow_Koepenick_forecast = make_forecast_with_prophet(Treptow_Koepenick)

In [8]:
# Tag every forecast frame with its district's display name so the rows stay
# identifiable once the frames are combined.
for district_forecast, district_label in (
    (Charlottenburg_Wilmersdorf_forecast, "Charlottenburg-Wilmersdorf"),
    (Friedrichshain_Kreuzberg_forecast, "Friedrichshain-Kreuzberg"),
    (Lichtenberg_forecast, "Lichtenberg"),
    (Marzahn_Hellersdorf_forecast, "Marzahn-Hellersdorf"),
    (Mitte_forecast, "Mitte"),
    (NeuKoln_forecast, "Neukölln"),
    (Pankow_forecast, "Pankow"),
    (Reinickendorf_forecast, "Reinickendorf"),
    (Spandau_forecast, "Spandau"),
    (Steglitz_Zehlendorf_forecast, "Steglitz-Zehlendorf"),
    (Tempelhof_Schoeneberg_forecast, "Tempelhof-Schöneberg"),
    (Treptow_Koepenick_forecast, "Treptow-Köpenick"),
):
    district_forecast['name'] = district_label

In [9]:
# Stack the twelve district forecasts into one frame with a fresh 0..n-1 index.
district_forecasts = [
    Charlottenburg_Wilmersdorf_forecast,
    Friedrichshain_Kreuzberg_forecast,
    Lichtenberg_forecast,
    Marzahn_Hellersdorf_forecast,
    Mitte_forecast,
    NeuKoln_forecast,
    Pankow_forecast,
    Reinickendorf_forecast,
    Spandau_forecast,
    Steglitz_Zehlendorf_forecast,
    Tempelhof_Schoeneberg_forecast,
    Treptow_Koepenick_forecast,
]
berlin_forecast = pd.concat(district_forecasts, ignore_index=True)

# Round the prediction to two decimals and add the ISO week number of each date.
berlin_forecast = berlin_forecast.assign(
    predicted_tempreature=berlin_forecast['predicted_tempreature'].round(2),
    week=berlin_forecast['date'].dt.isocalendar().week,
)

# Fix the column order used for the export, then preview the first rows.
export_columns = ['date', 'week', 'name', 'predicted_tempreature']
berlin_forecast = berlin_forecast[export_columns]
berlin_forecast.head()

Unnamed: 0,date,week,name,predicted_tempreature
0,2020-03-01,9,Charlottenburg-Wilmersdorf,3.96
1,2020-03-02,10,Charlottenburg-Wilmersdorf,4.18
2,2020-03-03,10,Charlottenburg-Wilmersdorf,4.32
3,2020-03-04,10,Charlottenburg-Wilmersdorf,4.44
4,2020-03-05,10,Charlottenburg-Wilmersdorf,4.58


In [10]:
# Write the combined forecast to a CSV in the working directory, omitting the
# row index so only the four data columns are exported.
berlin_forecast.to_csv(
    path_or_buf='berlin_weather_forecast.csv',
    index=False,
)