Skip to content

Commit

Permalink
Merge pull request #18 from parcaster/fix_model
Browse files Browse the repository at this point in the history
Fix model
  • Loading branch information
ritahaffter committed Dec 2, 2023
2 parents f39e2b7 + 8e7d8ea commit c28e0f1
Show file tree
Hide file tree
Showing 14 changed files with 458 additions and 156 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Downloaded trained model and scaler
model_scripted.pt
scaler.pkl

# Swiss meteo request
.cache.sqlite

Expand Down
22 changes: 14 additions & 8 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
import json
import urllib.request
from flask import Flask, jsonify, request
from flask_cors import CORS
from deploy.single_prediction import SinglePrediction

app = Flask(__name__)
single_prediction = SinglePrediction("deploy/model_scripted.pt", "data/preprocessing/raw_features_2024.csv")
CORS(app)

# Remote locations (on api.wandb.ai) of the scripted model and the scaler,
# together with the local file names they are saved under.
url_model = "https://api.wandb.ai/files/parcaster/pp-sg-lstm/2cg5mebb/model_scripted.pt"
model_path = "model_scripted.pt"
url_scaler = "https://api.wandb.ai/files/parcaster/pp-sg-lstm/2cg5mebb/scaler.pkl"
scaler_path = "scaler.pkl"

# Both files are fetched as module-level statements, i.e. every time this
# module is imported, so starting the app requires network access.
urllib.request.urlretrieve(url_model, model_path)
urllib.request.urlretrieve(url_scaler, scaler_path)

# Single shared predictor, built from the downloaded model and scaler plus the
# raw feature CSV and the metadata JSON that ship with the repository.
single_prediction = SinglePrediction(model_path, scaler_path, "data/preprocessing/raw_features_2024.csv",
"data/metadata/metadata.json")


@app.route('/')
def hello():
    """Serve a fixed greeting at the root URL."""
    greeting = 'Hello World!'
    return greeting


@app.route('/metadata')
def metadata():
    """Return the parsed content of ``data/metadata/metadata.json``.

    The file is read inside a ``with`` block so the handle is closed as soon
    as parsing finishes instead of being left to the garbage collector, and it
    is decoded explicitly as UTF-8 so the result does not depend on the
    platform's default encoding.

    Returns:
        The object produced by ``json.load`` for the metadata file.

    Raises:
        FileNotFoundError: If the metadata file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open("data/metadata/metadata.json", encoding="utf-8") as metadata_file:
        return json.load(metadata_file)


@app.route('/predict', methods=['POST'])
def predict():
if request.method == 'POST':
Expand Down
Empty file added data/metadata/__init__.py
Empty file.
9 changes: 9 additions & 0 deletions data/metadata/metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Identifiers of the parking facilities. The order matters: the same list is
# used as the column names of the model output and as the lookup keys into the
# metadata "fields" mapping.
# TODO: read these from the metadata file instead of hard-coding them.
parking_data_labels = ["P24", "P44", "P42", "P33", "P23", "P25", "P21", "P31", "P53", "P32", "P22", "P52", "P51",
"P43"] # TODO get these from metadata file


# Columns selected, in this order, as the model's input features: calendar
# flags, daily weather values, and the sine/cosine encoded time components.
# TODO: read these from the metadata instead of hard-coding them.
feature_columns = ['ferien', 'feiertag', 'covid_19', 'olma_offa', 'temperature_2m_max',
'temperature_2m_min', 'rain_sum', 'snowfall_sum', 'sin_minute',
'cos_minute', 'sin_hour', 'cos_hour', 'sin_weekday', 'cos_weekday',
'sin_day', 'cos_day', 'sin_month', 'cos_month']
40 changes: 0 additions & 40 deletions data/preprocessing/prepare_time_features.py

This file was deleted.

53 changes: 53 additions & 0 deletions data/preprocessing/preprocess_features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import numpy as np
import pandas as pd
from data.metadata.metadata import feature_columns, parking_data_labels


class PreprocessFeatures:
    """Derive calendar and cyclic time features for a feature dataframe.

    The wrapped dataframe is modified in place: every derived feature is
    appended to it as a new column.
    """

    def __init__(self, df):
        """Keep a reference to ``df``; it must contain a ``datetime`` column."""
        self.df = df

    def get_features_for_model(self):
        """Append the time features and select the model inputs.

        Returns:
            A tuple ``(features, count)`` where ``features`` is the dataframe
            restricted to ``feature_columns`` and ``count`` is the number of
            those columns.
        """
        self.append_time_features()
        # Lagged parking features are not enabled yet (see the sketch at the
        # end of this class).
        selected = self.df[feature_columns]
        return selected, len(feature_columns)

    def append_time_features(self):
        """Add plain and sine/cosine encoded time components to ``self.df``.

        The ``datetime`` column is converted with the ``%d.%m.%Y %H:%M``
        format and then broken down into date, year, month, day, weekday
        name, weekday number, time of day, hour and minute. Minute, hour,
        weekday, day and month are additionally projected onto a circle so
        that the end of a cycle lies next to its beginning.

        The encoding is inspired by
        https://medium.com/mlearning-ai/transformer-implementation-for-time-series-forecasting-a9db2db5c820
        (cf. https://github.com/nok-halfspace/Transformer-Time-Series-Forecasting/blob/main/Preprocessing.py).
        """
        frame = self.df

        # Turn the raw column into real datetimes before extracting anything.
        frame['datetime'] = pd.to_datetime(frame['datetime'], format='%d.%m.%Y %H:%M')
        moment = frame['datetime'].dt

        frame['date'] = moment.date
        frame['year'] = moment.year
        frame['month'] = moment.month
        frame['day'] = moment.day
        frame['weekdayname'] = moment.day_name()
        frame['weekday'] = moment.dayofweek
        frame['time'] = moment.strftime('%H:%M')
        frame['hour'] = moment.hour
        frame['minute'] = moment.minute

        # (component, cycle length) pairs. The month length is fixed at 30
        # days, so day 31 lands slightly past one full turn.
        cycles = (
            ('minute', 60),
            ('hour', 24),
            ('weekday', 7),
            ('day', 30),
            ('month', 12),
        )
        for component, cycle_length in cycles:
            angle = 2 * np.pi * frame[component] / cycle_length
            frame['sin_' + component] = np.sin(angle)
            frame['cos_' + component] = np.cos(angle)

    # Disabled sketch for lagged label features. NOTE: it reads
    # ``self.labels_df``, which this class does not define.
    # def get_lagged_features(self, period=24):
    #     for label in parking_data_labels:
    #         self.df[label + '_lagged_' + str(period)] = self.labels_df[label].shift(periods=period)
52 changes: 30 additions & 22 deletions data/preprocessing/single_prediction_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,8 @@
import requests_cache
from datetime import datetime, date, timedelta
from retry_requests import retry
from data.preprocessing.preprocess_features import PreprocessFeatures

from data.preprocessing.prepare_time_features import prepare_time_features

# TODO get these from metadata
feature_columns = ['ferien', 'feiertag', 'covid_19', 'olma_offa', 'temperature_2m_max',
'temperature_2m_min', 'rain_sum', 'snowfall_sum', 'sin_minute',
'cos_minute', 'sin_hour', 'cos_hour', 'sin_weekday', 'cos_weekday',
'sin_day', 'cos_day', 'sin_month', 'cos_month']
weather_api_url = "https://api.open-meteo.com/v1/forecast" # URL API


Expand All @@ -27,7 +21,15 @@ def __init__(self, raw_features_path):
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
self.openmeteo = openmeteo_requests.Client(session=retry_session)

def get_weather_forecast(self, params):
def get_weather_forecast(self, timestamp):
params = {
"latitude": 47.4239,
"longitude": 9.3748,
"daily": ["temperature_2m_max", "temperature_2m_min", "rain_sum", "snowfall_sum"],
"start_date": timestamp.strftime("%Y-%m-%d"),
"end_date": timestamp.strftime("%Y-%m-%d")
}

responses = self.openmeteo.weather_api(weather_api_url, params=params)
response = responses[0]

Expand All @@ -48,36 +50,42 @@ def get_weather_forecast(self, params):
daily_dataframe = pd.DataFrame(data=daily_data)
return daily_dataframe

# def get_parking_data_df(self):
# df_parking_data = pd.read_csv("temp-for-lagged.csv", sep=";")
# df_parking_data["datetime"] = pd.to_datetime(df_parking_data["datetime"], format='%d.%m.%Y %H:%M')
#
# df_parking_data.set_index("datetime", inplace=True)
#
# # df = pd.merge(df, df_parking_data, on="datetime", how="outer")
#
# print("df_parking_data")
# print(df_parking_data.head())
#
# return df_parking_data

def build_dataframe(self, input_date):
timestamp = datetime.strptime(input_date, '%Y-%m-%d %H:%M')

params = {
"latitude": 47.4239,
"longitude": 9.3748,
"daily": ["temperature_2m_max", "temperature_2m_min", "rain_sum", "snowfall_sum"],
"start_date": timestamp.strftime("%Y-%m-%d"),
"end_date": timestamp.strftime("%Y-%m-%d")
}

# Get Data
df_weather = self.get_weather_forecast(params)
df_weather = self.get_weather_forecast(timestamp)

# Merge Weather with other Features
df = pd.merge(df_weather, self.calendar_features, on="date", how="left")
df["datetime"] = timestamp
# df["datetime"] = pd.to_datetime(timestamp, format='%d.%m.%Y %H:%M')

# Add time-features
df = prepare_time_features(df)

df_filtered = df[feature_columns]
# parking_df = self.get_parking_data_df()

return df_filtered, len(feature_columns)
return PreprocessFeatures(df).get_features_for_model()


if __name__ == "__main__":
pd.set_option('display.max_columns', None)
date_today = date.today()
date_tomorrow = date_today + timedelta(days=1)
single_prediction_features = SinglePredictionFeatures("raw_features_2024.csv")
# print(date_tomorrow.strftime("%Y-%m-%d %H:%M"))
# print(date_today.strftime("%Y-%m-%d %H:%M"))
df_demo, features_length = single_prediction_features.build_dataframe(date_tomorrow.strftime("%Y-%m-%d %H:%M"))
print(df_demo.head())
print(df_demo.columns)
Expand Down
Binary file removed deploy/model_scripted.pt
Binary file not shown.
41 changes: 34 additions & 7 deletions deploy/single_prediction.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import torch
import json
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader
from model.scaler import Scaler
from data.preprocessing.single_prediction_features import SinglePredictionFeatures
from data.metadata.metadata import parking_data_labels

batch_size = 1 # Required for model input
parking_data_labels = ["P24", "P44", "P42", "P33", "P23", "P25", "P21", "P31", "P53", "P32", "P22", "P52", "P51",
"P43"] # TODO get these from metadata file


class SinglePrediction:
def __init__(self, model_path, raw_features_path):
def __init__(self, model_path, scaler_path, raw_features_path, metadata_path):
self.metadata = json.load(open(metadata_path))
self.labels_readable = [self.metadata["parking_sg"]["fields"][field]["label"] for field in parking_data_labels]
self.max_capacity = [self.metadata["parking_sg"]["fields"][field]["max_cap"] for field in parking_data_labels]
self.scaler = Scaler.load(scaler_path)
self.single_prediction_features = SinglePredictionFeatures(raw_features_path)
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.model = torch.jit.load(model_path, map_location=self.device)
Expand All @@ -24,17 +30,38 @@ def build_dataset(self, df):
def predict_with_model(self, dataloader, features_length):
data = next(iter(dataloader))[0]
data = data.view([batch_size, -1, features_length]).to(self.device)
return self.model(data).cpu()
return self.model(data).detach().cpu().numpy()

def pretty_prediction(self, output_scaled_back):
output_list = output_scaled_back.tolist()[0]

# TODO find cleaner solution.
rounded_list = [round(entry) for entry in output_list]
list_capped_min = [0 if entry < 0 else entry for entry in rounded_list]
list_capped_max = [self.max_capacity[i] if entry > self.max_capacity[i] else entry for i, entry in
enumerate(list_capped_min)]

return list_capped_max

def predict_for_date(self, date):
features_df, features_length = self.single_prediction_features.build_dataframe(date)
dataloader = self.build_dataset(features_df)

output = self.predict_with_model(dataloader, features_length)

return [dict(zip(parking_data_labels, row)) for row in output.tolist()]
output_scaled_back = self.scaler.inverse_transform(pd.DataFrame(output, columns=parking_data_labels))

return {
"predictions": self.pretty_prediction(output_scaled_back),
"labels": parking_data_labels,
"labels_readable": self.labels_readable,
"max_capacity": self.max_capacity
}


if __name__ == "__main__":
predict = SinglePrediction("model_scripted.pt", "../data/preprocessing/raw_features_2024.csv")
print(predict.predict_for_date("2023-10-09 00:00"))
predict = SinglePrediction("../model_scripted.pt", "../scaler.pkl", "../data/preprocessing/raw_features_2024.csv",
"../data/metadata/metadata.json")
print(predict.predict_for_date("2023-12-08 08:00"))
print(predict.predict_for_date("2023-12-10 18:00"))
print(predict.predict_for_date("2023-12-12 12:00"))

0 comments on commit c28e0f1

Please sign in to comment.