Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
Norukh committed Dec 3, 2023
2 parents 9fc5763 + 63043bb commit c21a2e4
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 42 deletions.
Binary file added .DS_Store
Binary file not shown.
4 changes: 2 additions & 2 deletions lib/models/vanilla_lstm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import keras

from lib.model import BaseModel
from model import BaseModel

# the model expects training data in the shape: [samples, step size, features]

Expand All @@ -25,7 +25,7 @@ def train(self, train_x, train_y, epochs: int, safe_to: str):

def predict(self, data):
input_data = data.reshape((1, self.step_size, 1))
return self.model.predict(input_data)[0]
return self.model.predict(input_data, verbose=0)[0]

def summary(self):
self.model.summary()
97 changes: 88 additions & 9 deletions lib/path_api.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# path_api.py
from flask import Blueprint, jsonify, request, Response
import json
import datetime
from flask import Blueprint, jsonify, request
from models.vanilla_lstm import VanillaLSTM
from preprocessing import read_data
from map_service import MapService
from path_finder import PathFinder
import numpy as np
import pandas as pd

data_file = 'data/fill-level.csv'
data_file = 'data/days_merged.csv'

with open('.env', 'r') as fh:
vars_dict = dict(
Expand All @@ -15,20 +18,96 @@

n_sensors = 42 # Number of sensors in St. Gallen
station_0 = (47.4156038, 9.3325804) # Assumption: Emptying starts and ends at Kehrichtheizkraftwerk St.Gallen
sensor_data = read_data(data_file, use_coordinates=True)
sensor_data = sensor_data.loc[sensor_data.groupby('sensor_id').date.idxmax()] # Get sensor_id only once

columns = [
'sensor_id','date','geo_point_2d','level','type'
]
sensor_data_raw = pd.read_csv(data_file, delimiter=',', usecols=columns)

path_api = Blueprint('path_api', __name__)


STEP_SIZE = 5
model = VanillaLSTM(step_size=STEP_SIZE, load_from='trained_models/vanilla-lstm-1')


no_empty_if_below = 0.4
n_days = 5

def get_next_n_days(n_days: int, no_empty_if_below: float):
    """Plan one emptying tour per day for up to ``n_days`` days.

    Each day's tour is computed with ``PathFinder`` from the most recent row
    per sensor. Afterwards the LSTM model predicts the next fill level of every
    sensor and the prediction is appended to a working copy of the dataset, so
    the following day's tour is planned on the predicted levels.

    Args:
        n_days: Maximum number of days to plan.
        no_empty_if_below: Threshold forwarded unchanged to ``MapService``
            (presumably the fill level below which a container is skipped;
            verify against ``MapService``).

    Returns:
        A tuple ``(all_needed_time, all_needed_capacity,
        all_visited_locations)``; each list holds one entry per planned day.
        Fewer than ``n_days`` entries are returned when planning stops early.
    """
    all_needed_time = []
    all_needed_capacity = []
    all_visited_locations = []
    # sensor_id -> list of predicted levels, one per planned day
    all_predictions = {}
    # Working copy that receives the predicted rows after every iteration.
    sensor_data_copy = sensor_data_raw.copy()

    for i in range(n_days):
        # Keep only the most recent row of every sensor.
        # NOTE(review): 'date' is compared as read from the CSV; predicted rows
        # are only picked up if their date sorts after the measured ones.
        sensor_data = sensor_data_copy.loc[sensor_data_copy.groupby('sensor_id').date.idxmax()]

        if i > 0 and np.count_nonzero([preds[-1] for preds in all_predictions.values()]) == 0:
            # Every latest prediction is 0, so there is nothing left to collect.
            print("All containers have been emptied...")
            break

        map_service = MapService(vars_dict["MAPS_KEY"], sensor_data, n_sensors, station_0, no_empty_if_below)
        path_finder = PathFinder(map_service, sensor_data, station_0, n_sensors)

        visited_stops, needed_time, visited_stations, needed_capacity = path_finder.find_path()
        # First and last entries are station_0; only the inner ones are containers.
        inner_stations = visited_stations[1:-1]
        visited_stations_by_id = [x["sensor_id"] for x in inner_stations]
        print(needed_capacity)
        print(visited_stations_by_id)

        if inner_stations:
            # NOTE(review): the variable is called "most left" but the minimum
            # is taken over "lat"; confirm whether "lng" was intended.
            most_left_point = np.argmin([float(x["lat"]) for x in inner_stations])
            # +1 because the index refers to visited_stations[1:-1]
            tour, locations = path_finder.refine_path(most_left_point+1, visited_stops)
            print(tour)
        else:
            # No container was visited; np.argmin would raise on an empty list.
            locations = []
        locations = [station_0] + locations + [station_0]

        all_needed_time.append(needed_time)
        all_needed_capacity.append(needed_capacity)
        all_visited_locations.append(locations)

        # Calculate the predictions used by the next iteration.
        for sensor_id, values_raw in sensor_data_raw.groupby('sensor_id'):
            predictions = all_predictions.setdefault(sensor_id, [])
            measured = values_raw.sort_values(by="date")["level"].to_numpy()
            # Model input: the last STEP_SIZE values of measured history followed
            # by earlier predictions. np.append returns a fresh array, so the
            # in-place zeroing below never touches the raw data.
            window = np.append(measured, predictions)[-STEP_SIZE:]
            for j in range(1, len(window)):
                if (window[j] - window[j-1]) < -0.02:
                    # Container has been emptied - set data before the drop to 0
                    window[:j] = 0
            predictions.append(model.predict(window).ravel()[0])

        # Add the predictions to the dataset for the next iteration.
        pred_date = datetime.date.today() + datetime.timedelta(days=i)
        for sensor_id, predictions in all_predictions.items():
            # TODO(review): levels of containers visited today
            # (visited_stations_by_id) are not reset to 0 before being written
            # back; confirm whether that is intended.
            sensor = sensor_data[sensor_data["sensor_id"] == sensor_id].iloc[0]
            new_entry = pd.Series({
                'sensor_id': sensor_id,
                'date': pred_date.strftime('%Y-%m-%d'),
                'geo_point_2d': sensor["geo_point_2d"],
                'level': predictions[-1],  # latest predicted level
                'type': sensor["type"]
            })
            # Relies on the default RangeIndex: len() is the next free label.
            sensor_data_copy.loc[len(sensor_data_copy)] = new_entry

    return all_needed_time, all_needed_capacity, all_visited_locations

#get_next_n_days(n_days, no_empty_if_below)

@path_api.route("", methods=['GET'])
def get_path():
selected_date = request.args.get('date')
no_empty_if_below = float(request.args.get('no_empty_if_below')) if request.args.get('no_empty_if_below') is not None else 0.4
glass_type_list = request.args.get('glass_type_list').split(",") if request.args.get('glass_type_list') is not None else None

map_service = MapService(vars_dict["MAPS_KEY"], sensor_data, n_sensors, station_0, no_empty_if_below)
path_finder = PathFinder(map_service, sensor_data, station_0, n_sensors)
all_needed_time, all_needed_capacity, all_visited_locations = get_next_n_days(5, no_empty_if_below)

_, needed_time, visited_locations = path_finder.find_path()
return jsonify({"visited_locations": visited_locations, "needed_time": needed_time})
return jsonify({"visited_locations": all_visited_locations, "needed_times": all_needed_time, "needed_capacities": all_needed_capacity})
58 changes: 32 additions & 26 deletions lib/path_finder.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
'''
Calculate optimal route to empty glass containers. Units are in seconds.
'''
from preprocessing import read_data
from map_service import MapService
import numpy as np
import pandas as pd
import os

create_map = True
show_min_max_markers = False
data_file = 'data/fill-level.csv'
data_file = 'data/days_merged.csv'
dist_file = 'data/distances.npy'
map_file = 'map-output.png'
map_file_refined = 'map-output-refined.png'

class PathFinder:
time_per_working_day = 8 * 60 * 60 # 8 hours in seconds
time_per_emptying = 60 * 60 # 15 minutes in seconds, 5 minutes per container
capacity = 10 - 1 # size of trough minus 1 container
time_per_working_day = 6 * 60 * 60 # 6 hours in seconds divided by 3 because only 40/120 containers have sensors
time_per_emptying = 15 * 60 # 15 minutes in seconds, 5 minutes per container

def __init__(self, map_service: MapService, sensor_data: pd.DataFrame, station_0: tuple, n_sensors: int):
self.sensor_data = sensor_data
Expand All @@ -35,48 +35,50 @@ def __init__(self, map_service: MapService, sensor_data: pd.DataFrame, station_
def find_path(self):
cost_matrix = self.map_service.get_costs(self.dist_matrix)

needed_capacity = 0
needed_time = 0
current_stop_idx = -1 #station_0 index
visited_stops = []
visited_stops = [current_stop_idx]
visited_locations = [self.station_0]

# cost_matrix[current_stop_idx, -1] is the time needed to get from the current stop back to station_0
while (needed_time < (self.time_per_working_day - cost_matrix[current_stop_idx, -1] - self.time_per_emptying)):
visited_stops.append(current_stop_idx)

location_information = {}
location_information["lat"] = self.sensor_data.iloc[current_stop_idx]["geo_point_2d"].split(", ")[0]
location_information["lng"] = self.sensor_data.iloc[current_stop_idx]["geo_point_2d"].split(", ")[1]
location_information["level"] = self.sensor_data.iloc[current_stop_idx]["level"]
location_information["sensor_id"] = self.sensor_data.iloc[current_stop_idx]["sensor_id"]
location_information["date"] = self.sensor_data.iloc[current_stop_idx]["date"]
location_information["type"] = self.sensor_data.iloc[current_stop_idx]["type"].split(", ")[0]

visited_locations.append(location_information)

while (needed_time < (self.time_per_working_day - cost_matrix[current_stop_idx, -1] - self.time_per_emptying) and needed_capacity < self.capacity):
if len(visited_stops) == self.n_sensors+1:
# all stops visited
break

min_cost = np.min(np.delete(cost_matrix[current_stop_idx,:], visited_stops, axis=0)) # Min cost of unvisited stops
for idx in np.argwhere(cost_matrix[current_stop_idx,:] == min_cost).ravel():
if idx not in visited_stops:
next_stop_idx = int(idx)

actual_travel_time = self.sensor_data.iloc[next_stop_idx]["level"]
needed_capacity += self.sensor_data.iloc[next_stop_idx]["level"]
actual_travel_time = self.dist_matrix[current_stop_idx][next_stop_idx]
needed_time += actual_travel_time + self.time_per_emptying
current_stop_idx = next_stop_idx
visited_stops.append(next_stop_idx)
location_information = {}
location_information["lat"] = self.sensor_data.iloc[next_stop_idx]["geo_point_2d"].split(", ")[0]
location_information["lng"] = self.sensor_data.iloc[next_stop_idx]["geo_point_2d"].split(", ")[1]
location_information["level"] = self.sensor_data.iloc[next_stop_idx]["level"]
location_information["sensor_id"] = self.sensor_data.iloc[next_stop_idx]["sensor_id"]
location_information["date"] = self.sensor_data.iloc[next_stop_idx]["date"]
location_information["type"] = self.sensor_data.iloc[next_stop_idx]["type"].split(", ")[0]

visited_locations.append(location_information)

visited_stops.append(-1) # End at station_0
visited_locations.append(self.station_0)
needed_time += cost_matrix[current_stop_idx, -1] # Add time to go to station_0

return visited_stops, needed_time, visited_locations
return visited_stops, needed_time, visited_locations, needed_capacity

def refine_path(self, starting_point, visited_stops):
def refine_path(self, starting_point_idx, visited_stops):
# refine path using dijkstra
unvisited = visited_stops[1:-1]
tour = [starting_point] # Start from the first point
locations = [self.sensor_data.iloc[starting_point]["geo_point_2d"].split(", ")]
tour = [visited_stops[starting_point_idx]] # Start from the first point
unvisited.remove(tour[-1])
locations = [self.sensor_data.iloc[tour[-1]]["geo_point_2d"].split(", ")]

while unvisited:
current_point = tour[-1]
Expand All @@ -100,17 +102,21 @@ def refine_path(self, starting_point, visited_stops):
n_sensors = 42 # Number of sensors in St. Gallen
no_empty_if_below = 0.4
station_0 = (47.4156038, 9.3325804) # Assumption: Emptying starts and ends at Kehrichtheizkraftwerk St.Gallen
sensor_data = read_data(data_file, use_coordinates=True)
columns = [
'sensor_id','date','geo_point_2d','level','type'
]
sensor_data = pd.read_csv(data_file, delimiter=',', usecols=columns)
sensor_data = sensor_data.loc[sensor_data.groupby('sensor_id').date.idxmax()] # Get sensor_id only once

map_service = MapService(vars_dict["MAPS_KEY"], sensor_data, n_sensors, station_0, no_empty_if_below)
path_finder = PathFinder(map_service, sensor_data, station_0, n_sensors)

levels = [sensor_data.iloc[i]["level"] for i in range(n_sensors)]

visited_stops, needed_time, visited_locations = path_finder.find_path()
visited_stops, needed_time, visited_locations, needed_capacity = path_finder.find_path()

print("Needed time:", needed_time)
print("Needed capacity:", needed_capacity)
print("Path:")
for stop in visited_stops:
if stop != -1:
Expand All @@ -128,7 +134,7 @@ def refine_path(self, starting_point, visited_stops):
f.write(chunk)
f.close()

most_left_point = np.argmin([float(x[1]) for x in visited_locations])
most_left_point = np.argmin([float(x["lat"]) for x in visited_locations[1:-1]])
tour, locations = path_finder.refine_path(most_left_point, visited_stops)
locations = [station_0] + locations + [station_0]

Expand Down
14 changes: 9 additions & 5 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
from numpy import array
import pandas as pd

from lib.models.vanilla_lstm import VanillaLSTM
from lib.preprocessing import read_data, sequence_data, get_sensor_values, get_training_data, split_data


STEP_SIZE = 5


raw_data = read_data('data/fill-level.csv')
columns = [
'sensor_id','date','geo_point_2d','level','type'
]
raw_data = pd.read_csv("data/days_merged.csv", delimiter=',', usecols=columns)
raw_data.sort_values(["sensor_id", "date"], inplace=True)
# test_sensor = get_sensor_values(data, '107075 | 2B2A')
raw_train, raw_test = split_data(data=raw_data, ratio=0.9)
train = get_training_data(raw_train)
Expand All @@ -19,12 +23,12 @@
# print(x, '->', train_y[index])

# EXAMPLE: Create model
'''

vanilla_lstm_model = VanillaLSTM(step_size=STEP_SIZE)
vanilla_lstm_model.train(train_x, train_y, 40, 'trained_models/vanilla-lstm')
vanilla_lstm_model.train(train_x, train_y, 3, 'trained_models/vanilla-lstm')
accuracy = vanilla_lstm_model.test(test_x, test_y)
print('accuracy:', round(100 * accuracy, 3), '%')
'''


# EXAMPLE: Load model from disk
model = VanillaLSTM(step_size=STEP_SIZE, load_from='trained_models/vanilla-lstm-1')
Expand Down

0 comments on commit c21a2e4

Please sign in to comment.