Merge https://github.com/kevinloeffler/opendata-hackathon

kevinloeffler · Dec 3, 2023 · c21a2e4 · c21a2e4
2 parents 9fc5763 + 63043bb
commit c21a2e4
Show file tree

Hide file tree

Showing 5 changed files with 131 additions and 42 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/lib/models/vanilla_lstm.py b/lib/models/vanilla_lstm.py
@@ -1,6 +1,6 @@
 import keras
 
-from lib.model import BaseModel
+from model import BaseModel
 
 # the model expects training data in the shape: [samples, step size, features]
 
@@ -25,7 +25,7 @@ def train(self, train_x, train_y, epochs: int, safe_to: str):
 
     def predict(self, data):
         input_data = data.reshape((1, self.step_size, 1))
-        return self.model.predict(input_data)[0]
+        return self.model.predict(input_data, verbose=0)[0]
 
     def summary(self):
         self.model.summary()
diff --git a/lib/path_api.py b/lib/path_api.py
@@ -1,11 +1,14 @@
 # path_api.py
-from flask import Blueprint, jsonify, request, Response
-import json
+import datetime
+from flask import Blueprint, jsonify, request
+from models.vanilla_lstm import VanillaLSTM
 from preprocessing import read_data
 from map_service import MapService
 from path_finder import PathFinder
+import numpy as np
+import pandas as pd
 
-data_file = 'data/fill-level.csv'
+data_file = 'data/days_merged.csv'
 
 with open('.env', 'r') as fh:
     vars_dict = dict(
@@ -15,20 +18,96 @@
 
 n_sensors = 42 # Number of sensors in St. Gallen
 station_0 = (47.4156038, 9.3325804) # Assumption: Emptying starts and ends at Kehrichtheizkraftwerk St.Gallen
-sensor_data = read_data(data_file, use_coordinates=True)
-sensor_data = sensor_data.loc[sensor_data.groupby('sensor_id').date.idxmax()] # Get sensor_id only once
+
+columns = [
+    'sensor_id','date','geo_point_2d','level','type'
+]
+sensor_data_raw = pd.read_csv(data_file, delimiter=',', usecols=columns)
 
 path_api = Blueprint('path_api', __name__)
 
 
+STEP_SIZE = 5
+model = VanillaLSTM(step_size=STEP_SIZE, load_from='trained_models/vanilla-lstm-1')
+
+
+no_empty_if_below = 0.4
+n_days = 5
+
+def get_next_n_days(n_days: int, no_empty_if_below: float):
+    all_needed_time = []
+    all_needed_capacity = []
+    all_visited_locations = []
+    all_predictions = {}
+    sensor_data_copy = sensor_data_raw.copy() # holds the predicted values after first iteration
+
+    for i in range(n_days):
+        sensor_data = sensor_data_copy.loc[sensor_data_copy.groupby('sensor_id').date.idxmax()] # Get sensor_id only once
+
+        if i > 0 and np.count_nonzero([v[-1] for k,v in all_predictions.items()]) == 0:
+            # all containers have been emptied previously
+            print("All containers have been emptied...")
+            break
+
+        # all_predictions maps each sensor_id to its list of predicted levels, one entry per simulated day
+
+        map_service = MapService(vars_dict["MAPS_KEY"], sensor_data, n_sensors, station_0, no_empty_if_below)
+        path_finder = PathFinder(map_service, sensor_data, station_0, n_sensors)
+
+        visited_stops, needed_time, visited_stations, needed_capacity = path_finder.find_path()
+        visited_stations_by_id = [x["sensor_id"] for x in visited_stations[1:-1]]
+        print(needed_capacity)
+        print(visited_stations_by_id)
+        most_left_point = np.argmin([float(x["lat"]) for x in visited_stations[1:-1]])
+        tour, locations = path_finder.refine_path(most_left_point+1, visited_stops) # +1 because visited_stations[1:-1]
+        locations = [station_0] + locations + [station_0]
+        print(tour)
+
+        all_needed_time.append(needed_time)
+        all_needed_capacity.append(needed_capacity)
+        all_visited_locations.append(locations)
+
+        # calculate predictions for next iteration
+        for sensor_id, values_raw in list(sensor_data_raw.groupby('sensor_id')):
+            last_5_values = values_raw.sort_values(by="date").tail(5-i)["level"].to_numpy()
+            if all_predictions.get(sensor_id):
+                # merge previous predictions with last n values
+                last_5_values = np.append(last_5_values, all_predictions[sensor_id])
+            else:
+                all_predictions[sensor_id] = []
+            for j in range(1, len(last_5_values)):
+                if (last_5_values[j] - last_5_values[j-1]) < -0.02:
+                    # Data has been emptied - set data before jump to 0
+                    last_5_values[:j] = 0
+            all_predictions[sensor_id].append(model.predict(last_5_values).ravel()[0])
+
+        # add predictions to dataset for next iteration
+        pred_date = datetime.date.today() + datetime.timedelta(days=i)
+        for sensor_id, predictions in all_predictions.items():
+            #has_been_emptied = np.in1d(sensor_id, visited_stations_by_id)[0]
+            #if has_been_emptied:
+            #    predictions[-1] = 0 # updates value in all_predictions
+
+            sensor = sensor_data[sensor_data["sensor_id"] == sensor_id].iloc[0]
+            new_entry = pd.Series({
+                'sensor_id': sensor_id, 
+                'date': pred_date.strftime('%Y-%m-%d'), 
+                'geo_point_2d': sensor["geo_point_2d"],
+                'level': predictions[-1], # has been emptied
+                'type': sensor["type"]
+            })
+            sensor_data_copy.loc[len(sensor_data_copy)] = new_entry
+
+    return all_needed_time, all_needed_capacity, all_visited_locations
+
+#get_next_n_days(n_days, no_empty_if_below)
+
 @path_api.route("", methods=['GET'])
 def get_path():
     selected_date = request.args.get('date')
     no_empty_if_below = float(request.args.get('no_empty_if_below')) if request.args.get('no_empty_if_below') is not None else 0.4
     glass_type_list = request.args.get('glass_type_list').split(",") if request.args.get('glass_type_list') is not None else None
 
-    map_service = MapService(vars_dict["MAPS_KEY"], sensor_data, n_sensors, station_0, no_empty_if_below)
-    path_finder = PathFinder(map_service, sensor_data, station_0, n_sensors)
+    all_needed_time, all_needed_capacity, all_visited_locations = get_next_n_days(5, no_empty_if_below)
 
-    _, needed_time, visited_locations = path_finder.find_path()
-    return jsonify({"visited_locations": visited_locations, "needed_time": needed_time})
+    return jsonify({"visited_locations": all_visited_locations, "needed_times": all_needed_time, "needed_capacities": all_needed_capacity})
diff --git a/lib/path_finder.py b/lib/path_finder.py
@@ -1,22 +1,22 @@
 '''
 Calculate optimal route to empty glass containers. Units are in seconds. 
 '''
-from preprocessing import read_data
 from map_service import MapService
 import numpy as np
 import pandas as pd
 import os
 
 create_map = True
 show_min_max_markers = False
-data_file = 'data/fill-level.csv'
+data_file = 'data/days_merged.csv'
 dist_file = 'data/distances.npy'
 map_file = 'map-output.png'
 map_file_refined = 'map-output-refined.png'
 
 class PathFinder:
-    time_per_working_day = 8 * 60 * 60 # 8 hours in seconds
-    time_per_emptying = 60 * 60 # 15 minutes in seconds, 5 minutes per container
+    capacity = 10 - 1 # size of trough minus 1 container
+    time_per_working_day = 6 * 60 * 60 # 6 hours in seconds divided by 3 because only 40/120 containers have sensors
+    time_per_emptying = 15 * 60 # 15 minutes in seconds, 5 minutes per container
 
     def __init__(self,  map_service: MapService, sensor_data: pd.DataFrame, station_0: tuple, n_sensors: int):
         self.sensor_data = sensor_data
@@ -35,48 +35,50 @@ def __init__(self,  map_service: MapService, sensor_data: pd.DataFrame, station_
     def find_path(self):
         cost_matrix = self.map_service.get_costs(self.dist_matrix)
 
+        needed_capacity = 0
         needed_time = 0
         current_stop_idx = -1 #station_0 index
-        visited_stops = []
+        visited_stops = [current_stop_idx]
         visited_locations = [self.station_0]
 
         # cost_matrix[current_stop_idx, -1] is the time needed from the current stop back to station_0
-        while (needed_time < (self.time_per_working_day - cost_matrix[current_stop_idx, -1] - self.time_per_emptying)):
-            visited_stops.append(current_stop_idx)
-
-            location_information = {}
-            location_information["lat"] = self.sensor_data.iloc[current_stop_idx]["geo_point_2d"].split(", ")[0]
-            location_information["lng"] = self.sensor_data.iloc[current_stop_idx]["geo_point_2d"].split(", ")[1]
-            location_information["level"] = self.sensor_data.iloc[current_stop_idx]["level"]
-            location_information["sensor_id"] = self.sensor_data.iloc[current_stop_idx]["sensor_id"]
-            location_information["date"] = self.sensor_data.iloc[current_stop_idx]["date"]
-            location_information["type"] = self.sensor_data.iloc[current_stop_idx]["type"].split(", ")[0]
-
-            visited_locations.append(location_information)
-
+        while (needed_time < (self.time_per_working_day - cost_matrix[current_stop_idx, -1] - self.time_per_emptying) and needed_capacity < self.capacity):
             if len(visited_stops) == self.n_sensors+1:
                 # all stops visited
                 break
+
             min_cost = np.min(np.delete(cost_matrix[current_stop_idx,:], visited_stops, axis=0)) # Min cost of unvisited stops
             for idx in np.argwhere(cost_matrix[current_stop_idx,:] == min_cost).ravel():
                 if idx not in visited_stops:
                     next_stop_idx = int(idx)
 
-            actual_travel_time = self.sensor_data.iloc[next_stop_idx]["level"]
+            needed_capacity += self.sensor_data.iloc[next_stop_idx]["level"]
+            actual_travel_time = self.dist_matrix[current_stop_idx][next_stop_idx]
             needed_time += actual_travel_time + self.time_per_emptying
             current_stop_idx = next_stop_idx
+            visited_stops.append(next_stop_idx)
+            location_information = {}
+            location_information["lat"] = self.sensor_data.iloc[next_stop_idx]["geo_point_2d"].split(", ")[0]
+            location_information["lng"] = self.sensor_data.iloc[next_stop_idx]["geo_point_2d"].split(", ")[1]
+            location_information["level"] = self.sensor_data.iloc[next_stop_idx]["level"]
+            location_information["sensor_id"] = self.sensor_data.iloc[next_stop_idx]["sensor_id"]
+            location_information["date"] = self.sensor_data.iloc[next_stop_idx]["date"]
+            location_information["type"] = self.sensor_data.iloc[next_stop_idx]["type"].split(", ")[0]
+
+            visited_locations.append(location_information)
 
         visited_stops.append(-1) # End at station_0
         visited_locations.append(self.station_0)
         needed_time += cost_matrix[current_stop_idx, -1] # Add time to go to station_0
 
-        return visited_stops, needed_time, visited_locations
+        return visited_stops, needed_time, visited_locations, needed_capacity
 
-    def refine_path(self, starting_point, visited_stops):
+    def refine_path(self, starting_point_idx, visited_stops):
         # refine path using dijkstra
         unvisited = visited_stops[1:-1]
-        tour = [starting_point] # Start from the first point
-        locations = [self.sensor_data.iloc[starting_point]["geo_point_2d"].split(", ")]
+        tour = [visited_stops[starting_point_idx]] # Start from the first point
+        unvisited.remove(tour[-1])
+        locations = [self.sensor_data.iloc[tour[-1]]["geo_point_2d"].split(", ")]
 
         while unvisited:
             current_point = tour[-1]
@@ -100,17 +102,21 @@ def refine_path(self, starting_point, visited_stops):
     n_sensors = 42 # Number of sensors in St. Gallen
     no_empty_if_below = 0.4
     station_0 = (47.4156038, 9.3325804) # Assumption: Emptying starts and ends at Kehrichtheizkraftwerk St.Gallen
-    sensor_data = read_data(data_file, use_coordinates=True)
+    columns = [
+        'sensor_id','date','geo_point_2d','level','type'
+    ]
+    sensor_data = pd.read_csv(data_file, delimiter=',', usecols=columns)
     sensor_data = sensor_data.loc[sensor_data.groupby('sensor_id').date.idxmax()] # Get sensor_id only once
 
     map_service = MapService(vars_dict["MAPS_KEY"], sensor_data, n_sensors, station_0, no_empty_if_below)
     path_finder = PathFinder(map_service, sensor_data, station_0, n_sensors)
 
     levels = [sensor_data.iloc[i]["level"] for i in range(n_sensors)]
 
-    visited_stops, needed_time, visited_locations = path_finder.find_path()
+    visited_stops, needed_time, visited_locations, needed_capacity = path_finder.find_path()
 
     print("Needed time:", needed_time)
+    print("Needed capacity:", needed_capacity)
     print("Path:")
     for stop in visited_stops:
         if stop != -1:
@@ -128,7 +134,7 @@ def refine_path(self, starting_point, visited_stops):
                 f.write(chunk)
         f.close()
 
-    most_left_point = np.argmin([float(x[1]) for x in visited_locations])
+    most_left_point = np.argmin([float(x["lat"]) for x in visited_locations[1:-1]])
     tour, locations = path_finder.refine_path(most_left_point, visited_stops)
     locations = [station_0] + locations + [station_0]
 

diff --git a/main.py b/main.py
@@ -1,13 +1,17 @@
 from numpy import array
+import pandas as pd
 
 from lib.models.vanilla_lstm import VanillaLSTM
 from lib.preprocessing import read_data, sequence_data, get_sensor_values, get_training_data, split_data
 
 
 STEP_SIZE = 5
 
-
-raw_data = read_data('data/fill-level.csv')
+columns = [
+    'sensor_id','date','geo_point_2d','level','type'
+]
+raw_data = pd.read_csv("data/days_merged.csv", delimiter=',', usecols=columns)
+raw_data.sort_values(["sensor_id", "date"], inplace=True)
 # test_sensor = get_sensor_values(data, '107075 | 2B2A')
 raw_train, raw_test = split_data(data=raw_data, ratio=0.9)
 train = get_training_data(raw_train)
@@ -19,12 +23,12 @@
 #    print(x, '->', train_y[index])
 
 # EXAMPLE: Create model
-'''
+
 vanilla_lstm_model = VanillaLSTM(step_size=STEP_SIZE)
-vanilla_lstm_model.train(train_x, train_y, 40, 'trained_models/vanilla-lstm')
+vanilla_lstm_model.train(train_x, train_y, 3, 'trained_models/vanilla-lstm')
 accuracy = vanilla_lstm_model.test(test_x, test_y)
 print('accuracy:', round(100 * accuracy, 3), '%')
-'''
+
 
 # EXAMPLE: Load model from disk
 model = VanillaLSTM(step_size=STEP_SIZE, load_from='trained_models/vanilla-lstm-1')