
Commit

Norukh committed Dec 3, 2023
2 parents c21a2e4 + 089400f commit 8a1217c
Showing 6 changed files with 206 additions and 27 deletions.
22 changes: 18 additions & 4 deletions download-data.sh
@@ -1,8 +1,22 @@
#!/bin/bash

url='https://www.daten.stadt.sg.ch/api/explore/v2.1/catalog/datasets/fullstandssensoren-sammelstellen-stadt-stgallen/exports/csv?lang=en&timezone=Europe%2FZurich&use_labels=true&delimiter=%3B'
output_file='data/fill-levels-2.csv'
url_w='https://www.daten.stadt.sg.ch/api/explore/v2.1/catalog/datasets/fuellstandsensoren-glassammelstellen-weissglas/exports/csv?lang=de&timezone=Europe%2FZurich&use_labels=true&delimiter=%3B'
output_file_w='data/data_w.csv'

curl -Lo "$output_file" "$url"
curl -Lo "$output_file_w" "$url_w"

echo "Download complete. File saved to $output_file."
echo "Download (w) complete. File saved to $output_file_w."

url_b='https://www.daten.stadt.sg.ch/api/explore/v2.1/catalog/datasets/fuellstandsensoren-glassammelstellen-braunglas/exports/csv?lang=de&timezone=Europe%2FZurich&use_labels=true&delimiter=%3B'
output_file_b='data/data_b.csv'

curl -Lo "$output_file_b" "$url_b"

echo "Download (w) complete. File saved to $output_file_b."

url_g='https://www.daten.stadt.sg.ch/api/explore/v2.1/catalog/datasets/fuellstandsensoren-glassammelstellen-gruenglas/exports/csv?lang=de&timezone=Europe%2FZurich&use_labels=true&delimiter=%3B'
output_file_g='data/data_g.csv'

curl -Lo "$output_file_g" "$url_g"

echo "Download (w) complete. File saved to $output_file_g."
58 changes: 47 additions & 11 deletions lib/preprocessing.py
@@ -3,29 +3,59 @@
from numpy import array
import pandas as pd


MAX_SENSOR_INPUT = 1600


def read_data(from_path: str, use_coordinates: bool = False):
def read_data(from_path_w: str, from_path_g: str, from_path_b: str, use_coordinates: bool = True):
    columns = [
        'Gemessen am',
        'Tags',
        'Füllstandsdistanz',
        'Sensorname',
        'measured_at',
        'data_distance',
        'name',
    ]

    if use_coordinates:
        columns.append('geo_point_2d')
        # TODO: needs to be implemented in later functions (e.g. _merge_days) if really needed

    raw_data_w = pd.read_csv(from_path_w, delimiter=';', usecols=columns)
    raw_data_w = raw_data_w.rename(columns={'measured_at': 'date', 'data_distance': 'level', 'name': 'sensor_id'})

    # add a type column identifying the white-glass dataset
    glasstype = "Weissglas"
    raw_data_w['type'] = glasstype
    filtered_data_w = raw_data_w[raw_data_w['level'].notna()]  # filter out all rows with invalid sensor data
    days_merged_w = _merge_days(filtered_data_w, use_coordinates)
    days_merged_w['level'] = days_merged_w['level'].apply(lambda level: normalize_data(level))  # normalise data

    raw_data_g = pd.read_csv(from_path_g, delimiter=';', usecols=columns)
    raw_data_g = raw_data_g.rename(columns={'measured_at': 'date', 'data_distance': 'level', 'name': 'sensor_id'})

    # add a type column identifying the green-glass dataset
    glasstype = "Grünglas"
    raw_data_g['type'] = glasstype
    filtered_data_g = raw_data_g[raw_data_g['level'].notna()]  # filter out all rows with invalid sensor data
    days_merged_g = _merge_days(filtered_data_g, use_coordinates)
    days_merged_g['level'] = days_merged_g['level'].apply(lambda level: normalize_data(level))  # normalise data

    raw_data_b = pd.read_csv(from_path_b, delimiter=';', usecols=columns)
    raw_data_b = raw_data_b.rename(columns={'measured_at': 'date', 'data_distance': 'level', 'name': 'sensor_id'})

    # add a type column identifying the brown-glass dataset
    glasstype = "Braunglas"
    raw_data_b['type'] = glasstype
    filtered_data_b = raw_data_b[raw_data_b['level'].notna()]  # filter out all rows with invalid sensor data
    days_merged_b = _merge_days(filtered_data_b, use_coordinates)
    days_merged_b['level'] = days_merged_b['level'].apply(lambda level: normalize_data(level))  # normalise data

    raw_data = pd.read_csv(from_path, delimiter=';', usecols=columns)
    raw_data = raw_data.rename(columns={'Gemessen am': 'date', 'Tags': 'type', 'Füllstandsdistanz': 'level', 'Sensorname': 'sensor_id'})

    filtered_data = raw_data[raw_data['level'].notna()]  # filter out all rows with invalid sensor data
    days_merged = _merge_days(filtered_data, use_coordinates)
    days_merged['level'] = days_merged['level'].apply(lambda level: normalize_data(level))  # normalise data

    days_merged = pd.concat([days_merged_b, days_merged_g, days_merged_w]).sort_values(by="date")

    return days_merged


def data_split(data: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    train_data = data[0:int(len(data)*0.9)]
    validate_data = data[int(len(data)*0.9):]
@@ -76,3 +106,9 @@ def check_if_array_is_ascending(array: list[float]) -> bool:
def split_data(data: pd.DataFrame, ratio: float) -> tuple[pd.DataFrame, pd.DataFrame]:
    total_len = len(data)
    return data[0:round(total_len * ratio)], data[round(total_len * ratio):]


if __name__ == '__main__':
    file_path = "data/days_merged.csv"
    days_merged = read_data("data/data_w.csv", "data/data_g.csv", "data/data_b.csv")
    days_merged.to_csv(file_path, sep=',', index=False)
8 changes: 6 additions & 2 deletions lib/sensor_api.py
@@ -1,12 +1,16 @@
# sensor_api.py
from flask import Blueprint, jsonify
from preprocessing import read_data
import pandas as pd

sensor_api = Blueprint('sensor_api', __name__)

data_file = 'data/fill-level.csv'
data_file = 'data/days_merged.csv'

sensor_data = read_data(data_file, use_coordinates=True)
columns = [
    'sensor_id', 'date', 'geo_point_2d', 'level', 'type'
]
sensor_data = pd.read_csv(data_file, delimiter=',', usecols=columns)
sensor_data = sensor_data.loc[sensor_data.groupby('sensor_id').date.idxmax()]  # keep only the most recent measurement per sensor_id


2 changes: 1 addition & 1 deletion predictor-app/src/api.js
@@ -1,7 +1,7 @@
/* Use the exported functions to call the API.
* If necessary, adjust the backend address below:
*/
const backend = "http://localhost:5000";
const backend = "http://127.0.0.1:5000";

export function getPath(params) {
return getJson("/path", params).then(parseJSON);
27 changes: 18 additions & 9 deletions readme.md
@@ -1,7 +1,7 @@
# Open Data Hack 2023

**Goal:** Use an LSTM AI model to predict the fill levels of recycling stations in St. Gallen
and create a pathfinding algorithm that finds the ideal routes for city employees, for the next week.
and create a pathfinding algorithm that finds the ideal routes for city employees, for the next work week.

### Components

@@ -10,21 +10,30 @@ and create a pathfinding algorithm that finds the ideal routes for city employee
3) Visualise optimal route with a web app

### Setup
Create a conda environment with `conda create --name <env> --file requirements.txt`. We recommend using Python version 3.9.
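
For example (a minimal sketch; the environment name `odh23` is just a placeholder, and the pinned list in `requirements.txt` targets osx-arm64):

```bash
# create the environment from the pinned package list, then activate it
conda create --name odh23 --file requirements.txt
conda activate odh23
```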

Download the file [here](https://www.daten.stadt.sg.ch/explore/dataset/fullstandssensoren-sammelstellen-stadt-stgallen/export/?disjunctive.name&disjunctive.tags&sort=measured_at)
and save it at `data/fill-level.csv`, or use the `download-data.sh` script.
Create a Google Maps API key and save it for later steps (you can easily find the documentation online).

#### Update
The API has changed: it has been split into three parts, one per glass type (see the dataset links under Alternative Setup below).
Run the bash script `download-data.sh`, which downloads the data into the `data/` folder. Afterwards, run `preprocessing.py` directly, which creates a single CSV file from all three datasets; see the sketch below.
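
Put together, the two steps might look like this (a sketch assuming both commands are run from the project root, so the relative `data/` paths in the scripts resolve):

```bash
# fetch the three per-glass-type CSV exports into data/
bash download-data.sh

# merge them into a single file, data/days_merged.csv
python lib/preprocessing.py
```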

##### Alternative Setup:
Download the files below into the `data/` folder:
- [Füllstandsensoren Glassammelstellen (Weissglas)](https://www.daten.stadt.sg.ch/explore/dataset/fuellstandsensoren-glassammelstellen-weissglas/table/?disjunctive.device_id&disjunctive.name)
- [Füllstandsensoren Glassammelstellen (Grünglas)](https://www.daten.stadt.sg.ch/explore/dataset/fuellstandsensoren-glassammelstellen-gruenglas/table/?disjunctive.device_id&disjunctive.name)
- [Füllstandsensoren Glassammelstellen (Braunglas)](https://www.daten.stadt.sg.ch/explore/dataset/fuellstandsensoren-glassammelstellen-braunglas/table/?disjunctive.device_id&disjunctive.name)

### API
#### Train the model
To train the model, the datasets must be available and merged (as done in the Setup step), so that the file exists under `data/days_merged.csv`. Afterwards, you can execute `main.py` in the project root, which will train the model and save a snapshot to the `trained-models/` folder.
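
For example (a sketch; assumes `data/days_merged.csv` exists from the Setup step):

```bash
# trains the model and saves a snapshot to the trained-models/ folder
python main.py
```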

#### API
The Google Maps API key must be saved as `MAPS_KEY=<key>` in a `.env` file in the project root.

In order to use the model, the datasets must be available and merged (as done in the Setup step), and the model must be trained. Afterwards, the command `flask --app lib/api run` can be executed from the project root, as sketched below.
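
A minimal sequence might look like this (a sketch; the `.env` line mirrors the format given above):

```bash
# one-time: store the Google Maps key in .env at the project root
echo 'MAPS_KEY=<key>' >> .env

# start the backend; Flask serves on http://127.0.0.1:5000 by default,
# matching the address configured in predictor-app/src/api.js
flask --app lib/api run
```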

#### UI
The Google Maps API key must be saved in a second location as `REACT_APP_GOOGLE_MAPS_API_KEY=<key>` in a `.env` file in the `predictor-app/` folder.

Use the generate_dummy_data method to receive random sensor values. The model should return the prediction the same way.
The method takes a list of the requested sensor names as argument.
The UI is located in the `predictor-app/` folder. First run `npm install` in that folder, then start it using `npm run start`; see the sketch below.
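
Put together (a sketch; the `.env` line mirrors the format given above):

```bash
cd predictor-app
echo 'REACT_APP_GOOGLE_MAPS_API_KEY=<key>' >> .env

npm install    # one-time dependency install
npm run start  # serves the development build locally
```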

### Ideas
Ideas that can be explored if there is time:
116 changes: 116 additions & 0 deletions requirements.txt
@@ -0,0 +1,116 @@
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: osx-arm64
absl-py=2.0.0=pypi_0
astunparse=1.6.3=pypi_0
blinker=1.7.0=pyhd8ed1ab_0
brotli=1.1.0=hb547adb_1
brotli-bin=1.1.0=hb547adb_1
bzip2=1.0.8=h93a5062_5
ca-certificates=2023.11.17=hf0a4a13_0
cachetools=5.3.2=pypi_0
certifi=2023.11.17=pyhd8ed1ab_0
charset-normalizer=3.3.2=pypi_0
click=8.1.7=unix_pyh707e725_0
contourpy=1.2.0=py39he9de807_0
cycler=0.12.1=pyhd8ed1ab_0
flask=3.0.0=pyhd8ed1ab_0
flatbuffers=23.5.26=pypi_0
fonttools=4.45.1=py39h17cfd9d_0
freetype=2.12.1=hadb7bae_2
gast=0.5.4=pypi_0
google-auth=2.24.0=pypi_0
google-auth-oauthlib=1.1.0=pypi_0
google-pasta=0.2.0=pypi_0
googlemaps=4.10.0=pypi_0
grpcio=1.59.3=pypi_0
h5py=3.10.0=pypi_0
idna=3.6=pypi_0
importlib-metadata=6.9.0=pyha770c72_0
importlib-resources=6.1.1=pyhd8ed1ab_0
importlib_resources=6.1.1=pyhd8ed1ab_0
itsdangerous=2.1.2=pyhd8ed1ab_0
jinja2=3.1.2=pyhd8ed1ab_1
joblib=1.3.2=pyhd8ed1ab_0
keras=2.15.0=pypi_0
kiwisolver=1.4.5=py39hbd775c9_1
lcms2=2.15=hf2736f0_3
lerc=4.0.0=h9a09cb3_0
libblas=3.9.0=20_osxarm64_openblas
libbrotlicommon=1.1.0=hb547adb_1
libbrotlidec=1.1.0=hb547adb_1
libbrotlienc=1.1.0=hb547adb_1
libcblas=3.9.0=20_osxarm64_openblas
libclang=16.0.6=pypi_0
libcxx=16.0.6=h4653b0c_0
libdeflate=1.19=hb547adb_0
libffi=3.4.2=h3422bc3_5
libgfortran=5.0.0=13_2_0_hd922786_1
libgfortran5=13.2.0=hf226fd6_1
libjpeg-turbo=3.0.0=hb547adb_1
liblapack=3.9.0=20_osxarm64_openblas
libopenblas=0.3.25=openmp_h6c19121_0
libpng=1.6.39=h76d750c_0
libsqlite=3.44.2=h091b4b1_0
libtiff=4.6.0=ha8a6c65_2
libwebp-base=1.3.2=hb547adb_0
libxcb=1.15=hf346824_0
libzlib=1.2.13=h53f4e23_5
llvm-openmp=17.0.6=hcd81f8e_0
markdown=3.5.1=pypi_0
markupsafe=2.1.3=py39h0f82c59_1
matplotlib=3.8.2=py39hdf13c20_0
matplotlib-base=3.8.2=py39h1a09f3e_0
ml-dtypes=0.2.0=pypi_0
munkres=1.1.4=pyh9f0ad1d_0
ncurses=6.4=h463b476_2
numpy=1.26.2=py39heee92a0_0
oauthlib=3.2.2=pypi_0
openjpeg=2.5.0=h4c1507b_3
openssl=3.2.0=h0d3ecfb_1
opt-einsum=3.3.0=pypi_0
packaging=23.2=pyhd8ed1ab_0
pandas=2.1.3=py39hf8cecc8_0
pillow=10.1.0=py39h755f0b7_0
pip=23.3.1=pyhd8ed1ab_0
protobuf=4.23.4=pypi_0
pthread-stubs=0.4=h27ca646_1001
pyasn1=0.5.1=pypi_0
pyasn1-modules=0.3.0=pypi_0
pyparsing=3.1.1=pyhd8ed1ab_0
python=3.9.18=hfa1ae8a_0_cpython
python-dateutil=2.8.2=pyhd8ed1ab_0
python-tzdata=2023.3=pyhd8ed1ab_0
python_abi=3.9=4_cp39
pytz=2023.3.post1=pyhd8ed1ab_0
readline=8.2=h92ec313_1
requests=2.31.0=pypi_0
requests-oauthlib=1.3.1=pypi_0
rsa=4.9=pypi_0
scikit-learn=1.3.2=py39h172c841_1
scipy=1.11.4=py39h36c428d_0
setuptools=68.2.2=pyhd8ed1ab_0
six=1.16.0=pyh6c4a22f_0
tensorboard=2.15.1=pypi_0
tensorboard-data-server=0.7.2=pypi_0
tensorflow=2.15.0=pypi_0
tensorflow-estimator=2.15.0=pypi_0
tensorflow-io-gcs-filesystem=0.34.0=pypi_0
tensorflow-macos=2.15.0=pypi_0
tensorflow-metal=1.1.0=pypi_0
termcolor=2.4.0=pypi_0
threadpoolctl=3.2.0=pyha21a80b_0
tk=8.6.13=h5083fa2_1
tornado=6.3.3=py39h0f82c59_1
typing-extensions=4.8.0=pypi_0
tzdata=2023c=h71feb2d_0
unicodedata2=15.1.0=py39h0f82c59_0
urllib3=2.1.0=pypi_0
werkzeug=3.0.1=pyhd8ed1ab_0
wheel=0.42.0=pyhd8ed1ab_0
wrapt=1.14.1=pypi_0
xorg-libxau=1.0.11=hb547adb_0
xorg-libxdmcp=1.1.3=h27ca646_0
xz=5.2.6=h57fd34a_0
zipp=3.17.0=pyhd8ed1ab_0
zstd=1.5.5=h4f39d0f_0
