Add / improve scalers
mr-perseus committed Dec 2, 2023
1 parent bd0b336 commit a6ccf5a
Showing 2 changed files with 64 additions and 21 deletions.
4 changes: 0 additions & 4 deletions data/preprocessing/preprocess_features.py
@@ -13,10 +13,6 @@ def get_features_for_model(self):
          self.append_time_features()
          # self.get_lagged_features()

-         print("elf.df.head()")
-         print(self.df.head())
-         print(self.df.columns)
-
          return self.df[feature_columns], len(feature_columns)

      def append_time_features(self):
81 changes: 64 additions & 17 deletions model/W&B_PPSG_LSTM.ipynb
@@ -97,7 +97,10 @@
" },\n",
" 'parameters': {\n",
" 'model': {\n",
" 'value': 'lstm'\n",
" 'values': ['lstm', 'rnn']\n",
" },\n",
" 'scaler': {\n",
" 'values': ['standard', 'minmax', 'robust', 'maxabs']\n",
" },\n",
" 'batch_size': {\n",
" 'distribution': 'q_log_uniform_values',\n",
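For context, a minimal sketch of the sweep setup that would consume this parameters block. Only the two parameter entries above come from the commit; the search method, metric, project name, entry-point name (model_run), and run count below are assumptions:

import wandb

# Hypothetical surrounding sweep config; W&B samples each 'values' list per run.
sweep_config = {
    "method": "random",                                     # assumption
    "metric": {"name": "loss (test)", "goal": "minimize"},  # assumption; this key is logged further down
    "parameters": {
        "model": {"values": ["lstm", "rnn"]},
        "scaler": {"values": ["standard", "minmax", "robust", "maxabs"]},
    },
}

sweep_id = wandb.sweep(sweep_config, project="ppsg")  # placeholder project name
wandb.agent(sweep_id, function=model_run, count=10)   # model_run: assumed entry point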
@@ -195,7 +198,7 @@
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from torch.utils.data import TensorDataset, DataLoader\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler, RobustScaler\n",
"from models import LSTMModel, RNNModel, GRUModel\n",
"from scaler import Scaler\n",
"from data.metadata.metadata import feature_columns, parking_data_labels\n",
@@ -219,13 +222,13 @@
" X, y, input_dim, output_dim = load_features_labels(train_data_path)\n",
" X_train, X_val, y_train, y_val = split_train_val(X, y, config.train_val_ratio)\n",
" X_test, y_test, _, _ = load_features_labels(test_data_path)\n",
" scaler = Scaler(StandardScaler())\n",
" scaler = apply_scaler(config.scaler)\n",
" X_train_scaled, X_val_scaled, X_test_scaled, y_train_scaled, y_val_scaled, y_test_scaled = scaler.scale(X_train,\n",
" X_val,\n",
" X_test,\n",
" y_train,\n",
" y_val,\n",
" y_test)\n",
" X_val,\n",
" X_test,\n",
" y_train,\n",
" y_val,\n",
" y_test)\n",
" train_loader = build_dataset(config.batch_size, X_train_scaled, y_train_scaled)\n",
" val_loader = build_dataset(config.batch_size, X_val_scaled, y_val_scaled)\n",
" test_loader = build_dataset(config.batch_size, X_test_scaled, y_test_scaled)\n",
@@ -241,12 +244,8 @@
"\n",
" avg_test_loss, test_outputs, test_targets = test_network(network, test_loader, config.batch_size, input_dim)\n",
" wandb.log({\"loss (test)\": avg_test_loss})\n",
" plot_test_prediction(test_outputs, test_targets)\n",
"\n",
" model_scripted = torch.jit.script(network)\n",
" model_path = os.path.join(wandb.run.dir, \"model_scripted.pt\")\n",
" print(f\"Saving model to {model_path}\")\n",
" model_scripted.save(model_path)"
" plot_test_prediction(scaler, test_outputs, test_targets)\n",
" save_model_scaler(network, scaler)"
]
},
{
@@ -257,13 +256,13 @@
"def load_features_labels(csv_path):\n",
" df = pd.read_csv(csv_path, sep=\";\")\n",
"\n",
" preprocessFeatures = PreprocessFeatures(df)\n",
" preprocess_features = PreprocessFeatures(df)\n",
"\n",
" # TODO unify this\n",
" # df['datetime'] = pd.to_datetime(df['datetime'], format='%d.%m.%Y %H:%M')\n",
"\n",
" y = df[parking_data_labels]\n",
" X, input_dim = preprocessFeatures.get_features_for_model()\n",
" X, input_dim = preprocess_features.get_features_for_model()\n",
"\n",
" output_dim = len(y.columns)\n",
"\n",
@@ -283,6 +282,27 @@
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"def apply_scaler(scaler):\n",
" if scaler == \"standard\":\n",
" return Scaler(StandardScaler())\n",
" elif scaler == \"minmax\":\n",
" return Scaler(MinMaxScaler())\n",
" elif scaler == \"robust\":\n",
" return Scaler(RobustScaler())\n",
" elif scaler == \"maxabs\":\n",
" return Scaler(MaxAbsScaler())\n",
" else:\n",
" raise ValueError(f\"Invalid scaler value: {scaler}\")"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"metadata": {
@@ -398,8 +418,19 @@
"execution_count": null,
"outputs": [],
"source": [
"def plot_test_prediction(outputs, targets):\n",
"def inverse_transform(scaler, df, columns):\n",
" for col in columns:\n",
" df[col] = scaler.inverse_transform(df[col])\n",
" return df\n",
"\n",
"def plot_test_prediction(scaler, outputs, targets):\n",
" outputs = inverse_transform(scaler, pd.DataFrame(np.concatenate(outputs)), parking_data_labels)\n",
" targets = inverse_transform(scaler, pd.DataFrame(np.concatenate(targets)), parking_data_labels)\n",
"\n",
" for i, (output, target) in enumerate(zip(outputs, targets)):\n",
" if i % 10 != 0:\n",
" continue\n",
"\n",
" df_output = pd.DataFrame(output, columns=parking_data_labels)\n",
" df_target = pd.DataFrame(target, columns=parking_data_labels)\n",
"\n",
@@ -430,6 +461,22 @@
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"def save_model_scaler(network, scaler):\n",
" model_scripted = torch.jit.script(network)\n",
" model_path = os.path.join(wandb.run.dir, \"model_scripted.pt\")\n",
" print(f\"Saving model to {model_path}\")\n",
" model_scripted.save(model_path)\n",
" scaler.save(os.path.join(wandb.run.dir, \"scaler.pkl\"))"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"metadata": {
