## Import Libraries

In [9]:
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sqlalchemy import text
from tensorflow import keras

import config

## Load and prepare data

In [10]:
# Load the joined weather / air-quality history from the database.
# Table names come from the project config; identifiers cannot be bound as
# SQL parameters, so they are interpolated into the statement text.
SQLquery = text(
    f'SELECT * FROM {config.db_weather_history} AS w '
    f'JOIN {config.db_AQI_history} AS aqi ON w."datetime" = aqi."Datum"'
)

# Open the connection in a `with` block so it is released after the read
# instead of staying open for the lifetime of the kernel.
# NOTE(review): assumes config.db_login is a SQLAlchemy Engine - confirm.
with config.db_login.connect() as connection:
    data = pd.read_sql(SQLquery, con=connection)

# Drop rows with missing values in the target or in any model feature.
# Reassigning (rather than inplace=True) keeps the cell a plain
# input -> output transformation.
data = data.dropna(subset=['Wert', 'precipprob', 'windgust', 'visibility'])

## Split the data into training and testing sets

In [11]:
# Target: the air-quality value; features: three weather measurements
feature_columns = ['precipprob', 'windgust', 'visibility']
y = data['Wert']
X = data[feature_columns]

# Hold out 20 % of the rows for testing, with a fixed seed for a repeatable split.
# NOTE(review): the recorded decision-tree / random-forest scores are
# near-perfect from only three weather features - this may mean duplicated
# rows end up on both sides of the random split. Verify the join does not
# fan out before trusting the test metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Model Creation (Train and Predict)

In [12]:
# Linear regression: fit on the training split, then predict the held-out split
linear_model = LinearRegression().fit(X_train, y_train)
linear_predictions = linear_model.predict(X_test)

In [13]:
# Decision tree
# A fixed random_state makes the fit repeatable: scikit-learn permutes the
# features at every split, so equally good splits can otherwise be resolved
# differently from one run to the next.
tree_model = DecisionTreeRegressor(random_state=42)
tree_model.fit(X_train, y_train)
tree_predictions = tree_model.predict(X_test)

In [14]:
# Random forest
# Seed the bootstrap sampling and per-tree randomness so that a
# Restart & Run All reproduces the same model and the same metrics.
forest_model = RandomForestRegressor(random_state=42)
forest_model.fit(X_train, y_train)
forest_predictions = forest_model.predict(X_test)

In [15]:
# Neural network
# Seed Python, NumPy and TensorFlow in one call so weight initialisation and
# batch shuffling are repeatable across kernel restarts.
keras.utils.set_random_seed(42)

# Two hidden ReLU layers of 64 units and a single linear output unit
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=32)

# The final Dense(1) layer yields one column per sample, i.e. shape (n_samples, 1)
nn_predictions = model.predict(X_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Model Evaluation

In [16]:
def report_regression_metrics(label, y_true, y_pred, rmse_label=None):
    """Print RMSE, MAE, MSE, R^2 and explained variance for one model.

    RMSE is computed as the square root of the MSE rather than through
    ``mean_squared_error(..., squared=False)``: the ``squared`` keyword was
    deprecated in scikit-learn 1.4 and is rejected by releases that dropped it.

    Parameters
    ----------
    label : str
        Model name used as the prefix of every printed line.
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predictions of the model for the same samples.
    rmse_label : str, optional
        Alternative prefix for the RMSE line only; defaults to ``label``.

    Returns
    -------
    dict
        The scores under the keys ``rmse``, ``mae``, ``mse``, ``r2`` and
        ``explained_variance``.
    """
    mse_value = mean_squared_error(y_true, y_pred)
    scores = {
        "rmse": mse_value ** 0.5,
        "mae": mean_absolute_error(y_true, y_pred),
        "mse": mse_value,
        "r2": r2_score(y_true, y_pred),
        "explained_variance": explained_variance_score(y_true, y_pred),
    }

    print(f"{rmse_label or label} RMSE:", scores["rmse"])
    print(f"{label} Mean Absolute Error (MAE):", scores["mae"])
    print(f"{label} Mean Squared Error (MSE):", scores["mse"])
    print(f"{label} R^2 Score:", scores["r2"])
    print(f"{label} Explained Variance Score:", scores["explained_variance"])
    return scores


# (label, RMSE-line label override, predictions) for every trained model.
# The linear model's RMSE line historically carries the German label.
evaluated_models = [
    ("Linear Regression", "Lineare Regression", linear_predictions),
    ("Decision Tree", None, tree_predictions),
    ("Random Forest", None, forest_predictions),
    ("Neuronal Network", None, nn_predictions),
]

model_scores = {}
for position, (label, rmse_label, model_predictions) in enumerate(evaluated_models):
    # Separator line between two consecutive model reports
    if position > 0:
        print("-------")
    model_scores[label] = report_regression_metrics(label, y_test, model_predictions, rmse_label)

# Keep the per-model RMSE names the original cell defined
linear_rmse = model_scores["Linear Regression"]["rmse"]
tree_rmse = model_scores["Decision Tree"]["rmse"]
forest_rmse = model_scores["Random Forest"]["rmse"]
nn_rmse = model_scores["Neuronal Network"]["rmse"]

# The original cell reused these names for every model, so they ended up
# holding the neural-network scores; preserved for any cell that reads them.
mae = model_scores["Neuronal Network"]["mae"]
mse = model_scores["Neuronal Network"]["mse"]
r2 = model_scores["Neuronal Network"]["r2"]
explained_variance = model_scores["Neuronal Network"]["explained_variance"]


Lineare Regression RMSE: 10.585922168900106
Linear Regression Mean Absolute Error (MAE): 7.474681128697551
Linear Regression Mean Squared Error (MSE): 112.06174816601073
Linear Regression R^2 Score: 0.27852461639661275
Linear Regression Explained Variance Score: 0.2785247552922785
-------
Decision Tree RMSE: 1.7183659812628398
Decision Tree Mean Absolute Error (MAE): 0.44076340028854644
Decision Tree Mean Squared Error (MSE): 2.9527816455614024
Decision Tree R^2 Score: 0.9809894160559365
Decision Tree Explained Variance Score: 0.9809905188590257
-------
Random Forest RMSE: 1.7199321347021936
Random Forest Mean Absolute Error (MAE): 0.4409083188802661
Random Forest Mean Squared Error (MSE): 2.9581665479812447
Random Forest R^2 Score: 0.980954747004252
Random Forest Explained Variance Score: 0.980955861705433
-------
Neuronal Network RMSE: 9.321453684212843
Neuronal Network Mean Absolute Error (MAE): 6.638284986911822
Neuronal Network Mean Squared Error (MSE): 86.88949878692517
Neuronal 

In [17]:
# Put the observed target values next to the decision-tree predictions
comparison_columns = {
    'Tatsächlicher Wert': y_test,
    'Vorhersage': tree_predictions,
}
results = pd.DataFrame(comparison_columns)
print(results)

        Tatsächlicher Wert  Vorhersage
78030                26.88   26.880000
136185               11.47   11.470000
71079                34.32   34.320000
126118               17.99   17.990000
45701                 4.10    4.100000
...                    ...         ...
60733                43.02   43.020000
6807                 18.82   18.820000
72003                17.98   17.980000
103481               10.58   12.675484
128225               14.89   14.890000

[24144 rows x 2 columns]


## Model Deployment

In [18]:
# One constant for the artifact path so the save and load calls cannot drift apart
MODEL_PATH = 'Decision_Tree_AQI_Project.pkl'

# Persist the fitted decision tree to a pickle file
joblib.dump(tree_model, MODEL_PATH)

# Reload the model from disk.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code -
# only load artifacts that come from a trusted source.
loaded_model = joblib.load(MODEL_PATH)

print(X_test)

# Use the reloaded model for predictions on the held-out features
predictions = loaded_model.predict(X_test)

print(predictions)

        precipprob  windgust  visibility
78030            0      16.7         8.8
136185         100      73.5         9.3
71079            0      19.4         4.4
126118           0      20.9        32.5
45701          100      28.0         9.4
...            ...       ...         ...
60733            0      10.4        28.1
6807             0      24.7        22.8
72003            0      16.5        12.9
103481           0      33.5        10.0
128225         100      26.6        23.8

[24144 rows x 3 columns]
[26.88 11.47 34.32 ... 17.98 12.61 14.89]
