## Import Libraries

In [23]:
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sqlalchemy import text
from tensorflow import keras

import config

## Load and prepare data

In [24]:
# Load the target ("Wert") and the three weather features from the database.
# The table names come from the project config module; SQL identifiers cannot
# be passed as bind parameters, hence the string concatenation.
SQLquery = text(
    'SELECT "Wert", "precipprob", "windgust", "visibility" FROM '
    + config.db_weather_history
    + ' AS w JOIN '
    + config.db_AQI_history
    + ' AS aqi ON w."datetime" = aqi."Datum"'
)

# Open the connection in a context manager so it is released once the read
# finishes (assumes config.db_login is a SQLAlchemy Engine -- TODO confirm).
with config.db_login.connect() as connection:
    raw_data = pd.read_sql(SQLquery, con=connection)

# Drop rows with a missing value in any modelling column. A new frame is
# created instead of mutating in place, so re-running the cell is idempotent.
data = raw_data.dropna(subset=['Wert', 'precipprob', 'windgust', 'visibility'])

## Split the data into training and testing sets

In [25]:
# Target: the air-quality value; features: the three weather columns.
y = data['Wert']
X = data[['precipprob', 'windgust', 'visibility']]

# Hold out 20 % of the rows for testing; the fixed seed keeps the split stable.
# NOTE(review): the recorded decision-tree predictions match the test targets
# almost exactly, which may indicate identical rows on both sides of the
# split -- verify before trusting the scores.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Model Creation (Train and Predict)

In [26]:
# Linear regression: fit on the training split, then predict the test split.
# `fit` returns the estimator itself, so construction and fitting are chained.
linear_model = LinearRegression().fit(X_train, y_train)
linear_predictions = linear_model.predict(X_test)

In [27]:
# Decision tree: fit on the training split, then predict the test split.
# A fixed random_state makes tie-breaking between equally good splits
# deterministic, so the fitted tree is reproducible across runs. The seed
# matches the one used for the train/test split.
tree_model = DecisionTreeRegressor(random_state=42)
tree_model.fit(X_train, y_train)
tree_predictions = tree_model.predict(X_test)

In [28]:
# Random forest: fit on the training split, then predict the test split.
# Bootstrapping and feature sampling are stochastic; a fixed random_state
# makes the ensemble and its metrics reproducible across runs. The seed
# matches the one used for the train/test split.
forest_model = RandomForestRegressor(random_state=42)
forest_model.fit(X_train, y_train)
forest_predictions = forest_model.predict(X_test)

In [29]:
# Neural network: two hidden ReLU layers of 64 units and one linear output.
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are reproducible across runs.
keras.utils.set_random_seed(42)

model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Keras returns predictions with shape (n_samples, 1); flatten to 1-D so the
# result has the same shape as the scikit-learn models' predictions and can be
# used directly as a DataFrame column.
nn_predictions = model.predict(X_test).ravel()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Model Evaluation

In [30]:
def report_regression_metrics(label, y_true, y_pred, rmse_label=None):
    """Print the standard regression metrics for one model.

    Prints, in this order: RMSE, MAE, MSE, R^2 and explained variance.

    Parameters
    ----------
    label : str
        Model name used as the prefix of every printed line.
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Model predictions for the same samples.
    rmse_label : str, optional
        Prefix used for the RMSE line only; defaults to ``label``.

    Returns
    -------
    float
        The root mean squared error.
    """
    mse = mean_squared_error(y_true, y_pred)
    # RMSE is derived from the MSE instead of passing `squared=False`, which
    # is deprecated in recent scikit-learn releases.
    rmse = np.sqrt(mse)

    print(f"{rmse_label or label} RMSE:", rmse)
    print(f"{label} Mean Absolute Error (MAE):", mean_absolute_error(y_true, y_pred))
    print(f"{label} Mean Squared Error (MSE):", mse)
    print(f"{label} R^2 Score:", r2_score(y_true, y_pred))
    print(f"{label} Explained Variance Score:", explained_variance_score(y_true, y_pred))
    return rmse


# Linear regression (the RMSE line keeps its original "Lineare" prefix).
linear_rmse = report_regression_metrics(
    "Linear Regression", y_test, linear_predictions, rmse_label="Lineare Regression"
)

print("-------")
# Decision tree
tree_rmse = report_regression_metrics("Decision Tree", y_test, tree_predictions)

print("-------")
# Random forest
forest_rmse = report_regression_metrics("Random Forest", y_test, forest_predictions)

print("-------")
# Neural network
nn_rmse = report_regression_metrics("Neuronal Network", y_test, nn_predictions)


Lineare Regression RMSE: 10.759961678139554
Linear Regression Mean Absolute Error (MAE): 7.5502604510991675
Linear Regression Mean Squared Error (MSE): 115.77677531503178
Linear Regression R^2 Score: 0.27531646432243184
Linear Regression Explained Variance Score: 0.27605018587412244
-------
Decision Tree RMSE: 1.805886161886597
Decision Tree Mean Absolute Error (MAE): 0.45638830741911884
Decision Tree Mean Squared Error (MSE): 3.261224829693504
Decision Tree R^2 Score: 0.9795869600462528
Decision Tree Explained Variance Score: 0.9795872134164955
-------
Random Forest RMSE: 1.811624074401448
Random Forest Mean Absolute Error (MAE): 0.5045562775178334
Random Forest Mean Squared Error (MSE): 3.281981786950903
Random Forest R^2 Score: 0.9794570356712279
Random Forest Explained Variance Score: 0.9794574171589946
-------
Neuronal Network RMSE: 9.540618261069623
Neuronal Network Mean Absolute Error (MAE): 6.846510995002631
Neuronal Network Mean Squared Error (MSE): 91.02339680345516
Neuronal 

In [31]:
# Compare the decision tree's predictions with the actual test targets,
# keeping the index of the test split.
results = y_test.to_frame(name='Tatsächlicher Wert').assign(Vorhersage=tree_predictions)
print(results)

       Tatsächlicher Wert  Vorhersage
44825               21.52   21.520000
63379               23.38   23.380000
56823                4.89   10.551176
59105               17.05   17.050000
59416               20.81   20.810000
...                   ...         ...
11841               55.88   55.880000
54484               18.19   18.190000
4427                22.11   22.110000
20354               77.72   77.720000
54869                9.57    9.570000

[9658 rows x 2 columns]


## Model Deployment

In [32]:
# Persist the trained decision tree and reload it to verify the round trip.
MODEL_PATH = 'Decision_Tree_AQI_Project.pkl'

# Save the model to a pkl file.
joblib.dump(tree_model, MODEL_PATH)

# Load the model back from the pkl file.
# NOTE: joblib.load unpickles the file and can execute arbitrary code; only
# load model files that come from a trusted source.
loaded_model = joblib.load(MODEL_PATH)

print(X_test)

# Use the reloaded model for predictions.
predictions = loaded_model.predict(X_test)

# The reloaded model must reproduce the in-memory model's predictions exactly;
# otherwise the persisted artefact is not the model that was evaluated.
assert np.array_equal(predictions, tree_model.predict(X_test)), \
    "Reloaded model predictions differ from the in-memory model"

print(predictions)

       precipprob  windgust  visibility
44825         100      64.8        21.4
63379         100      32.9        10.0
56823         100      39.6        10.0
59105         100      38.9        13.4
59416         100      40.9        12.9
...           ...       ...         ...
11841         100      22.1         5.1
54484         100      35.8        10.0
4427          100      38.1         6.9
20354           0      14.6         5.6
54869         100      45.2        30.0

[9658 rows x 3 columns]
[21.52       23.38       10.55117647 ... 22.11       77.72
  9.57      ]
