## Import Libraries

In [15]:
import math

import pandas as pd
import tensorflow as tf
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, explained_variance_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from tensorflow import keras

## Load and prepare data

In [12]:
# Read 'merged_tables.csv' and, in the same expression, discard every row that
# has a missing value in the target column or in one of the three feature columns.
data = (
    pd.read_csv('merged_tables.csv')
    .dropna(subset=['Wert', 'precipprob', 'windgust', 'visibility'])
)

## Split the data into training and test sets

In [13]:
# Target (y) is the 'Wert' column (described as air quality in the original
# comment); the features (X) are the three weather columns.
y = data['Wert']
X = data[['precipprob', 'windgust', 'visibility']]

# Hold out 20 % of the rows for testing. The fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Model Creation

In [4]:
# Linear regression baseline.
# fit() returns the estimator itself, so construction and training are chained.
linear_model = LinearRegression().fit(X_train, y_train)
# Predictions on the held-out test features, used later in the evaluation cell.
linear_predictions = linear_model.predict(X_test)

Lineare Regression RMSE: 10.517236753153938


In [16]:
# Decision tree regressor.
# random_state is fixed so the fitted tree (and therefore the reported metrics)
# is the same on every run; scikit-learn permutes the features at each split
# and breaks ties between equally good splits at random otherwise.
tree_model = DecisionTreeRegressor(random_state=42)
tree_model.fit(X_train, y_train)
# Predictions on the held-out test features, used later in the evaluation cell.
tree_predictions = tree_model.predict(X_test)

Entscheidungsbaum RMSE: 1.8641014721382632
Mean Absolute Error (MAE): 0.472338860051359
Mean Squared Error (MSE): 3.47487429842804
R^2 Score: 0.9769467319421307
Explained Variance Score: 0.9769474453264686


In [6]:
# Random forest regressor.
# random_state is fixed so the bootstrap samples and the individual trees are
# the same on every run, which keeps the reported metrics reproducible.
forest_model = RandomForestRegressor(random_state=42)
forest_model.fit(X_train, y_train)
# Predictions on the held-out test features, used later in the evaluation cell.
forest_predictions = forest_model.predict(X_test)

Random Forest RMSE: 1.9226926684854884


In [22]:
# Neural network.
# Seed TensorFlow's global random generator before the model is built so that
# weight initialisation and batch shuffling are repeatable between runs.
# NOTE(review): bit-exact reproducibility can still depend on hardware and the
# TensorFlow version - confirm on the target setup.
tf.random.set_seed(42)

# Two hidden ReLU layers with 64 units each and one linear output unit
# (a single regression target).
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=32)

# predict() returns one column per output unit, i.e. shape (n_samples, 1).
# Flatten to 1-D so the result has the same shape as the scikit-learn
# predictions and cannot broadcast unexpectedly against y_test.
nn_predictions = model.predict(X_test).ravel()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Model Evaluation

In [21]:
def print_regression_report(label, y_true, y_pred, rmse_label=None):
    """Print five regression metrics for one model and return them.

    The metrics are printed in this order: RMSE, MAE, MSE, R^2 and explained
    variance, each on its own line and prefixed with the model label.

    Parameters
    ----------
    label : str
        Model name used as the prefix of every printed line.
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values for the same samples.
    rmse_label : str, optional
        Prefix used for the RMSE line only. Defaults to ``label``. It exists so
        the first line of the linear-regression report keeps its original
        wording.

    Returns
    -------
    dict
        The computed values under the keys ``"RMSE"``, ``"MAE"``, ``"MSE"``,
        ``"R2"`` and ``"Explained Variance"``.
    """
    mse = mean_squared_error(y_true, y_pred)
    # RMSE is derived from the MSE instead of passing ``squared=False``, which
    # is deprecated in newer scikit-learn releases.
    rmse = math.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    explained_variance = explained_variance_score(y_true, y_pred)

    print(f"{label if rmse_label is None else rmse_label} RMSE:", rmse)
    print(f"{label} Mean Absolute Error (MAE):", mae)
    print(f"{label} Mean Squared Error (MSE):", mse)
    print(f"{label} R^2 Score:", r2)
    print(f"{label} Explained Variance Score:", explained_variance)

    return {
        "RMSE": rmse,
        "MAE": mae,
        "MSE": mse,
        "R2": r2,
        "Explained Variance": explained_variance,
    }


# Linear Regression
linear_rmse = print_regression_report(
    "Linear Regression", y_test, linear_predictions, rmse_label="Lineare Regression"
)["RMSE"]

print("-------")
# Decision Tree
tree_rmse = print_regression_report("Decision Tree", y_test, tree_predictions)["RMSE"]

print("-------")
# Random Forest
forest_rmse = print_regression_report("Random Forest", y_test, forest_predictions)["RMSE"]

print("-------")
# Neural network (the printed label keeps the original "Neuronal Network" wording)
nn_rmse = print_regression_report("Neuronal Network", y_test, nn_predictions)["RMSE"]


Lineare Regression RMSE: 10.517236753153938
Linear Regression Mean Absolute Error (MAE): 7.491540286867864
Linear Regression Mean Squared Error (MSE): 110.61226892189198
Linear Regression R^2 Score: 0.26616790509542887
Linear Regression Explained Variance Score: 0.266531266231666
-------
Decision Tree RMSE: 1.8641014721382632
Decision Tree Mean Absolute Error (MAE): 0.472338860051359
Decision Tree Mean Squared Error (MSE): 3.47487429842804
Decision Tree R^2 Score: 0.9769467319421307
Decision Tree Explained Variance Score: 0.9769474453264686
-------
Random Forest RMSE: 1.9226926684854884
Random Forest Mean Absolute Error (MAE): 0.6738695046210812
Random Forest Mean Squared Error (MSE): 3.6967470974478482
Random Forest R^2 Score: 0.9754747670100848
Random Forest Explained Variance Score: 0.9754759031682241
-------
Neuronal Network RMSE: 9.44706274252924
Neuronal Network Mean Absolute Error (MAE): 6.836361900184248
Neuronal Network Mean Squared Error (MSE): 89.24699446128409
Neuronal Netw