In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, KFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

# Read the cleaned listings CSV into a DataFrame and preview its first rows.
# (The file name suggests outliers were already removed upstream -- not verified here.)
csv_path = "swiss_no_outliers_and_cleaned.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Rooms,Footage,Rent,address,title,description,street,city,canton,Address_Latitude,Address_Longitude,City_Latitude,City_Longitude,Distance_to_City_Center(km)
0,5.5,150,2480,"Hofackerstrasse 6, 8444 Henggart, ZH",«Tolle 5.5-Zimmer-Maisonettewohnung an bevorzu...,Wir vermieten per 1. Oktober 2023 eine moderne...,Hofackerstrasse 6,8444 Henggart,ZH,47.563207,8.684111,47.563229,8.683573,0.040428
1,3.5,63,1740,"Schmittegass 11, 8197 Rafz, ZH",«Schöne Neubau-Wohnung an zentraler Lage»,,Schmittegass 11,8197 Rafz,ZH,47.612987,8.537849,47.611593,8.540267,0.238491
2,3.5,104,2270,"Zürcherstrasse 163, 8406 Winterthur, ZH",«Grosszügige Wohnung in Winterthur»,,Zürcherstrasse 163,8406 Winterthur,ZH,47.491548,8.706753,47.499172,8.72915,1.884125
3,3.5,83,2290,"Usterstrasse 125, 8620 Wetzikon ZH, ZH",«Erstvermietung - 3.5 Zimmerwohnung zu vermieten»,,Usterstrasse 125,8620 Wetzikon ZH,ZH,47.32918,8.784965,47.322693,8.798094,1.224594
4,2.5,57,1580,"Mörlerstrasse 22, 8248 Uhwiesen, ZH","«Moderne, sonnige 2,5-Zimmerwohnung an ruhiger...",,Mörlerstrasse 22,8248 Uhwiesen,ZH,47.669667,8.641022,47.670994,8.635098,0.467456


In [5]:
# Choose the model inputs (features) and the value to predict (target).
#?# X holds the features that are used to predict y; y is therefore the target.
feature_columns = [
    "Rooms",
    "Footage",
    "Distance_to_City_Center(km)",
    "Address_Latitude",
    "Address_Longitude",
]
X = df[feature_columns]
y = df["Rent"]

# Hold out part of the data for testing.
#?# Why is y lowercase, and why is only test_size given (presumably 20% test / 80% train)?
#!# Convention: X is a 2-D table (upper case), y a 1-D column (lower case). With only
#!# test_size=0.2 given, the training share is the remaining 80%.
#?# Why are there two sets X_train and X_test -- do X_test and y_test belong together?
#!# Yes: X and y are split along the same rows, so X_train pairs with y_train and
#!# X_test pairs with y_test. random_state=42 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [6]:
#!# The model is built here with tensorflow.keras
def create_model(optimizer="adam", neurons=10):
    """Build and compile a small feed-forward regression network.

    Args:
        optimizer: Optimizer name or instance handed to ``compile``.
        neurons: Number of units in the single hidden layer.

    Returns:
        A compiled ``Sequential`` model with mean-squared-error loss.
    """
    # input_dim=5 matches the five feature columns selected for X.
    hidden_layer = Dense(neurons, input_dim=5, activation="relu")
    # One output unit without activation: the raw predicted value.
    output_layer = Dense(1)
    model = Sequential([hidden_layer, output_layer])
    model.compile(optimizer=optimizer, loss="mean_squared_error")
    return model

In [7]:
#!# Alternative model using an RNN -- performed very badly --
#!# NOTE(review): this re-defines create_model, so once this cell has run it replaces
#!# the dense version defined earlier for every cell that follows.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense

def create_model(optimizer="adam", neurons=10):
    """Build and compile a single-layer SimpleRNN regression network.

    Args:
        optimizer: Optimizer name or instance handed to ``compile``.
        neurons: Number of units in the recurrent layer.

    Returns:
        A compiled ``Sequential`` model with mean-squared-error loss.
    """
    # input_shape=(5, 1): each sample is declared as 5 steps of 1 value each.
    recurrent_layer = SimpleRNN(neurons, input_shape=(5, 1), activation="relu")
    model = Sequential([recurrent_layer, Dense(1)])
    model.compile(optimizer=optimizer, loss="mean_squared_error")
    return model


In [None]:
#$# this is extra and not needed and for visual purposes only
from tensorflow.keras.callbacks import TensorBoard

model = create_model()
tensorboard_callback = TensorBoard(log_dir="./logs", histogram_freq=1)
model.fit(X_train, y_train, epochs=10, callbacks=[tensorboard_callback])
%tensorboard --logdir= /logs/train


In [8]:
# Wrap the create_model factory in a scikit-learn style estimator so that it can be
# passed to GridSearchCV. verbose=0 is forwarded to Keras to silence training output.
# NOTE(review): the tensorflow.keras.wrappers.scikit_learn wrapper is deprecated in
# newer TensorFlow releases -- confirm the installed version still provides it.
regressor = KerasRegressor(
    build_fn=create_model,
    verbose=0,
)

  regressor = KerasRegressor(build_fn=create_model, verbose=0)


In [None]:
# Larger search space (ETA ~17 min).
# Note: a later cell assigns param_grid again; whichever cell ran last wins.
param_grid = dict(
    optimizer=["adam", "rmsprop"],
    neurons=[5, 10, 15],
    epochs=[5, 100],
    batch_size=[16, 32],
)

In [9]:
# Reduced search space (ETA ~4 min): 2 * 2 * 2 * 2 = 16 candidate combinations.
param_grid = dict(
    optimizer=["adam", "rmsprop"],
    neurons=[5, 10],
    epochs=[20, 40],
    batch_size=[32, 64],
)

In [10]:
# Exhaustively try every combination in param_grid, scoring each with 5-fold
# cross-validation on the training split only. verbose=2 prints one line per fit.
cv_splitter = KFold(n_splits=5)
grid_search = GridSearchCV(
    estimator=regressor,
    param_grid=param_grid,
    cv=cv_splitter,
    verbose=2,
)
# grid_result is the value returned by fit(); later cells read best_estimator_ from it.
grid_result = grid_search.fit(X_train, y_train)


Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] END batch_size=32, epochs=20, neurons=5, optimizer=adam; total time=   7.9s
[CV] END batch_size=32, epochs=20, neurons=5, optimizer=adam; total time=   6.8s
[CV] END batch_size=32, epochs=20, neurons=5, optimizer=adam; total time=   7.0s
[CV] END batch_size=32, epochs=20, neurons=5, optimizer=adam; total time=   6.7s
[CV] END batch_size=32, epochs=20, neurons=5, optimizer=adam; total time=   7.0s
[CV] END batch_size=32, epochs=20, neurons=5, optimizer=rmsprop; total time=   6.6s
[CV] END batch_size=32, epochs=20, neurons=5, optimizer=rmsprop; total time=   7.0s
[CV] END batch_size=32, epochs=20, neurons=5, optimizer=rmsprop; total time=   6.8s
[CV] END batch_size=32, epochs=20, neurons=5, optimizer=rmsprop; total time=   6.6s
[CV] END batch_size=32, epochs=20, neurons=5, optimizer=rmsprop; total time=   7.7s
[CV] END batch_size=32, epochs=20, neurons=10, optimizer=adam; total time=   7.3s
[CV] END batch_size=32, epochs=2

In [12]:
# Evaluate the best estimator found by the grid search on the held-out test set.
# evaluate() returns the loss the model was compiled with (mean squared error).
best_estimator = grid_result.best_estimator_
best_model = best_estimator.model
test_loss = best_model.evaluate(X_test, y_test)
print('Test loss:', test_loss)

Test loss: 182024.65625


In [13]:
# Same evaluation as before, but silent (verbose=0) and reported to two decimals.
best_estimator = grid_result.best_estimator_
best_model = best_estimator.model
test_mse = best_model.evaluate(
    X_test,
    y_test,
    verbose=0,
)
print(f"Test MSE: {test_mse:.2f}")


Test MSE: 182024.66


In [None]:
# Pull the underlying Keras model out of the best estimator found by the grid search.
best_estimator = grid_result.best_estimator_
best_model = best_estimator.model

# Persist the trained weights to disk.
best_model.save_weights('1_best_model_weights.h5')

# Also keep the weights in memory as a list of arrays.
weights = best_model.get_weights()


In [None]:
# Load the saved weights into a compatible model.
# The architecture must match the one the weights came from, so rebuild it with the
# hyper-parameters the grid search selected instead of relying on a `model` variable
# left over from the optional TensorBoard cell (default neurons=10, may not exist).
best_params = grid_result.best_params_
model = create_model(
    optimizer=best_params["optimizer"],
    neurons=best_params["neurons"],
)
# Use the same file name that save_weights wrote in the previous cell.
model.load_weights('1_best_model_weights.h5')
