In [None]:
# Model 1 with autoencoder
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, SimpleRNN, LSTM, GRU, Dropout
from tensorflow.keras.optimizers import Adam

# Read the pre-cleaned rental listings from disk
df = pd.read_csv("swiss_no_outliers_and_cleaned.csv")

# Predictor columns and the regression target
feature_columns = [
    "Rooms",
    "Footage",
    "Distance_to_City_Center(km)",
    "Address_Latitude",
    "Address_Longitude",
]
target_column = "Rent"
X = df[feature_columns]
y = df[target_column]

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the predictors using statistics learned from the training rows only
scaler = StandardScaler()
X_scaled = scaler.fit(X_train).transform(X_train)

# Input layer: one value per selected feature column
input_data = Input(shape=(5,))

# Encoder: ReLU layers narrowing 64 -> 32
encoded = input_data
for width in (64, 32):
    encoded = Dense(width, activation='relu')(encoded)

# Bottleneck: the 8-dimensional compressed representation
bottleneck = Dense(8, activation='relu')(encoded)

# Decoder: mirror image of the encoder, widening 32 -> 64
decoded = bottleneck
for width in (32, 64):
    decoded = Dense(width, activation='relu')(decoded)

# Linear output layer reconstructs the 5 scaled inputs
output_data = Dense(5, activation='linear')(decoded)

# Assemble the autoencoder and minimise reconstruction MSE
autoencoder = Model(input_data, output_data)
autoencoder.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mean_squared_error',
)

# The network learns to reproduce its own input; 20% of the rows are used for validation
autoencoder.fit(
    X_scaled,
    X_scaled,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
)

# Extract the encoder part of the autoencoder
encoder = Model(input_data, bottleneck)

# Apply the scaler fitted on the training rows to the held-out rows.
# Only transform() is used here so no test-set statistics leak into the scaling.
X_test_scaled = scaler.transform(X_test)

# Generate the lower-dimensional representation for both partitions
X_encoded = encoder.predict(X_scaled)
X_test_encoded = encoder.predict(X_test_scaled)

# Reshape the encoded data into (samples, timesteps, features) for RNNs, LSTMs, or GRUs
timesteps = 2
features = 4
if timesteps * features != X_encoded.shape[1]:
    # A mismatching product would either fail or silently mix values from
    # different rows into one sample, so stop early with a clear message.
    raise ValueError(
        f"timesteps * features = {timesteps * features} does not match the "
        f"encoder output width {X_encoded.shape[1]}"
    )
X_encoded_reshaped = X_encoded.reshape((-1, timesteps, features))

# Evaluate on the hold-out produced by the first train_test_split instead of
# re-splitting the training rows: the scaler and the autoencoder were both
# fitted on every training row, so a subset of them is not unseen data.
# y_train / y_test from the first split are left untouched and stay aligned
# row-for-row with X_encoded_train / X_encoded_test, and the cell can be
# re-run without shrinking y_train.
X_encoded_train = X_encoded_reshaped
X_encoded_test = X_test_encoded.reshape((-1, timesteps, features))

# Define a function to create a model based on the model type
def create_model(model_type, units):
    """Build and compile a two-layer recurrent regressor.

    The network is: recurrent layer (returning sequences) -> Dropout(0.2) ->
    recurrent layer -> Dense(units, relu) -> Dropout(0.2) -> Dense(1, linear),
    compiled with MSE loss and Adam (learning rate 0.001).

    The input shape is taken from the module-level ``timesteps`` and
    ``features`` variables at call time.

    Args:
        model_type: One of "RNN", "LSTM" or "GRU"; selects the recurrent layer class.
        units: Width of both recurrent layers and of the hidden Dense layer.

    Returns:
        The compiled Sequential model.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    supported_types = ("RNN", "LSTM", "GRU")
    if model_type not in supported_types:
        # Previously an unknown name silently produced a Dense-only network
        # with no recurrent layer at all; fail loudly instead.
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected one of {supported_types}"
        )

    # All three variants share the same topology and differ only in the layer class.
    recurrent_layer = {"RNN": SimpleRNN, "LSTM": LSTM, "GRU": GRU}[model_type]

    model = Sequential()
    model.add(recurrent_layer(units, input_shape=(timesteps, features), activation='relu', return_sequences=True))
    model.add(Dropout(0.2))
    model.add(recurrent_layer(units, activation='relu'))

    model.add(Dense(units, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='linear'))
    model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))
    return model

# Shared training configuration for the three recurrent architectures
model_types = ["RNN", "LSTM", "GRU"]
units = 32
epochs = 200
batch_size = 32

# Metrics per architecture, keyed by model type
results = {}

for model_type in model_types:
    print(f"Training {model_type} model...")
    model = create_model(model_type, units)
    model.fit(
        X_encoded_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        verbose=0,
    )

    # Predictions on both partitions (train first, then test)
    y_pred_train = model.predict(X_encoded_train)
    y_pred_test = model.predict(X_encoded_test)

    # Store MSE and R2 for each partition directly in the results table
    results[model_type] = {
        "mse_train": mean_squared_error(y_train, y_pred_train),
        "mse_test": mean_squared_error(y_test, y_pred_test),
        "r2_train": r2_score(y_train, y_pred_train),
        "r2_test": r2_score(y_test, y_pred_test),
    }

# Report every metric with two decimals, one section per architecture
report_rows = (
    ("MSE (Train)", "mse_train"),
    ("MSE (Test)", "mse_test"),
    ("R2 (Train)", "r2_train"),
    ("R2 (Test)", "r2_test"),
)
for model_type, result in results.items():
    print(f"{model_type} model:")
    for label, key in report_rows:
        print(f" {label}: {result[key]:.2f}")


In [None]:
#Model 2 
## Version with RandomizedSearchCV
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, SimpleRNN, LSTM, GRU, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Read the pre-cleaned rental listings from disk
df = pd.read_csv("swiss_no_outliers_and_cleaned.csv")

# Predictor columns and the regression target
feature_columns = [
    "Rooms",
    "Footage",
    "Distance_to_City_Center(km)",
    "Address_Latitude",
    "Address_Longitude",
]
target_column = "Rent"
X = df[feature_columns]
y = df[target_column]

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the predictors using statistics learned from the training rows only
scaler = StandardScaler()
X_scaled = scaler.fit(X_train).transform(X_train)

# Input layer: one value per selected feature column
input_data = Input(shape=(5,))

# Encoder: ReLU layers narrowing 64 -> 32 -> 16
encoded = input_data
for width in (64, 32, 16):
    encoded = Dense(width, activation='relu')(encoded)

# Bottleneck: the 8-dimensional compressed representation
bottleneck = Dense(8, activation='relu')(encoded)

# Decoder: mirror image of the encoder, widening 16 -> 32 -> 64
decoded = bottleneck
for width in (16, 32, 64):
    decoded = Dense(width, activation='relu')(decoded)

# Linear output layer reconstructs the 5 scaled inputs
output_data = Dense(5, activation='linear')(decoded)

# Assemble the autoencoder and minimise reconstruction MSE
autoencoder = Model(input_data, output_data)
autoencoder.compile(
    optimizer='adam',
    loss='mean_squared_error',
)

# The network learns to reproduce its own input; 20% of the rows are used for validation
autoencoder.fit(
    X_scaled,
    X_scaled,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
)

# Extract the encoder part of the autoencoder
encoder = Model(input_data, bottleneck)

# Apply the scaler fitted on the training rows to the held-out rows.
# Only transform() is used here so no test-set statistics leak into the scaling.
X_test_scaled = scaler.transform(X_test)

# Generate the lower-dimensional representation for both partitions
X_encoded = encoder.predict(X_scaled)
X_test_encoded = encoder.predict(X_test_scaled)

# Reshape the encoded data into (samples, timesteps, features) for RNNs, LSTMs, or GRUs
timesteps = 2
features = 4
if timesteps * features != X_encoded.shape[1]:
    # A mismatching product would either fail or silently mix values from
    # different rows into one sample, so stop early with a clear message.
    raise ValueError(
        f"timesteps * features = {timesteps * features} does not match the "
        f"encoder output width {X_encoded.shape[1]}"
    )
X_encoded_reshaped = X_encoded.reshape((-1, timesteps, features))

# Evaluate on the hold-out produced by the first train_test_split instead of
# re-splitting the training rows: the scaler and the autoencoder were both
# fitted on every training row, so a subset of them is not unseen data.
# y_train / y_test from the first split are left untouched and stay aligned
# row-for-row with X_encoded_train / X_encoded_test, and the cell can be
# re-run without shrinking y_train.
X_encoded_train = X_encoded_reshaped
X_encoded_test = X_test_encoded.reshape((-1, timesteps, features))

from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Import necessary libraries
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

# Modify the create_model function to accept hyperparameters as arguments
def create_model(model_type="LSTM", units=128, dropout_rate=0.2, learning_rate=0.001):
    """Build and compile a two-layer recurrent regressor from hyperparameters.

    The network is: recurrent layer (returning sequences) -> Dropout ->
    recurrent layer -> Dropout -> Dense(1, linear), compiled with MSE loss
    and Adam.

    The input shape is taken from the module-level ``timesteps`` and
    ``features`` variables at call time.

    Args:
        model_type: One of "RNN", "LSTM" or "GRU"; selects the recurrent layer class.
        units: Width of both recurrent layers.
        dropout_rate: Rate used by both Dropout layers.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled Sequential model.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    supported_types = ("RNN", "LSTM", "GRU")
    if model_type not in supported_types:
        # Previously an unknown name silently produced a network consisting of
        # a single Dense(1) layer; fail loudly instead.
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected one of {supported_types}"
        )

    # All three variants share the same topology and differ only in the layer class.
    recurrent_layer = {"RNN": SimpleRNN, "LSTM": LSTM, "GRU": GRU}[model_type]

    model = Sequential()
    model.add(recurrent_layer(units, input_shape=(timesteps, features), activation='relu', return_sequences=True))
    model.add(Dropout(dropout_rate))
    model.add(recurrent_layer(units, activation='relu'))
    model.add(Dropout(dropout_rate))

    model.add(Dense(1, activation='linear'))
    model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=learning_rate))
    return model

# Import necessary libraries
from sklearn.model_selection import RandomizedSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

# Expose create_model through the scikit-learn estimator interface
model = KerasRegressor(build_fn=create_model, verbose=0)

# Candidate values sampled by the search: the first four entries are
# create_model arguments, batch_size and epochs are training settings
param_grid = dict(
    model_type=['RNN', 'LSTM', 'GRU'],
    units=[64, 128, 256, 512, 1024],
    dropout_rate=[0.1, 0.2, 0.3, 0.4, 0.5],
    learning_rate=[0.05, 0.01, 0.001, 0.0001],
    batch_size=[16, 32, 64, 128],
    epochs=[50, 100, 200],
)

# 10 sampled configurations, each scored by 3-fold CV on negative MSE
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=10,
    n_jobs=-1,
    cv=3,
    scoring='neg_mean_squared_error',
    random_state=42,
)

# Run the search on the encoded training data
random_search_result = random_search.fit(X_encoded_train, y_train)

# Show which sampled configuration won the search
print("Best parameters found: ", random_search_result.best_params_)

# Pull the underlying Keras model out of the winning estimator and persist it
best_model = random_search_result.best_estimator_.model
best_model.save("best_model.h5")

# Predictions on both partitions
y_pred_train = best_model.predict(X_encoded_train)
y_pred_test = best_model.predict(X_encoded_test)

# MSE and R2 for each partition
mse_train, mse_test = (
    mean_squared_error(y_train, y_pred_train),
    mean_squared_error(y_test, y_pred_test),
)
r2_train, r2_test = (
    r2_score(y_train, y_pred_train),
    r2_score(y_test, y_pred_test),
)

# Report every metric with two decimals
print("Best model:")
for label, value in (
    ("MSE (Train)", mse_train),
    ("MSE (Test)", mse_test),
    ("R2 (Train)", r2_train),
    ("R2 (Test)", r2_test),
):
    print(f" {label}: {value:.2f}")

