<a href="https://colab.research.google.com/github/slajan01/house_prices/blob/main/Kaggle_House_Prices_Advanced_Regression_Techniques.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

# Načtení dat
data = pd.read_csv("/content/train.csv")
blank_data = pd.read_csv("/content/test.csv")
sample_submission = pd.read_csv("/content/sample_submission.csv")

# Příprava dat
X = data.drop("SalePrice", axis=1)
y = data["SalePrice"]

# Identifikace kategorických a numerických sloupců
categorical_columns = X.select_dtypes(include=['object']).columns
numerical_columns = X.select_dtypes(exclude=['object']).columns

# Ošetření NaN hodnot
numerical_imputer = SimpleImputer(strategy='mean')
X[numerical_columns] = numerical_imputer.fit_transform(X[numerical_columns])

categorical_imputer = SimpleImputer(strategy='most_frequent')
X[categorical_columns] = categorical_imputer.fit_transform(X[categorical_columns])

# One-Hot Encoding pro kategorické sloupce
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_columns),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns)
    ])

X_processed = preprocessor.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=0)

# Definice modelu
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(128, activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(1))

model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# Trénování modelu
model.fit(X_train, y_train, epochs=130, batch_size=16, verbose=0)

# Vyhodnocení modelu
test_mse_score, test_mae_score = model.evaluate(X_test, y_test)
print(f'Test MSE: {test_mse_score}, Test MAE: {test_mae_score}')

# Predikce
blank_data[numerical_columns] = numerical_imputer.transform(blank_data[numerical_columns])
blank_data[categorical_columns] = categorical_imputer.transform(blank_data[categorical_columns])
blank_data_processed = preprocessor.transform(blank_data)
predictions = model.predict(blank_data_processed)
output = pd.DataFrame({'Id': sample_submission.Id, 'SalePrice': predictions.flatten()})
output.to_csv('submission.csv', index=False)
print("Your submission was successfully saved!")

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1419358080.0000 - mae: 20037.5156  
Test MSE: 2047358080.0, Test MAE: 19655.275390625
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Your submission was successfully saved!
