In [1]:
import warnings

import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from statsmodels.formula.api import ols
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed
from tensorflow.keras.utils import to_categorical

In [12]:
# Load the training data; the path is relative to the working directory.
train_csv_path = 'train.csv'
df = pd.read_csv(train_csv_path)

In [13]:
# Print every column name as a plain Python list.
print(list(df.columns))

['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual', 'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType', 'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond', 'PavedDrive', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'PoolQC'

In [14]:
# Columns that are modelled as categorical factors further down.
cols_to_cat = [
    'MSSubClass', 'MSZoning', 'Street', 'Alley', 'LotShape',
    'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood',
    'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'OverallQual',
    'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl',
    'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond',
    'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
    'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical',
    'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr',
    'KitchenAbvGr', 'KitchenQual', 'TotRmsAbvGrd', 'Functional', 'Fireplaces',
    'FireplaceQu', 'GarageType', 'GarageYrBlt', 'GarageFinish', 'GarageCars',
    'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence',
    'MiscFeature', 'MoSold', 'YrSold', 'SaleType', 'SaleCondition',
]


def _as_category_with_missing(column):
    """Cast a column to category dtype and turn NaN into a 'Missing' level."""
    categorical = column.astype('category')
    return categorical.cat.add_categories('Missing').fillna('Missing')


for name in cols_to_cat:
    df[name] = _as_category_with_missing(df[name])

In [15]:
# Drop the row identifier column. errors='ignore' keeps the cell re-runnable:
# a second execution no longer raises KeyError once 'Id' is already gone.
df = df.drop(columns='Id', errors='ignore')
df

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,60,RL,65.0,8450,Pave,Missing,Reg,Lvl,AllPub,Inside,...,0,Missing,Missing,Missing,0,2,2008,WD,Normal,208500
1,20,RL,80.0,9600,Pave,Missing,Reg,Lvl,AllPub,FR2,...,0,Missing,Missing,Missing,0,5,2007,WD,Normal,181500
2,60,RL,68.0,11250,Pave,Missing,IR1,Lvl,AllPub,Inside,...,0,Missing,Missing,Missing,0,9,2008,WD,Normal,223500
3,70,RL,60.0,9550,Pave,Missing,IR1,Lvl,AllPub,Corner,...,0,Missing,Missing,Missing,0,2,2006,WD,Abnorml,140000
4,60,RL,84.0,14260,Pave,Missing,IR1,Lvl,AllPub,FR2,...,0,Missing,Missing,Missing,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,60,RL,62.0,7917,Pave,Missing,Reg,Lvl,AllPub,Inside,...,0,Missing,Missing,Missing,0,8,2007,WD,Normal,175000
1456,20,RL,85.0,13175,Pave,Missing,Reg,Lvl,AllPub,Inside,...,0,Missing,MnPrv,Missing,0,2,2010,WD,Normal,210000
1457,70,RL,66.0,9042,Pave,Missing,Reg,Lvl,AllPub,Inside,...,0,Missing,GdPrv,Shed,2500,5,2010,WD,Normal,266500
1458,20,RL,68.0,9717,Pave,Missing,Reg,Lvl,AllPub,Inside,...,0,Missing,Missing,Missing,0,4,2010,WD,Normal,142125


In [16]:
# Show the dtype of every column of df.
df.dtypes

MSSubClass       category
MSZoning         category
LotFrontage       float64
LotArea             int64
Street           category
                   ...   
MoSold           category
YrSold           category
SaleType         category
SaleCondition    category
SalePrice           int64
Length: 80, dtype: object

In [18]:

# Count missing values per column and list the worst columns first.
missing_per_column = df.isna().sum()
na_counts = missing_per_column.sort_values(ascending=False)
print(na_counts)

LotFrontage     259
MasVnrArea        8
MSSubClass        0
KitchenAbvGr      0
GarageYrBlt       0
               ... 
ExterCond         0
ExterQual         0
MasVnrType        0
Exterior2nd       0
SalePrice         0
Length: 80, dtype: int64


In [21]:

# Fill LotFrontage gaps with the column mean, then drop every row that
# still contains a missing value in any column.
lot_frontage_mean = df['LotFrontage'].mean()
df = df.assign(LotFrontage=df['LotFrontage'].fillna(lot_frontage_mean)).dropna()

In [24]:
def stepwise_selection(data, response, predictors, verbose=True):
    """Pick predictors for an OLS model by bidirectional stepwise AIC search.

    Every predictor enters the formula wrapped in ``C(...)``, i.e. the fitted
    models have the form ``response ~ C(a) + C(b) + ...``. Each round runs a
    forward step (add the excluded predictor whose model has the lowest AIC)
    followed, once more than one predictor is included, by a backward step
    (drop the included predictor whose removal gives the lowest AIC). A change
    is accepted only when it strictly lowers the best AIC found so far, and
    the search ends after the first round that changes nothing.

    Parameters
    ----------
    data
        Data set handed to ``ols``; must contain ``response`` and every
        candidate predictor.
    response : str
        Name of the dependent variable used on the left of the formula.
    predictors : iterable of str
        Candidate column names. Duplicates are ignored; the given order is
        used to break AIC ties (the earliest candidate wins).
    verbose : bool, default True
        Print a line for every predictor added or removed.

    Returns
    -------
    list of str
        The selected predictors, in the order they were added.
    """
    # De-duplicate but keep the caller's order. Iterating over a set here
    # would make tie-breaking depend on hash order, which can vary per run.
    candidates = list(dict.fromkeys(predictors))

    def _aic(columns):
        """Fit ``response ~ C(col) + ...`` for ``columns`` and return its AIC."""
        terms = ' + '.join(f"C({col})" for col in columns)
        return ols(f"{response} ~ {terms}", data).fit().aic

    included = []
    best_aic = float('inf')

    # Silence warnings only for the duration of the search; the caller's
    # warning filters are restored on exit instead of being replaced globally.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        while True:
            changed = False

            # Forward step: try each excluded predictor on top of the current set.
            excluded = [col for col in candidates if col not in included]
            new_aic = {col: _aic(included + [col]) for col in excluded}
            if new_aic:
                best_new = min(new_aic, key=new_aic.get)
                if new_aic[best_new] < best_aic:
                    included.append(best_new)
                    best_aic = new_aic[best_new]
                    changed = True
                    if verbose:
                        print(f"➕ Dodano: {best_new} | AIC = {best_aic:.2f}")

            # Backward step: try removing each included predictor in turn.
            # Skipped for a single predictor so the formula is never empty.
            if len(included) > 1:
                aic_with_removal = {
                    col: _aic([kept for kept in included if kept != col])
                    for col in included
                }
                worst = min(aic_with_removal, key=aic_with_removal.get)
                if aic_with_removal[worst] < best_aic:
                    included.remove(worst)
                    best_aic = aic_with_removal[worst]
                    changed = True
                    if verbose:
                        print(f"➖ Usunięto: {worst} | AIC = {best_aic:.2f}")

            if not changed:
                break

    return included

In [25]:

# Candidate categorical predictors for the first stepwise search.
predictors = [
    'MSSubClass', 'MSZoning', 'Street', 'Alley', 'LotShape',
    'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood',
    'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'OverallQual',
    'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl',
    'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond',
    'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
    'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical',
    'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr',
    'KitchenAbvGr', 'KitchenQual', 'TotRmsAbvGrd', 'Functional', 'Fireplaces',
    'FireplaceQu', 'GarageType', 'GarageYrBlt', 'GarageFinish', 'GarageCars',
    'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence',
    'MiscFeature', 'MoSold', 'YrSold', 'SaleType', 'SaleCondition',
]

# Run the stepwise AIC search, then refit one OLS model on the selection.
selected = stepwise_selection(df, 'SalePrice', predictors)

selected_terms = ' + '.join(f"C({col})" for col in selected)
formula = 'SalePrice ~ ' + selected_terms
model = ols(formula, df).fit()

# Summary
print("\n📋 Wybrane zmienne:", selected)
print(model.summary())


➕ Dodano: OverallQual | AIC = 35235.25
➕ Dodano: Neighborhood | AIC = 34913.99
➕ Dodano: TotRmsAbvGrd | AIC = 34708.03
➕ Dodano: BsmtExposure | AIC = 34586.10
➕ Dodano: MSSubClass | AIC = 34495.12
➕ Dodano: RoofMatl | AIC = 34406.35
➕ Dodano: FullBath | AIC = 34330.96
➕ Dodano: KitchenQual | AIC = 34251.94
➕ Dodano: Fireplaces | AIC = 34182.27
➕ Dodano: GarageCars | AIC = 34117.89
➕ Dodano: BsmtFinType1 | AIC = 34062.98
➕ Dodano: Condition2 | AIC = 34013.77
➕ Dodano: BsmtQual | AIC = 33965.34
➕ Dodano: PoolQC | AIC = 33921.81
➕ Dodano: SaleCondition | AIC = 33883.29
➕ Dodano: OverallCond | AIC = 33849.35
➕ Dodano: LotConfig | AIC = 33819.41
➕ Dodano: HalfBath | AIC = 33796.01
➕ Dodano: BsmtFullBath | AIC = 33779.03
➕ Dodano: Exterior1st | AIC = 33763.25
➕ Dodano: BedroomAbvGr | AIC = 33750.97
➕ Dodano: Condition1 | AIC = 33741.29
➕ Dodano: LotShape | AIC = 33734.21
➕ Dodano: RoofStyle | AIC = 33729.00
➕ Dodano: MSZoning | AIC = 33726.40
➕ Dodano: GarageType | AIC = 33723.55
➕ Dodano: H

In [27]:
# Type II ANOVA of the fitted model, largest sum of squares first.
anova_table = sm.stats.anova_lm(model, typ=2)
anova_table_sorted = anova_table.sort_values('sum_sq', ascending=False)
print(anova_table_sorted)

                        sum_sq      df          F        PR(>F)
Residual          8.122653e+11  1275.0        NaN           NaN
C(Neighborhood)   1.549693e+11    25.0   9.730117  7.223865e-33
C(OverallQual)    1.314372e+11    10.0  20.631490  9.882278e-33
C(RoofMatl)       1.141159e+11     8.0  22.390740  1.011597e-28
C(FullBath)       8.025904e+10     4.0  31.495339  1.219398e-19
C(GarageCars)     6.556285e+10     5.0  20.582596  1.879900e-16
C(TotRmsAbvGrd)   6.450447e+10    12.0   8.437637  1.739871e-14
C(MSSubClass)     5.743330e+10    15.0   6.010143  1.275059e-11
C(Condition2)     5.185654e+10     8.0  10.174800  1.916571e-12
C(Fireplaces)     5.138475e+10     4.0  20.164459  9.038914e-13
C(BsmtExposure)   4.503848e+10     4.0  17.674050  3.951125e-14
C(PoolQC)         4.084927e+10     3.0  21.373483  1.647674e-13
C(OverallCond)    3.418512e+10     9.0   5.962205  1.576464e-07
C(SaleCondition)  2.954610e+10     6.0   7.729675  3.571389e-07
C(Exterior1st)    2.871061e+10    15.0  

In [34]:
# Reduced candidate list for the second stepwise search.
new_predictors = [
    'Neighborhood',
    'BsmtExposure',
    'HouseStyle',
    'OverallQual',
    'RoofMatl',
    'FullBath',
    'GarageCars',
    'TotRmsAbvGrd',
    'MSSubClass',
    'Condition2',
    'Fireplaces',
    'BsmtQual',
    'FireplaceQu',
]
selected = stepwise_selection(df, 'SalePrice', new_predictors)

# Refit a single OLS model on whatever the search kept.
selected_terms = ' + '.join(f"C({col})" for col in selected)
formula = 'SalePrice ~ ' + selected_terms
model = ols(formula, df).fit()

# Summary followed by the type II ANOVA table
print("\n📋 Wybrane zmienne:", selected)
print(model.summary())
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)

➕ Dodano: OverallQual | AIC = 35235.25
➕ Dodano: Neighborhood | AIC = 34913.99
➕ Dodano: TotRmsAbvGrd | AIC = 34708.03
➕ Dodano: BsmtExposure | AIC = 34586.10
➕ Dodano: MSSubClass | AIC = 34495.12
➕ Dodano: RoofMatl | AIC = 34406.35
➕ Dodano: FullBath | AIC = 34330.96
➕ Dodano: Fireplaces | AIC = 34260.30
➕ Dodano: GarageCars | AIC = 34189.62
➕ Dodano: BsmtQual | AIC = 34135.81
➕ Dodano: Condition2 | AIC = 34095.42

📋 Wybrane zmienne: ['OverallQual', 'Neighborhood', 'TotRmsAbvGrd', 'BsmtExposure', 'MSSubClass', 'RoofMatl', 'FullBath', 'Fireplaces', 'GarageCars', 'BsmtQual', 'Condition2']
                            OLS Regression Results                            
Dep. Variable:              SalePrice   R-squared:                       0.870
Model:                            OLS   Adj. R-squared:                  0.862
Method:                 Least Squares   F-statistic:                     101.5
Date:                Mon, 14 Jul 2025   Prob (F-statistic):               0.00
Time:     

In [35]:
# Type II ANOVA table for the OLS model currently bound to `model`.
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)

                       sum_sq      df          F        PR(>F)
C(OverallQual)   3.078838e+11    10.0  35.417622  1.404584e-56
C(Neighborhood)  2.556969e+11    25.0  11.765703  3.191941e-41
C(TotRmsAbvGrd)  1.012687e+11    12.0   9.707930  4.253120e-17
C(BsmtExposure)  8.066887e+10     4.0  23.199493  1.426230e-18
C(MSSubClass)    1.085268e+11    15.0   8.322968  2.009678e-17
C(RoofMatl)      1.024391e+11     8.0  14.730182  1.321368e-18
C(FullBath)      9.983973e+10     4.0  28.712822  5.305679e-18
C(Fireplaces)    9.380436e+10     4.0  26.977116  5.992697e-17
C(GarageCars)    7.772355e+10     5.0  17.881960  2.547485e-14
C(BsmtQual)      5.273347e+10     4.0  15.165574  3.891599e-12
C(Condition2)    5.160695e+10     8.0   7.420800  8.892328e-09
Residual         1.183111e+12  1361.0        NaN           NaN


In [36]:
# Working frame: the target plus the reduced set of categorical predictors.
columns_to_keep = ['SalePrice', *new_predictors]
df_new = df.loc[:, columns_to_keep].copy()
df_new.head()

Unnamed: 0,SalePrice,Neighborhood,BsmtExposure,HouseStyle,OverallQual,RoofMatl,FullBath,GarageCars,TotRmsAbvGrd,MSSubClass,Condition2,Fireplaces,BsmtQual,FireplaceQu
0,208500,CollgCr,No,2Story,7,CompShg,2,2,8,60,Norm,0,Gd,Missing
1,181500,Veenker,Gd,1Story,6,CompShg,2,2,6,20,Norm,1,Gd,TA
2,223500,CollgCr,Mn,2Story,7,CompShg,2,2,6,60,Norm,1,Gd,TA
3,140000,Crawfor,No,2Story,7,CompShg,1,3,7,70,Norm,1,TA,Gd
4,250000,NoRidge,Av,2Story,8,CompShg,2,3,9,60,Norm,1,Gd,TA


In [37]:
# For every categorical column of df_new, record how often each level occurs.
cat_cols = df_new.select_dtypes(include='category').columns
category_counts = {
    name: df_new[name].value_counts().to_dict()
    for name in cat_cols
}
print(category_counts)

{'Neighborhood': {'NAmes': 225, 'CollgCr': 149, 'OldTown': 113, 'Edwards': 100, 'Somerst': 83, 'Gilbert': 78, 'NridgHt': 76, 'Sawyer': 74, 'NWAmes': 73, 'BrkSide': 58, 'SawyerW': 58, 'Crawfor': 50, 'Mitchel': 49, 'NoRidge': 41, 'Timber': 38, 'IDOTRR': 37, 'ClearCr': 28, 'SWISU': 25, 'StoneBr': 25, 'MeadowV': 17, 'Blmngtn': 17, 'BrDale': 16, 'Veenker': 11, 'NPkVill': 9, 'Blueste': 2, 'Missing': 0}, 'BsmtExposure': {'No': 946, 'Av': 221, 'Gd': 133, 'Mn': 114, 'Missing': 38}, 'HouseStyle': {'1Story': 721, '2Story': 442, '1.5Fin': 154, 'SLvl': 65, 'SFoyer': 37, '1.5Unf': 14, '2.5Unf': 11, '2.5Fin': 8, 'Missing': 0}, 'OverallQual': {5: 397, 6: 372, 7: 315, 8: 167, 4: 116, 9: 43, 3: 20, 10: 17, 2: 3, 1: 2, 'Missing': 0}, 'RoofMatl': {'CompShg': 1426, 'Tar&Grv': 11, 'WdShngl': 6, 'WdShake': 5, 'ClyTile': 1, 'Membran': 1, 'Metal': 1, 'Roll': 1, 'Missing': 0}, 'FullBath': {2: 762, 1: 649, 3: 32, 0: 9, 'Missing': 0}, 'GarageCars': {2: 817, 1: 369, 3: 180, 0: 81, 4: 5, 'Missing': 0}, 'TotRmsAbvGr

In [38]:

threshold = 30  # levels seen in fewer rows than this are merged; adjustable


def _rare_levels(column, min_count):
    """Return the levels of `column` that occur in fewer than `min_count` rows."""
    frequencies = column.value_counts()
    return frequencies[frequencies < min_count].index.tolist()


# Map: column -> list of its rare levels
rare_map = {name: _rare_levels(df_new[name], threshold) for name in cat_cols}

# Collapse every rare level into the single label 'Other'
for name in cat_cols:
    rare_levels = rare_map[name]
    df_new[name] = df_new[name].apply(
        lambda value: 'Other' if value in rare_levels else value
    )

print(rare_map)

df_new.head()

{'Neighborhood': ['ClearCr', 'SWISU', 'StoneBr', 'MeadowV', 'Blmngtn', 'BrDale', 'Veenker', 'NPkVill', 'Blueste', 'Missing'], 'BsmtExposure': [], 'HouseStyle': ['1.5Unf', '2.5Unf', '2.5Fin', 'Missing'], 'OverallQual': [3, 10, 2, 1, 'Missing'], 'RoofMatl': ['Tar&Grv', 'WdShngl', 'WdShake', 'ClyTile', 'Membran', 'Metal', 'Roll', 'Missing'], 'FullBath': [0, 'Missing'], 'GarageCars': [4, 'Missing'], 'TotRmsAbvGrd': [11, 3, 12, 2, 14, 'Missing'], 'MSSubClass': [85, 75, 45, 180, 40, 'Missing'], 'Condition2': ['Feedr', 'Artery', 'PosN', 'RRNn', 'PosA', 'RRAe', 'RRAn', 'Missing'], 'Fireplaces': [3, 'Missing'], 'BsmtQual': [], 'FireplaceQu': ['Ex', 'Po']}


Unnamed: 0,SalePrice,Neighborhood,BsmtExposure,HouseStyle,OverallQual,RoofMatl,FullBath,GarageCars,TotRmsAbvGrd,MSSubClass,Condition2,Fireplaces,BsmtQual,FireplaceQu
0,208500,CollgCr,No,2Story,7,CompShg,2,2,8,60,Norm,0,Gd,Missing
1,181500,Other,Gd,1Story,6,CompShg,2,2,6,20,Norm,1,Gd,TA
2,223500,CollgCr,Mn,2Story,7,CompShg,2,2,6,60,Norm,1,Gd,TA
3,140000,Crawfor,No,2Story,7,CompShg,1,3,7,70,Norm,1,TA,Gd
4,250000,NoRidge,Av,2Story,8,CompShg,2,3,9,60,Norm,1,Gd,TA


In [40]:
# List all numeric columns of the full frame `df` (not `df_new`); these are
# the candidates for the numeric features added to df_new further down.
num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
print(num_cols)

['LotFrontage', 'LotArea', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'SalePrice']


In [43]:
# Pearson correlation of every numeric column of df with SalePrice,
# strongest positive correlation first.
num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
sale_price = df['SalePrice']
correlations = df[num_cols].corrwith(sale_price)
correlations = correlations.sort_values(ascending=False)
print(correlations)

SalePrice        1.000000
GrLivArea        0.710080
GarageArea       0.622492
TotalBsmtSF      0.612971
1stFlrSF         0.606849
MasVnrArea       0.477493
BsmtFinSF1       0.383977
LotFrontage      0.333322
WoodDeckSF       0.324650
2ndFlrSF         0.322710
OpenPorchSF      0.311268
LotArea          0.264674
BsmtUnfSF        0.215740
ScreenPorch      0.113044
PoolArea         0.093109
3SsnPorch        0.045247
BsmtFinSF2      -0.010316
MiscVal         -0.020951
LowQualFinSF    -0.025263
EnclosedPorch   -0.128778
dtype: float64


In [44]:
# Copy the selected numeric columns from df into df_new, skipping any that
# df_new already has.
extra_num_cols = ['GrLivArea', 'GarageArea', 'TotalBsmtSF', '1stFlrSF']
columns_to_add = [name for name in extra_num_cols if name not in df_new.columns]
for name in columns_to_add:
    df_new[name] = df[name]
df_new.head()

Unnamed: 0,SalePrice,Neighborhood,BsmtExposure,HouseStyle,OverallQual,RoofMatl,FullBath,GarageCars,TotRmsAbvGrd,MSSubClass,Condition2,Fireplaces,BsmtQual,FireplaceQu,GrLivArea,GarageArea,TotalBsmtSF,1stFlrSF
0,208500,CollgCr,No,2Story,7,CompShg,2,2,8,60,Norm,0,Gd,Missing,1710,548,856,856
1,181500,Other,Gd,1Story,6,CompShg,2,2,6,20,Norm,1,Gd,TA,1262,460,1262,1262
2,223500,CollgCr,Mn,2Story,7,CompShg,2,2,6,60,Norm,1,Gd,TA,1786,608,920,920
3,140000,Crawfor,No,2Story,7,CompShg,1,3,7,70,Norm,1,TA,Gd,1717,642,756,961
4,250000,NoRidge,Av,2Story,8,CompShg,2,3,9,60,Norm,1,Gd,TA,2198,836,1145,1145


In [48]:
# 1. Features and target
X = df_new.drop('SalePrice', axis=1)
y = df_new['SalePrice']

# 2. Column groups, derived from X itself so the cell does not depend on a
#    `cat_cols` variable left behind by an earlier cell. Every non-numeric
#    column is treated as categorical and cast to str, so columns that mix
#    integer levels with the label 'Other' are encoded uniformly.
num_cols = X.select_dtypes(include=[np.number]).columns
cat_cols = X.select_dtypes(exclude=[np.number]).columns
for col in cat_cols:
    X[col] = X[col].astype(str)

# 3. Train/test split BEFORE any preprocessing is fitted, so the test rows
#    cannot influence the scaler statistics or the encoder vocabulary.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 4. Preprocessing: scale numeric columns, one-hot encode categorical ones.
#    sparse_threshold=0 always returns a dense array for Keras.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_cols),
    ],
    sparse_threshold=0,
)
X_train = preprocessor.fit_transform(X_train_raw)  # fitted on training rows only
X_test = preprocessor.transform(X_test_raw)        # unseen levels encode as all zeros

# 5. Seed Python, NumPy and TensorFlow so weight initialisation, dropout and
#    batch shuffling repeat across runs.
set_random_seed(42)

early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# 6. Network architecture
model = Sequential([
    Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(1)
])

# 7. Compilation
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# 8. Training; 20% of the training rows are held out for early stopping
model.fit(X_train, y_train, epochs=1000, batch_size=32, validation_split=0.2, callbacks=[early_stop])

# 9. Evaluation on the untouched test rows
loss, mae = model.evaluate(X_test, y_test)
print(f"MAE: {mae}")
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f"R^2 na zbiorze testowym: {r2:.4f} ({r2*100:.2f}%)")



Epoch 1/1000
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - loss: 40290615296.0000 - mae: 184158.6250 - val_loss: 39129608192.0000 - val_mae: 180646.5312
Epoch 2/1000
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 39140278272.0000 - mae: 179030.0312 - val_loss: 32562333696.0000 - val_mae: 162247.2969
Epoch 3/1000
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 23463370752.0000 - mae: 133926.1875 - val_loss: 3736270080.0000 - val_mae: 48675.7422
Epoch 4/1000
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 3996705792.0000 - mae: 45341.0664 - val_loss: 2034423552.0000 - val_mae: 29210.8555
Epoch 5/1000
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 2355874560.0000 - mae: 31383.9727 - val_loss: 1639265024.0000 - val_mae: 27699.9141
Epoch 6/1000
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 

In [49]:
# Random forest baseline on df_new. RandomForestRegressor is imported in the
# notebook's top import cell rather than in the middle of the analysis.

# One-hot encode df_new only (first level of each factor dropped)
X = pd.get_dummies(df_new.drop('SalePrice', axis=1), drop_first=True)
y = df_new['SalePrice']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
print("R^2 na zbiorze testowym:", rf.score(X_test, y_test))

R^2 na zbiorze testowym: 0.8634971604964015
