In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the training and test CSV files.
train_df = pd.read_csv('/content/train.csv')
test_df = pd.read_csv('/content/test.csv')

# Mean-impute every numeric training column. The means are computed once,
# before filling, so the same statistics can be reused for the test set.
numeric_cols = train_df.select_dtypes(include=[np.number]).columns.tolist()
train_means = train_df[numeric_cols].mean()
train_df[numeric_cols] = train_df[numeric_cols].fillna(train_means)

# Numeric training columns that are also present in the test file;
# columns missing from test_df are skipped.
common_numeric_cols = [col for col in numeric_cols if col in test_df.columns]

# Fill test gaps with the TRAINING means rather than the test set's own means,
# so train and test rows go through the same imputation.
# fillna with a Series matches the fill values to columns by name.
test_df[common_numeric_cols] = test_df[common_numeric_cols].fillna(
    train_means[common_numeric_cols]
)

# Regression target and the hand-picked predictor columns.
target = 'SalePrice'
features = ['OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'FullBath', 'YearBuilt']

# Predictor matrix and response vector taken from the training frame.
X = train_df.loc[:, features]
y = train_df.loc[:, target]

# Seeded 80/20 hold-out split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training portion only, then apply
# them to both portions. From here on X_train / X_test hold the scaled values.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train a decision tree with default hyperparameters; the seed keeps runs repeatable.
model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Hold-out predictions used by every metric below.
y_pred = model.predict(X_test)

# Report each hold-out metric as "<label> <value>".
for metric_label, metric_fn in (
    ('Mean Squared Error:', mean_squared_error),
    ('Mean Absolute Error:', mean_absolute_error),
    ('R2 Score:', r2_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

# 5-fold cross-validation on the training portion, averaged over the folds.
# No `scoring` is passed, so the estimator's default score is used.
scores = cross_val_score(model, X_train, y_train, cv=5)
print('Cross-validated R2:', scores.mean())

# Select the model's feature columns from the test frame, apply the
# already-fitted scaler, and predict with the trained model.
X_kaggle_test = test_df[features]
X_kaggle_test = scaler.transform(X_kaggle_test)
predictions = model.predict(X_kaggle_test)

print('\nResumo de previsões finais:')
# Preview at most five predictions. Slicing (instead of indexing 0..4)
# avoids an IndexError when the test file has fewer than five rows.
for house_id, predicted_price in zip(test_df['Id'].head(5), predictions[:5]):
    print(f'ID: {house_id}, SalePrice Predicted: {predicted_price}')

# Write the Id / SalePrice pairs to a CSV file without the index column.
submission = pd.DataFrame({'Id': test_df['Id'], 'SalePrice': predictions})
submission.to_csv('submission_decision_tree.csv', index=False)

Mean Squared Error: 1699691152.957382
Mean Absolute Error: 26708.171232876713
R2 Score: 0.7784069272875559
Cross-validated R2: 0.6411315543236578

Resumo de previsões finais:
ID: 1461, SalePrice Predicted: 109500.0
ID: 1462, SalePrice Predicted: 157000.0
ID: 1463, SalePrice Predicted: 175000.0
ID: 1464, SalePrice Predicted: 192000.0
ID: 1465, SalePrice Predicted: 245500.0
