<a href="https://colab.research.google.com/github/kavya22115/Predictive_Analytics/blob/House_Price_Prediction/Houseprice_predictipon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
# Load the housing table and discard the 'Order' and 'PID' columns
# (presumably row identifiers with no predictive value — confirm).
df = pd.read_csv('AmesHousing.csv').drop(columns=['Order', 'PID'])

# 'SalePrice' is the regression target; every other column is a feature.
y = df['SalePrice']
X = df.drop(columns=['SalePrice'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Add the engineered 'TotalArea' column to each split. assign() returns a new
# frame, so the objects handed back by train_test_split are never mutated.
# NOTE(review): this sums 'Gr Liv Area' with 'TotRms AbvGrd'; if the latter is
# a room count rather than an area, the feature mixes units — confirm intent.
X_train = X_train.assign(
    TotalArea=X_train['Gr Liv Area'] + X_train['TotRms AbvGrd']
)
X_test = X_test.assign(
    TotalArea=X_test['Gr Liv Area'] + X_test['TotRms AbvGrd']
)
# Partition the feature columns by dtype so each group gets its own
# preprocessing branch. 'number' matches every numeric dtype rather than only
# int64/float64, and everything else is routed to the categorical branch.
# ColumnTransformer drops unlisted columns by default, so a column matching
# neither selector would otherwise vanish from the model without warning.
# NOTE(review): assumes every non-numeric column is a categorical label that
# can be one-hot encoded — confirm if datetime-like columns are ever added.
numeric_features = X_train.select_dtypes(include='number').columns
categorical_features = X_train.select_dtypes(exclude='number').columns

# Numeric branch: fill missing values with the column median, then
# standardize each column.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill missing values with the most frequent label, then
# one-hot encode. handle_unknown='ignore' keeps transform() from raising on
# labels that were not present when the encoder was fitted.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])

# Apply each branch to its own column group and concatenate the results.
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])
def _fit_and_report(title, estimator, feature_step,
                    X_fit, y_fit, X_eval, y_eval):
    """Fit a preprocessing + model pipeline and print its hold-out metrics.

    Args:
        title: Model name shown in the printed results header.
        estimator: Unfitted regressor used as the final pipeline step.
        feature_step: Transformer used as the 'preprocessor' step. It is
            fitted in place, so pass a clone when the caller's object must
            not be shared with this pipeline.
        X_fit: Training features.
        y_fit: Training target.
        X_eval: Hold-out features to predict on.
        y_eval: Hold-out target the predictions are scored against.

    Returns:
        A ``(pipeline, predictions)`` tuple: the fitted Pipeline and its
        predictions for ``X_eval``.
    """
    pipeline = Pipeline(steps=[
        ('preprocessor', feature_step),
        ('model', estimator),
    ])
    pipeline.fit(X_fit, y_fit)
    predictions = pipeline.predict(X_eval)

    print(f"\n🔸 {title} Results:")
    print(f"Mean Squared Error: {mean_squared_error(y_eval, predictions):.2f}")
    print(f"Mean Absolute Error: {mean_absolute_error(y_eval, predictions):.2f}")
    print(f"R² Score: {r2_score(y_eval, predictions):.2f}")
    return pipeline, predictions


# The random forest uses `preprocessor` itself, so that object ends up fitted
# on the training split exactly as before.
rf_pipeline, y_pred_rf = _fit_and_report(
    "Random Forest Regressor",
    RandomForestRegressor(n_estimators=100, random_state=42),
    preprocessor,
    X_train, y_train, X_test, y_test,
)

# Pipeline stores its steps by reference, so reusing `preprocessor` here would
# make both pipelines share one transformer and refitting either would change
# the other. An unfitted clone gives the boosting pipeline its own copy.
gbm_pipeline, y_pred_gbm = _fit_and_report(
    "Gradient Boosting Regressor",
    GradientBoostingRegressor(n_estimators=100, random_state=42),
    clone(preprocessor),
    X_train, y_train, X_test, y_test,
)



🔸 Random Forest Regressor Results:
Mean Squared Error: 700680476.07
Mean Absolute Error: 15702.17
R² Score: 0.91

🔸 Gradient Boosting Regressor Results:
Mean Squared Error: 711002854.90
Mean Absolute Error: 15226.93
R² Score: 0.91
