In [19]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

# Load the CSV file into a DataFrame
file_path = 'out.csv'  # Replace with your actual file path
data = pd.read_csv(file_path)

# Define preprocessing function
def preprocess_data(df, selected_features):
    df = pd.get_dummies(df[selected_features], drop_first=True)
    return df

selected_features = [
    'YrSold', 'MoSold', 'mortgage_interest', 'inflation_rates',
    'OverallQual', 'OverallCond', 'GrLivArea', 'TotalBsmtSF',
    'GarageCars', 'GarageArea', 'Neighborhood', 'YearBuilt',
    'YearRemodAdd', 'SalePrice', 'Street', 'PoolArea', 'PoolQC', 'LotArea', 'Heating', 'CentralAir'
]

# Preprocess the data
preprocessed_data = preprocess_data(data, selected_features)
X = preprocessed_data.drop('SalePrice', axis=1)
y = preprocessed_data['SalePrice']

# Save column names after preprocessing
columns_after_preprocessing = X.columns

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Gradient Boosting model
model = GradientBoostingRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Function to prepare and make predictions
def make_prediction(record, model, columns):
    # Ensure the record has the same columns as the training data
    record = record.reindex(columns=columns, fill_value=0)
    return model.predict(record)

# Selecting two specific records
record_1 = preprocess_data(data.iloc[[0]], selected_features).drop('SalePrice', axis=1)
record_2 = preprocess_data(data.iloc[[1]], selected_features).drop('SalePrice', axis=1)

# Make predictions for the two records
prediction_1 = make_prediction(record_1, model, columns_after_preprocessing)
prediction_2 = make_prediction(record_2, model, columns_after_preprocessing)

# Print predictions and raw data
# print("Record 1 Raw Data:", data.iloc[0])
print("Record 1 Prediction:", prediction_1)
# print("Record 2 Raw Data:", data.iloc[1])
print("Record 2 Prediction:", prediction_2)

# Experimentation with features
def modify_feature(record, feature_name, new_value):
    modified_record = record.copy()
    modified_record[feature_name] = new_value
    return modified_record

# Varying feature 'OverallQual'
increased_overall_qual_record = modify_feature(record_1.copy(), 'OverallQual', record_1['OverallQual'].iloc[0] + 1)
prediction_increased_oq = make_prediction(increased_overall_qual_record, model, columns_after_preprocessing)

# Varying feature 'GrLivArea'
increased_gr_liv_area_record = modify_feature(record_1.copy(), 'GrLivArea', record_1['GrLivArea'].iloc[0] + 100)
prediction_increased_gla = make_prediction(increased_gr_liv_area_record, model, columns_after_preprocessing)

increased_overall__reord = modify_feature(increased_overall_qual_record.copy(), 'OverallQual', increased_overall_qual_record['GrLivArea'].iloc[0] + 100)
prediction_Increased_overall = make_prediction(increased_overall__reord, model, columns_after_preprocessing)

decreased_overall_qual_record = modify_feature(record_1.copy(), 'OverallQual', record_1['OverallQual'].iloc[0] + 1)
decreased_gr_liv_area_record = modify_feature(decreased_overall_qual_record.copy(), 'GrLivArea', decreased_overall_qual_record['GrLivArea'].iloc[0] - 100)
prediction_decreased_gla = make_prediction(decreased_gr_liv_area_record, model, columns_after_preprocessing)

# Print the modified predictions

print("Increased Overall Quality prediction:", prediction_increased_oq)
print("Increased Ground Living Area prediction:", prediction_increased_gla)
print("Increased both prediction:", prediction_Increased_overall)
print("Increased one A and Decreased B :", prediction_decreased_gla)



Record 1 Prediction: [194030.91012065]
Record 2 Prediction: [153141.73973419]
Increased Overall Quality prediction: [219534.81670183]
Increased Ground Living Area prediction: [200336.89861743]
Increased both prediction: [317744.82273628]
Increased one A and Decreased B : [215298.41882734]
