In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error

# Compare three tree-ensemble regressors (RandomForest, XGBoost, LightGBM) on
# the housing CSV: fit each one on the training portion of an 80/20 split,
# then report its test-set mean squared error together with the price it
# predicts for the first held-out row.

# Location of the CSV on the local machine; adjust it for your environment.
file_path = 'C:/Users/Sri sai murari/Desktop/USA_Housing.csv'

# Read the CSV and discard the 'Address' column, which is not used by the models.
data = pd.read_csv(file_path).drop(columns=['Address'])

# 'Price' is the regression target; every remaining column is a feature.
y = data['Price']
X = data.drop(columns=['Price'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the three regressors with the same seed.
rf_model, xgb_model, lgbm_model = (
    RandomForestRegressor(random_state=42),
    XGBRegressor(random_state=42),
    LGBMRegressor(random_state=42),
)

# Fit them one after another (RandomForest, XGBoost, LightGBM) on the
# training split before anything is printed.
for _estimator in (rf_model, xgb_model, lgbm_model):
    _estimator.fit(X_train, y_train)

# One-row DataFrame holding the first test sample; the double brackets keep it
# two-dimensional so it can be passed straight to predict().
first_test_row = X_test.iloc[[0]]

# RandomForest: full test-set predictions, their MSE, and the first-row price.
rf_predictions = rf_model.predict(X_test)
rf_mse = mean_squared_error(y_test, rf_predictions)
rf_predicted_price = rf_model.predict(first_test_row)

# XGBoost: same three quantities.
xgb_predictions = xgb_model.predict(X_test)
xgb_mse = mean_squared_error(y_test, xgb_predictions)
xgb_predicted_price = xgb_model.predict(first_test_row)

# LightGBM: same three quantities.
lgbm_predictions = lgbm_model.predict(X_test)
lgbm_mse = mean_squared_error(y_test, lgbm_predictions)
lgbm_predicted_price = lgbm_model.predict(first_test_row)

# Report the results, one model at a time: MSE first, then the first-row price.
print(f'RandomForest MSE: {rf_mse}')
print(f"Predicted Price for the first test sample using RandomForest: {rf_predicted_price[0]}")

print(f'XGBoost MSE: {xgb_mse}')
print(f"Predicted Price for the first test sample using XGBoost: {xgb_predicted_price[0]}")

print(f'LightGBM MSE: {lgbm_mse}')
print(f"Predicted Price for the first test sample using LightGBM: {lgbm_predicted_price[0]}")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000171 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 4000, number of used features: 5
[LightGBM] [Info] Start training from score 1229576.992597
RandomForest MSE: 14465132338.701965
Predicted Price for the first test sample using RandomForest: 1318734.8938716897
XGBoost MSE: 14912750470.898375
Predicted Price for the first test sample using XGBoost: 1297134.375
LightGBM MSE: 12313469588.940966
Predicted Price for the first test sample using LightGBM: 1332323.9035836137
