# Implement a linear regression model to predict house prices from square footage and the number of bedrooms and bathrooms.

In [62]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Read both CSV splits (looked up relative to the working directory) into DataFrames
train_csv, test_csv = 'train.csv', 'test.csv'
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)



In [63]:
# Columns kept from the raw training frame: the model inputs (living area,
# bedrooms, full/half baths) followed by the SalePrice target.
train_features = [
    'GrLivArea',
    'BedroomAbvGr',
    'FullBath',
    'HalfBath',
] + ['SalePrice']


In [64]:
# Narrow the training frame to the selected columns; .copy() detaches the
# result from the frame that was read from disk so later edits do not alias it.
train_df = train_df.loc[:, train_features].copy()

In [65]:
# Collapse the two bathroom counts into one feature for the training data:
# a half bath contributes 0.5, a full bath contributes 1.
half_bath_weight = 0.5
train_df['TotalBath'] = half_bath_weight * train_df['HalfBath'] + train_df['FullBath']

In [66]:
# The separate bath counts are now folded into TotalBath, so remove them
# from the training data.
train_df = train_df.drop(columns=['FullBath', 'HalfBath'])

In [67]:
# Split the training frame into the predictor matrix (X) and the target (y)
predictor_columns = ['GrLivArea', 'BedroomAbvGr', 'TotalBath']
X, y = train_df[predictor_columns], train_df['SalePrice']

In [68]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split reproducible between runs.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

In [69]:

# Build the linear regression estimator and fit it on the training split
# (fit() returns the fitted estimator itself, so the two steps can be chained).
model = LinearRegression().fit(X_train, y_train)

In [70]:
# Predict sale prices for the held-out validation rows
y_val_pred = model.predict(X=X_val)

In [71]:
# Evaluate the model on the held-out validation split
mae = mean_absolute_error(y_val, y_val_pred)
mse = mean_squared_error(y_val, y_val_pred)
# RMSE is derived from the MSE computed above rather than via
# mean_squared_error(..., squared=False): that keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, where it raises a TypeError.
rmse = mse ** 0.5
r2 = r2_score(y_val, y_val_pred)

In [72]:
# Report the validation metrics, one per line.
# NOTE(review): MSE is expressed in squared target units, so the "$" prefix on
# that line is loose; it is left unchanged to match the recorded output.
metric_report = [
    f"Mean Absolute Error (MAE): ${mae:,.2f}",
    f"Mean Squared Error (MSE): ${mse:,.2f}",
    f"Root Mean Squared Error (RMSE): ${rmse:,.2f}",
    f"R-squared (R²): {r2:.2f}",
]
print("\n".join(metric_report))

Mean Absolute Error (MAE): $36,569.64
Mean Squared Error (MSE): $2,848,523,443.42
Root Mean Squared Error (RMSE): $53,371.56
R-squared (R²): 0.63


In [73]:
# Apply the same bathroom feature engineering to the test data as was used
# for training: add TotalBath (half baths count 0.5), then drop its two inputs.
test_df = (
    test_df
    .assign(TotalBath=test_df['FullBath'] + test_df['HalfBath'] * 0.5)
    .drop(columns=['FullBath', 'HalfBath'])
)
# Same predictor columns, in the same order, as the model was fitted on
X_test = test_df[['GrLivArea', 'BedroomAbvGr', 'TotalBath']]



In [74]:
# Predict sale prices for the test rows and store them alongside the features
predicted_prices = model.predict(X_test)
test_df['SalePrice'] = predicted_prices



In [75]:
# Save the test frame (features plus the predicted SalePrice) to a new CSV file.
# The path is relative to the working directory, like the input CSVs, instead of
# a hardcoded absolute path that only exists on one machine; index=False keeps
# the DataFrame's row index out of the file.
test_df.to_csv('test_predictions.csv', index=False)



In [76]:
# Confirm to the reader that the export step has run
completion_message = "Predictions have been saved to test_predictions.csv."
print(completion_message)

Predictions have been saved to test_predictions.csv.
