In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns


## Loading dataset

In [None]:
# Read the training set; the path is resolved relative to the working directory
train_data = pd.read_csv(filepath_or_buffer='train.csv')

In [None]:
# Preview the first five rows as a quick sanity check of the load
train_data.head(n=5)

In [None]:
# Count missing values per column
train_data.isna().sum()

## Prepare data & Split

In [None]:
# Predictors and target used by the model, named once so the column selection
# and the missing-value filter below cannot drift apart.
feature_columns = ['GrLivArea', 'BedroomAbvGr', 'FullBath']
target_column = 'SalePrice'

# LinearRegression rejects NaN input, so keep only rows that are complete in
# the columns actually modelled. Gaps in other columns do not remove a row,
# and this is a no-op when the modelled columns have no missing values.
model_data = train_data.dropna(subset=feature_columns + [target_column])

X = model_data[feature_columns]
y = model_data[target_column]

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=42, test_size=0.2)

## Build Model

In [None]:
# Ordinary least-squares regression fitted on the training split
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Predict sale prices for the held-out test rows
predictions = model.predict(X=X_test)

In [None]:
# Score the test-set predictions with mean squared error
mse = mean_squared_error(y_true=y_test, y_pred=predictions)

In [None]:
# Report the test-set mean squared error
print('Mean Squared Error:', mse, sep=' ')

## Plotting

In [None]:
# Apply seaborn's white-grid theme to the figures created after this cell.
# set_theme() is the supported entry point; sns.set() is only a legacy alias
# that the seaborn documentation flags for possible removal.
sns.set_theme(style='whitegrid')

In [None]:
# Overlay actual and predicted sale prices against living area for the test split,
# drawing on an explicit Axes rather than the implicit pyplot "current" figure
fig, ax = plt.subplots(figsize=(10, 6))
living_area = X_test['GrLivArea']
sns.scatterplot(x=living_area, y=y_test, color='blue', label='Actual Prices', ax=ax)
sns.scatterplot(x=living_area, y=predictions, color='red', label='Predicted Prices', ax=ax)
ax.set_title('Actual vs Predicted House Prices based on Square Footage')
ax.set_xlabel('Square Footage (GrLivArea)')
ax.set_ylabel('Sale Price')
ax.legend()

In [None]:
# Show the plot
# NOTE(review): the figure is built in the previous cell; depending on the
# matplotlib backend it may already have been rendered there — confirm this
# separate call still displays anything.
plt.show()