Steps: 
> Import necessary libraries.

> Load the training dataset.

> Display the first few rows of the dataset (to understand its structure).

> Select relevant columns and clean the dataset. 

> Define the features and the target variable.

> Split the dataset into training and testing sets.

> Create and train a Linear Regression model, then predict prices for the testing set.

> Calculate mean squared error and R-squared.

> Plot the results. 

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Read the training data; the relative path is resolved against the
# current working directory
TRAIN_CSV_PATH = 'train.csv'
data = pd.read_csv(TRAIN_CSV_PATH)

In [None]:
# Preview the top of the dataset (DataFrame.head() returns the first
# five rows by default); the heading and the table go on separate lines
print("First few rows of the dataset:", data.head(), sep="\n")

In [None]:
# Columns used by the model: three predictors plus the price to predict
feature_cols = ['GrLivArea', 'BedroomAbvGr', 'FullBath']
target_col = 'SalePrice'

# Keep only the predictors and the target
df = data[feature_cols + [target_col]]

# Report how many values are missing in each selected column
print("\nMissing values in each column:")
print(df.isnull().sum())

# Discard incomplete rows so the model is only fitted on complete ones
df = df.dropna()

# Separate the predictors (X) from the target (y)
X, y = df[feature_cols], df[target_col]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Build the linear regression model and fit it on the training split
# (fit() returns the estimator itself, so the two steps can be chained)
model = LinearRegression().fit(X_train, y_train)

# Predicted house prices for the held-out testing rows
y_pred = model.predict(X_test)

# Score the predictions against the true test prices
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)


In [None]:

# Report the evaluation metrics computed for the testing set
print(f"\nMean Squared Error: {mse}")
print(f"R-squared: {r2}")

# Plot the results: actual prices on the x-axis, predictions on the y-axis
plt.scatter(y_test, y_pred)
plt.xlabel("Actual Prices")
plt.ylabel("Predicted Prices")
plt.title("Actual vs Predicted House Prices")

# Take the range of actual prices with the Series' own vectorized
# min()/max(), computed once each, rather than calling the Python builtins
# (which iterate the Series element by element) twice apiece
price_lo, price_hi = y_test.min(), y_test.max()

# y = x reference line: points lying on it are perfect predictions
plt.plot([price_lo, price_hi], [price_lo, price_hi], color='red')
plt.show()