In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Load the dataset from a CSV file into a DataFrame
file_path = "USA_Housing.csv"  # Change this if needed
df = pd.read_csv(file_path)

# Preview the first rows, then summarize the columns
print(df.head())
# DataFrame.info() writes its summary to stdout itself and returns None;
# wrapping it in print() would emit a stray "None" line after the summary.
df.info()

In [None]:
# The 'Address' column is excluded from the model inputs; remove it when the file includes it
if 'Address' in df.columns:
    df = df.drop('Address', axis=1)

# 'Price' is the regression target; every remaining column is used as a feature
y = df['Price']
X = df.drop('Price', axis=1)

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a linear regression on the training split
# (fit() returns the estimator itself, so construction and fitting can be chained)
model = LinearRegression().fit(X_train, y_train)

# Predict the target for the held-out test rows
y_pred = model.predict(X_test)

In [None]:
# Evaluate the model on the held-out test set.
# RMSE is taken as the square root of the MSE instead of passing
# squared=False: that keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, where it raises a TypeError.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# Backward-compatible alias: this value was previously stored under the name
# `mae`, although it is a root mean squared error, not a mean absolute error.
mae = rmse
r2 = r2_score(y_test, y_pred)

print(f"Root Mean Squared Error: {rmse}")
print(f"R-squared Score: {r2}")

In [None]:
# Scatter plot of predicted against actual prices for the test set
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(x=y_test, y=y_pred, alpha=0.5, ax=ax)
ax.set_xlabel("Actual Prices")
ax.set_ylabel("Predicted Prices")
ax.set_title("Actual vs. Predicted House Prices")
plt.show()