In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
df = pd.read_csv("housing_data.csv")

In [3]:
print("Dataset Loaded Successfully!\n")
print(df.head())

Dataset Loaded Successfully!

   House_ID   Price  Area_sqft  Bedrooms  Bathrooms    City  Year_Built
0         1  273886       3044         4          5  Mumbai        2012
1         2  637798       4219         3          4  Mumbai        2018
2         3  353873       4322         2          1  Mumbai        2013
3         4  571847       2979         4          5  Mumbai        2007
4         5  576894       2129         5          3  Mumbai        2023


In [4]:
print("Dataset Loaded Successfully!\n")
print(df.head())

Dataset Loaded Successfully!

   House_ID   Price  Area_sqft  Bedrooms  Bathrooms    City  Year_Built
0         1  273886       3044         4          5  Mumbai        2012
1         2  637798       4219         3          4  Mumbai        2018
2         3  353873       4322         2          1  Mumbai        2013
3         4  571847       2979         4          5  Mumbai        2007
4         5  576894       2129         5          3  Mumbai        2023


In [5]:
print("\nDataset Info:")
print(df.info())


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   House_ID    20 non-null     int64 
 1   Price       20 non-null     int64 
 2   Area_sqft   20 non-null     int64 
 3   Bedrooms    20 non-null     int64 
 4   Bathrooms   20 non-null     int64 
 5   City        20 non-null     object
 6   Year_Built  20 non-null     int64 
dtypes: int64(6), object(1)
memory usage: 1.2+ KB
None


In [6]:
print("\nMissing Values:")
print(df.isnull().sum())



Missing Values:
House_ID      0
Price         0
Area_sqft     0
Bedrooms      0
Bathrooms     0
City          0
Year_Built    0
dtype: int64


In [7]:
df = df.fillna(df.mean(numeric_only=True))

In [8]:
df = df.select_dtypes(include=[np.number])

In [9]:
print("\nAfter Cleaning:")
print(df.head())


After Cleaning:
   House_ID   Price  Area_sqft  Bedrooms  Bathrooms  Year_Built
0         1  273886       3044         4          5        2012
1         2  637798       4219         3          4        2018
2         3  353873       4322         2          1        2013
3         4  571847       2979         4          5        2007
4         5  576894       2129         5          3        2023


In [10]:
X = df.drop("Price", axis=1)
y = df["Price"]

In [11]:
print("\nFeatures Shape:", X.shape)
print("Target Shape:", y.shape)


Features Shape: (20, 5)
Target Shape: (20,)


In [12]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

In [13]:
print("\nTrain-Test Split Completed")
print("Training Data:", X_train.shape)
print("Testing Data:", X_test.shape)


Train-Test Split Completed
Training Data: (16, 5)
Testing Data: (4, 5)


In [14]:
model = LinearRegression()
model.fit(X_train, y_train)

In [15]:
print("\nModel Training Completed")


Model Training Completed


In [16]:
y_pred = model.predict(X_test)

In [17]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [18]:
print("\nModel Evaluation:")
print("Mean Squared Error:", mse)
print("R2 Score:", r2)



Model Evaluation:
Mean Squared Error: 88844752817.1988
R2 Score: -0.7070006011057948


In [19]:
print("\nProgram Executed Successfully!")


Program Executed Successfully!
