In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

### Load the dataset

In [None]:
# Read the housing data; the relative path means Housing.csv is looked up
# in the notebook's current working directory.
data = pd.read_csv('Housing.csv')

In [None]:
# Preview the first rows of the freshly loaded frame.
data.head()

In [None]:
# Preview the last rows of the loaded frame.
data.tail()

### Univariate Analysis

In [None]:
# furnishingstatus is a categorical column, so show how many rows fall in
# each category. A KDE curve drawn over category positions has no meaning.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=data, x='furnishingstatus', ax=ax)

In [None]:
# Vertical box plot of 'area', drawn on an explicitly created 8x6 axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=data, y='area', ax=ax)

In [None]:
# Vertical box plot of 'price', drawn on an explicitly created 8x6 axes.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=data, y='price', ax=ax)

### Bivariate Analysis

In [None]:
# Spread of 'area' within each 'parking' value.
sns.boxplot(data=data, x='parking', y='area')

In [None]:
# Aggregated 'price' (seaborn's default estimator) for each 'bedrooms' value.
sns.barplot(data=data, x='bedrooms', y='price')

In [None]:
# Colour by the categorical furnishing status instead of the continuous
# 'area'. A numeric hue creates one level per distinct value, and seaborn
# leaves the hue column out of the plotted grid, which hid 'area' itself.
sns.pairplot(data, hue='furnishingstatus')

In [None]:
# Correlate numeric columns only. The frame presumably still holds text
# columns at this point (they are label/one-hot encoded further down), and
# DataFrame.corr() raises on non-numeric data in pandas >= 2.0.
numeric_data = data.select_dtypes(include='number')
correlation_matrix = numeric_data.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')

### Descriptive statistics

In [None]:
# Summary statistics of the frame, using pandas' describe() defaults.
data.describe()

In [None]:
# Call the method: a bare `data.info` only echoes the bound-method repr
# instead of printing the column dtypes and non-null counts.
data.info()

### Missing values

In [None]:
# Element-wise missing-value mask (isna is the method isnull aliases).
data.isna()

In [None]:
# Per column: does it contain at least one missing value?
data.isna().any()

In [None]:
# Per column: number of missing values.
data.isna().sum()

### Outliers

In [None]:
# Pass the frame by keyword: a lone positional argument is read as `x` by
# older seaborn releases and as `data` by newer ones.
sns.boxplot(data=data)

In [None]:
# Name the arguments: a lone positional Series is read as `x` by older
# seaborn releases and as `data` by newer ones. This form matches the
# earlier univariate 'area' box plot (vertical, y='area').
sns.boxplot(data=data, y='area')

In [None]:
# Cap 'area' at the 1.5 * IQR fences: values above `up` become `up`, values
# below `low` become `low`, everything else is kept as is.
area = data['area']
q1, q3 = area.quantile(0.25), area.quantile(0.75)
iqr = q3 - q1

low = q1 - (1.5 * iqr)
up = q3 + (1.5 * iqr)

# Apply the lower cap first, then the upper cap on top of it.
capped_low = np.where(area < low, low, area)
data['area'] = np.where(area > up, up, capped_low)

In [None]:
# Re-draw the 'area' box plot after capping. Arguments are named because a
# lone positional Series is read as `x` by older seaborn releases and as
# `data` by newer ones.
sns.boxplot(data=data, y='area')

In [None]:
# Re-inspect the first rows after 'area' was capped to the IQR fences.
data.head()

### Encoding

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# One LabelEncoder instance; it is re-fitted on each column in the next cell,
# so afterwards it only remembers the classes of the last column it saw.
le = LabelEncoder()

In [None]:
# Replace each listed column with the integer codes produced by the shared
# LabelEncoder (re-fitted per column, in this order).
# NOTE(review): any text column not listed here stays non-numeric and would
# break the later scaling step -- confirm the list covers all of them.
binary_columns = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning']
for column in binary_columns:
    data[column] = le.fit_transform(data[column])

In [None]:
# Check the first rows after label-encoding the five columns above.
data.head()

In [None]:
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Create a OneHotEncoder with default settings. Note that the cell which
# fits the encoder further down builds a fresh instance again.
encoder = OneHotEncoder()

In [None]:
# Snapshot of the current column labels.
# NOTE(review): `names` is reassigned from x.columns before any visible use,
# so this cell looks redundant -- confirm before removing.
names = data.columns

In [None]:
# One-hot encode furnishingstatus: the encoder is given a single-column 2-D
# array and its result is converted to a dense array.
encoder = OneHotEncoder()
furnishing_column = data['furnishingstatus'].to_numpy().reshape(-1, 1)
temp = encoder.fit_transform(furnishing_column).toarray()

In [None]:
# Label the one-hot columns with the categories the encoder actually learned
# (a hard-coded list must match their number and sorted order exactly), and
# reuse data's index so the later column-wise concat lines up row for row.
temp = pd.DataFrame(temp, columns=encoder.categories_[0], index=data.index)

In [None]:
# Swap the original furnishingstatus column for its one-hot columns.
features_without_status = data.drop(columns=['furnishingstatus'])
data = pd.concat([features_without_status, temp], axis=1)

In [None]:
# Check the first rows after furnishingstatus was replaced by one-hot columns.
data.head()

### Split the data

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Feature matrix: every column except the target 'price'.
x = data.drop(columns = ['price'])

In [None]:
# Target vector: the 'price' column.
y = data['price']

### Scaling

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Min-max scaler with default settings; it is fitted in a later cell.
scaler = MinMaxScaler()

In [None]:
# Look at the features before scaling.
x.head()

In [None]:
# Keep the feature labels: the scaled output is re-wrapped in a DataFrame
# using these column names.
names = x.columns

In [None]:
# Fit the scaler on the whole feature matrix (this runs before the train/test
# split below) and rebuild a DataFrame with the saved column labels.
scaled_values = scaler.fit_transform(x)
x = pd.DataFrame(scaled_values, columns=names)

In [None]:
# Look at the features after scaling.
x.head()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

### Build Model

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Linear regression estimator with scikit-learn's default settings.
model = LinearRegression()

### Train model

In [None]:
# Fit the regression on the training split only.
model.fit(x_train, y_train)

### Test model

In [None]:
# Predict the target for the held-out test features.
pred = model.predict(x_test)

### Evaluate

In [None]:
# Import only the metric this notebook uses, so the namespace is not
# flooded by a star import.
from sklearn.metrics import r2_score

In [None]:
# r2_score expects (y_true, y_pred). R^2 is not symmetric, so the ground
# truth must come first. The value is R^2 expressed as a percentage, not a
# classification accuracy; the variable name is kept for compatibility.
accuracy = r2_score(y_test, pred) * 100

In [None]:
# Display the score computed above (an R^2 value scaled by 100, not a
# classification accuracy).
accuracy