**Loab Packages**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

**Load Data**

In [None]:
# Read the training CSV of the Pune rental-price dataset from the Kaggle input
# mount, then preview the first five rows (the default for head()).
df = pd.read_csv(
    "/kaggle/input/rental-price-of-indias-it-capital-pune-mh-ind/train.csv"
)
df.head()

In [None]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Count the rows that exactly repeat an earlier row (all columns equal).
df.duplicated().sum()

In [None]:
# Remove exact duplicate rows and renumber the index from 0 so it has no gaps,
# then show the new (rows, columns) shape.
df = df.drop_duplicates().reset_index(drop=True)
df.shape

**Data Preprocessing**

In [None]:
from sklearn.preprocessing import LabelEncoder

# Columns that are replaced by integer codes.
cols = [
    'furnishing', 'avalable_for', 'facing', 'floor_type', 'address',
    'gate_community', 'maintenance_amt', 'corner_pro', 'wheelchairadption',
    'petfacility', 'propertyage',
]

# Each column is fitted on its own, so the integer codes are independent
# between columns.
for col in cols:
    df[col] = LabelEncoder().fit_transform(df[col])

df.head(10)

**Data visualization**

In [None]:
plt.style.use('ggplot')
plt.figure(figsize=(25, 30))

# One count plot per feature, filling a 4x2 grid left-to-right, top-to-bottom.
count_features = [
    'bedroom', 'bathrooms', 'floor_number', 'gate_community',
    'facing', 'powerbackup', 'petfacility', 'wheelchairadption',
]
for slot, feature in enumerate(count_features, start=1):
    plt.subplot(4, 2, slot)
    sns.countplot(data=df, x=feature)

plt.show()

**corelation**

In [None]:
# Pairwise correlation between all columns (pandas' default method, Pearson),
# drawn as an annotated heat map on a diverging colour scale.
correlations = df.corr()
plt.figure(figsize=(30, 30))
sns.heatmap(correlations, annot=True, cmap='coolwarm')
plt.show()

 on checking correlation mostly appartments with (1 or 2) bedrooms,(1 or 2) bathrooms,floor number,floor type, **gate community,facing,powerbackup, petfacility,wheelchairadption**

**Checking outliers**

In [None]:
plt.figure(figsize=(30, 30))

# Horizontal box plot for every column; the 15x2 grid has room for at most
# 30 panels.
for panel, name in enumerate(df.columns, start=1):
    panel_ax = plt.subplot(15, 2, panel)
    sns.boxplot(data=df, x=name, orient="h", ax=panel_ax)

plt.show()

In [None]:
plt.figure(figsize=(20, 20))

# Horizontal violin plot for every column on a 15x2 grid (at most 30 panels).
for position, feature in enumerate(df.columns, start=1):
    violin_ax = plt.subplot(15, 2, position)
    sns.violinplot(data=df, x=feature, orient='h', ax=violin_ax)

plt.show()

In [None]:
from scipy import stats

# Absolute z-score of every cell, standardised per column (scipy's default
# axis=0). Every column of df is included.
zscore = np.absolute(stats.zscore(df))
print(zscore)

In [None]:
# Show the (row positions, column positions) of every absolute z-score that is
# strictly greater than 3.
print(np.where(zscore > 3))

In [None]:
# Keep only the rows in which no column has an absolute z-score of 3 or more.
# Written as "not any(>= 3)" rather than "all(< 3)": a constant column yields
# NaN z-scores, NaN fails every comparison, and the "all(< 3)" form would then
# discard every row of the frame. On finite scores the two forms are identical.
outlier_rows = (zscore >= 3).any(axis=1)
df = df[~outlier_rows]

In [None]:
# Feature matrix: every column except the target. `columns=` names the axis
# explicitly; the previous `axis=True` only worked because True == 1.
X = df.drop(columns=['rent'])
# Target vector: the rent column.
y = df['rent']

**splitting Data**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split
# repeatable.
split_parts = train_test_split(X, y, random_state=42, test_size=0.2)
x_train, x_test, y_train, y_test = split_parts

In [None]:
# Sanity check: shapes of the train/test feature sets and train/test targets.
x_train.shape,x_test.shape,y_train.shape,y_test.shape

**normalisation**

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-feature min/max from the training rows only, then apply that
# same scaling to both splits so no test-set statistics enter the fit.
ms = MinMaxScaler().fit(x_train)
x_train = ms.transform(x_train)
x_test = ms.transform(x_test)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score

In [None]:
# k-nearest-neighbours regressor with scikit-learn's default hyper-parameters.
k_reg = KNeighborsRegressor()
k_reg.fit(x_train, y_train)

# Test-set R^2. The name k_acc is kept because the model-comparison cell reads
# it, but the value is a coefficient of determination, not an accuracy.
k_acc = r2_score(y_test, k_reg.predict(x_test))

# A regressor's .score() is R^2, so label it as such. The test figure reuses
# k_acc instead of predicting on x_test a second time.
print("Train R2:", k_reg.score(x_train, y_train))
print("Test R2:", k_acc)

In [None]:
# Ordinary least-squares linear regression with default settings.
l_reg = LinearRegression()
l_reg.fit(x_train, y_train)

# Test-set R^2. The name l_acc is kept because the model-comparison cell reads
# it, but the value is a coefficient of determination, not an accuracy.
l_acc = r2_score(y_test, l_reg.predict(x_test))

# A regressor's .score() is R^2, so label it as such. The test figure reuses
# l_acc instead of predicting on x_test a second time.
print("Train R2:", l_reg.score(x_train, y_train))
print("Test R2:", l_acc)

In [None]:
# Decision-tree regressor. random_state fixes the random choice between
# equally good splits so a re-run reproduces the same tree and the same scores.
d_reg = DecisionTreeRegressor(random_state=42)
d_reg.fit(x_train, y_train)

# Test-set R^2. The name d_acc is kept because the model-comparison cell reads
# it, but the value is a coefficient of determination, not an accuracy.
d_acc = r2_score(y_test, d_reg.predict(x_test))

# A regressor's .score() is R^2, so label it as such. The test figure reuses
# d_acc instead of predicting on x_test a second time.
print("Train R2:", d_reg.score(x_train, y_train))
print("Test R2:", d_acc)

In [None]:
# Random-forest regressor. The forest is built from random bootstrap samples,
# so random_state is pinned to make the reported scores reproducible.
r_reg = RandomForestRegressor(random_state=42)
r_reg.fit(x_train, y_train)

# Test-set R^2. The name r_acc is kept because the model-comparison cell reads
# it, but the value is a coefficient of determination, not an accuracy.
r_acc = r2_score(y_test, r_reg.predict(x_test))

# A regressor's .score() is R^2, so label it as such. The test figure reuses
# r_acc instead of predicting on x_test a second time.
print("Train R2:", r_reg.score(x_train, y_train))
print("Test R2:", r_acc)

In [None]:
# Gradient-boosted trees (XGBoost) with the library's default hyper-parameters.
x_reg = XGBRegressor()
x_reg.fit(x_train, y_train)

# Test-set R^2. The name x_acc is kept because the model-comparison cell reads
# it, but the value is a coefficient of determination, not an accuracy.
x_acc = r2_score(y_test, x_reg.predict(x_test))

# A regressor's .score() is R^2, so label it as such. The test figure reuses
# x_acc instead of predicting on x_test a second time.
print("Train R2:", x_reg.score(x_train, y_train))
print("Test R2:", x_acc)

In [None]:
# Collect each model's test-set score into one comparison table.
model_names = ['Knn', 'Linear', 'DecisionTree', 'RandomForest', 'XGB']
model_scores = [k_acc, l_acc, d_acc, r_acc, x_acc]
models = pd.DataFrame({'Models': model_names, 'Scores': model_scores})

# Display the table best-first. The sorted copy is only shown, not stored, so
# `models` itself keeps the order above.
models.sort_values('Scores', ascending=False)

In [None]:
# Bar chart of the test-set score for each model, in the table's row order.
sns.barplot(data=models, x='Models', y='Scores')
plt.show()