In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd  
  

In [3]:
df=pd.read_csv("/content/residential_properties.csv")
df

Unnamed: 0,Size (sq ft),Bedrooms,Bathrooms,Location,Year Built,Garage Size,Distance to School (mi),Selling Price (k$)
0,1800,3,2,Downtown,1990,1,0.5,250
1,2200,4,3,Suburban,2005,2,1.2,400
2,1200,2,1,Rural,1985,0,3.6,150
3,3000,5,4,Downtown,2010,3,0.8,700
4,2500,4,3,Suburban,2000,2,2.1,500
5,1400,2,1,Rural,1970,1,2.7,175
6,2000,3,2,Downtown,2008,2,0.3,350
7,1800,3,2,Suburban,1995,1,1.8,300
8,1600,2,1,Rural,1980,0,4.3,125
9,2800,4,3,Downtown,2015,3,0.6,650


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 8 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Size (sq ft)             19 non-null     int64  
 1   Bedrooms                 19 non-null     int64  
 2   Bathrooms                19 non-null     int64  
 3   Location                 19 non-null     object 
 4   Year Built               19 non-null     int64  
 5   Garage Size              19 non-null     int64  
 6   Distance to School (mi)  19 non-null     float64
 7   Selling Price (k$)       19 non-null     int64  
dtypes: float64(1), int64(6), object(1)
memory usage: 1.3+ KB


In [5]:
df.isnull().sum()

Size (sq ft)               0
Bedrooms                   0
Bathrooms                  0
Location                   0
Year Built                 0
Garage Size                0
Distance to School (mi)    0
Selling Price (k$)         0
dtype: int64

In [6]:
df.dtypes

Size (sq ft)                 int64
Bedrooms                     int64
Bathrooms                    int64
Location                    object
Year Built                   int64
Garage Size                  int64
Distance to School (mi)    float64
Selling Price (k$)           int64
dtype: object

In [7]:
df.columns

Index(['Size (sq ft)', 'Bedrooms', 'Bathrooms', 'Location', 'Year Built',
       'Garage Size', 'Distance to School (mi)', 'Selling Price (k$)'],
      dtype='object')

In [8]:
#to remove whitespace
df.columns=df.columns.str.strip()
df.columns

Index(['Size (sq ft)', 'Bedrooms', 'Bathrooms', 'Location', 'Year Built',
       'Garage Size', 'Distance to School (mi)', 'Selling Price (k$)'],
      dtype='object')

In [9]:
#to replace whitespace with underscore
df.columns=df.columns.str.replace(' ','_')
df.columns

Index(['Size_(sq_ft)', 'Bedrooms', 'Bathrooms', 'Location', 'Year_Built',
       'Garage_Size', 'Distance_to_School_(mi)', 'Selling_Price_(k$)'],
      dtype='object')

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 8 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Size_(sq_ft)             19 non-null     int64  
 1   Bedrooms                 19 non-null     int64  
 2   Bathrooms                19 non-null     int64  
 3   Location                 19 non-null     object 
 4   Year_Built               19 non-null     int64  
 5   Garage_Size              19 non-null     int64  
 6   Distance_to_School_(mi)  19 non-null     float64
 7   Selling_Price_(k$)       19 non-null     int64  
dtypes: float64(1), int64(6), object(1)
memory usage: 1.3+ KB


In [11]:
#to fetch the columns with numerical data types
df_numeric=df.select_dtypes(exclude=['object'])
df_numeric

Unnamed: 0,Size_(sq_ft),Bedrooms,Bathrooms,Year_Built,Garage_Size,Distance_to_School_(mi),Selling_Price_(k$)
0,1800,3,2,1990,1,0.5,250
1,2200,4,3,2005,2,1.2,400
2,1200,2,1,1985,0,3.6,150
3,3000,5,4,2010,3,0.8,700
4,2500,4,3,2000,2,2.1,500
5,1400,2,1,1970,1,2.7,175
6,2000,3,2,2008,2,0.3,350
7,1800,3,2,1995,1,1.8,300
8,1600,2,1,1980,0,4.3,125
9,2800,4,3,2015,3,0.6,650


In [12]:
df_numeric.dtypes

Size_(sq_ft)                 int64
Bedrooms                     int64
Bathrooms                    int64
Year_Built                   int64
Garage_Size                  int64
Distance_to_School_(mi)    float64
Selling_Price_(k$)           int64
dtype: object

In [13]:
#Scaling
from sklearn import preprocessing
df2=df_numeric.copy()
#scaler=preprocessing.MinMaxScaler()->default feature_range=(0,1)
scalar=preprocessing.MinMaxScaler(feature_range=(0,1))
df2.iloc[:,:]=scalar.fit_transform(df2)

In [14]:
x=df2.drop(['Selling_Price_(k$)'],axis=1)#independent variable
y=df2['Selling_Price_(k$)']#dependent variable,continuous variable

In [15]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=15)


In [16]:
from sklearn.svm import SVR #Support vector classifier 
classifier=SVR(kernel='rbf')  
classifier.fit(x_train, y_train)  
#prediction
prediction=classifier.predict(x_test)
from sklearn import metrics
MAE=metrics.mean_absolute_error(y_test,prediction)
print(MAE)
MSE=metrics.mean_squared_error(y_test,prediction)
print(MSE)
rmse=np.sqrt(metrics.mean_squared_error(y_test,prediction))
print(rmse)

0.048430443155766945
0.00427023278240555
0.06534701816001667


In [17]:
classifier.score(x_test,y_test)

0.8898212694379015

In [18]:
classifier.score(x_train,y_train)

0.9115993556020946

In [19]:
from sklearn import linear_model
lasso_reg = linear_model.Lasso(alpha=0, max_iter=100, tol=0.1)
lasso_reg.fit(x_train,y_train)

  lasso_reg.fit(x_train,y_train)
  model = cd_fast.enet_coordinate_descent(


In [20]:
lasso_reg.score(x_test,y_test)

0.9334050487985471

In [21]:
lasso_reg.score(x_train,y_train)

0.9398971836976284

In [22]:
from sklearn.linear_model import Ridge
ridge_reg= Ridge(alpha=2, max_iter=100, tol=0.1)
ridge_reg.fit(x_train,y_train)

In [23]:
ridge_reg.score(x_test,y_test)

0.644933713920079

In [24]:
ridge_reg.score(x_train,y_train)

0.8507450866302276

In [25]:
df.describe()

Unnamed: 0,Size_(sq_ft),Bedrooms,Bathrooms,Year_Built,Garage_Size,Distance_to_School_(mi),Selling_Price_(k$)
count,19.0,19.0,19.0,19.0,19.0,19.0,19.0
mean,2142.105263,3.210526,2.263158,1997.105263,1.526316,1.968421,405.789474
std,696.272448,1.182227,1.097578,18.438772,1.073334,1.459091,242.837755
min,1000.0,1.0,1.0,1950.0,0.0,0.3,100.0
25%,1700.0,2.0,1.0,1987.5,1.0,0.75,187.5
50%,2000.0,3.0,2.0,2003.0,2.0,1.5,375.0
75%,2650.0,4.0,3.0,2010.0,2.0,2.95,575.0
max,3400.0,5.0,4.0,2020.0,3.0,5.2,900.0


In [26]:
classifier.predict([[2000,3,2,2003,2,1.5]])



array([0.40745678])