# House price

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#importing libraries 


# Data Manipulation
import numpy as np
import pandas as pd

# Feature selection
from sklearn.feature_selection import VarianceThreshold


# Visualization 
import matplotlib.pyplot as plt
import seaborn as sns

# for Q-Q plots
import scipy.stats as stats

#import model libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#import model
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor



#import accuracy
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score



import warnings
warnings.filterwarnings('ignore')

## Load Data

In [None]:
# load data

train=pd.read_csv('../input/house-prices-advanced-regression-techniques/train.csv')

In [None]:
train.drop(columns=['Id'], inplace= True)

In [None]:
train.head()

In [None]:
train.shape

In [None]:
train.info()

In [None]:
#encoding categorical variable
#
# Lookup tables for label -> integer encoding. Each outer key is a column name
# and each inner dict maps that column's text label to the integer code that
# replaces it (codes run from 0 upwards within each column).
# NOTE(review): the CSV is read with pd.read_csv defaults, which parse the
# literal text 'NA' as a missing value, so the 'NA': 0 entries below
# presumably never match and those rows stay NaN -- confirm.
# NOTE(review): GarageType looks like a nominal (unordered) category; the
# ranking given to it here appears arbitrary -- confirm it is intended.

enc_ordCat= { 'LotShape': {'Reg': 3 , 'IR1': 2 , 'IR2': 1 , 'IR3': 0 },
              'LandSlope': {'Gtl': 2 , 'Mod': 1 , 'Sev': 0},
              'ExterQual': {'Ex': 4 , 'Gd': 3 , 'TA': 2 , 'Fa': 1 , 'Po': 0},
              'ExterCond': {'Ex': 4 , 'Gd': 3 , 'TA': 2 , 'Fa': 1 , 'Po': 0},
              'BsmtQual': {'Ex': 5 , 'Gd': 4 , 'TA': 3 , 'Fa': 2 , 'Po': 1 , 'NA': 0},
              'BsmtCond': {'Ex': 5 , 'Gd': 4 , 'TA': 3 , 'Fa': 2 , 'Po': 1 , 'NA': 0},
              'BsmtExposure': {'Gd': 4 , 'Av': 3 , 'Mn': 2 , 'No': 1 , 'NA': 0},
              'BsmtFinType1': {'GLQ': 6 , 'ALQ': 5 , 'BLQ': 4 , 'Rec': 3 , 'LwQ': 2 , 'Unf': 1 , 'NA': 0},
              'BsmtFinType2': {'GLQ': 6 , 'ALQ': 5 , 'BLQ': 4 , 'Rec': 3 , 'LwQ': 2 , 'Unf': 1 , 'NA': 0},
              'HeatingQC': {'Ex': 4 , 'Gd': 3 , 'TA': 2 , 'Fa': 1 , 'Po': 0},
              'CentralAir': {'Y': 1 , 'N': 0},
              'KitchenQual': {'Ex': 4 , 'Gd': 3 , 'TA': 2 , 'Fa': 1 , 'Po': 0},
              'Functional': {'Typ': 7, 'Min1': 6 , 'Min2': 5 , 'Mod': 4 , 'Maj1': 3 , 'Maj2': 2 , 'Sev': 1 , 'Sal': 0},
              'FireplaceQu': {'Ex': 5 , 'Gd': 4 , 'TA': 3 , 'Fa': 2 , 'Po': 1 , 'NA': 0},
              'GarageType': {'2Types': 6 , 'Attchd': 5 , 'Basment': 4 , 'BuiltIn': 3 , 'CarPort': 2 , 'Detchd': 1 , 'NA': 0},
              'GarageFinish': {'Fin': 3 , 'RFn': 2 , 'Unf': 1 , 'NA': 0 },
              'GarageQual': {'Ex': 5 , 'Gd': 4 , 'TA': 3 , 'Fa': 2 , 'Po': 1 , 'NA': 0},
              'GarageCond': {'Ex': 5 , 'Gd': 4 , 'TA': 3 , 'Fa': 2 , 'Po': 1 , 'NA': 0},
              'PavedDrive': {'Y': 2 , 'P': 1 , 'N': 0}
}

In [None]:
li=list(enc_ordCat.keys())
li

In [None]:
train=train.replace(enc_ordCat)

In [None]:
train[li] = train[li].apply(pd.to_numeric)


## Split numeric and categorical features

In [None]:
train_cat=list(train.columns[train.dtypes=='object'])
train_num=list(train.columns[train.dtypes!='object'])

In [None]:
train[train_cat].head(1)

In [None]:
train_num

## Replace missing values for numerical features

In [None]:
train[train_num].isnull().sum().sort_values(ascending=False)

#### Let's deal with one feature at a time

In [None]:
# 1. FireplaceQu 

plt.scatter(x=train['FireplaceQu'], y=train['SalePrice'], alpha=0.5)


# Decorate
plt.title('Fire place Quality before replacing missing value')
plt.xlabel('FireplaceQu')
plt.ylabel('SalePrice')
plt.show()

In [None]:
train.FireplaceQu.value_counts()

In [None]:
# Remember which rows had no FireplaceQu value so the next plot can highlight
# them: 0 at the rows that are being filled, None everywhere else.
fireplace_missing = train['FireplaceQu'].isnull()
miss_val = np.where(fireplace_missing, 0, None)

# Actually fill the column. 0 is the code the encoding table reserves for
# "no fireplace" ('NA'). Without this step the NaNs survive the numeric
# clean-up and are later overwritten with the text 'missing', which turns
# this ordinal column into a string column.
train['FireplaceQu'] = train['FireplaceQu'].fillna(0)

In [None]:
# Observed FireplaceQu values in blue; rows whose value was missing are drawn
# in red at the fill value held in miss_val.
plt.scatter(x=train['FireplaceQu'],y=train['SalePrice'], color='blue', marker="+", label='old')
plt.scatter(x=miss_val,y=train['SalePrice'],  marker="*", color='red', label='latest')


# Decorate -- this figure shows the replaced values, so it is the "after" view
plt.title('Fire place Quality after replacing missing value')
plt.xlabel('FireplaceQu')
plt.ylabel('SalePrice')
plt.legend(loc='upper left');
plt.show()

In [None]:
# 2. LotFrontage

# Raw relationship between LotFrontage and SalePrice (rows with a missing
# LotFrontage are simply not drawn).
plt.scatter(train['LotFrontage'], train['SalePrice'], alpha=0.5)

# Decorate
plt.title('LotFrontage before replacing missing value')
plt.xlabel('LotFrontage')
plt.ylabel('SalePrice')
plt.show()

In [None]:
train['LotFrontage'].mean()

In [None]:
miss_val=np.where(train['LotFrontage'].isnull(),train['LotFrontage'].mean(),None)

In [None]:
# most of the data is in between 50 to 100, lets replace the missing value with mean.

train['LotFrontage']=train['LotFrontage'].fillna(train['LotFrontage'].mean())

In [None]:
# Blue: LotFrontage after the fill; red: only the rows that were imputed
# (miss_val holds the fill value at those rows and None elsewhere).
plt.scatter(x=train['LotFrontage'],y=train['SalePrice'], color='blue', marker="+", label='old')
plt.scatter(x=miss_val,y=train['SalePrice'],  marker="*", color='red', label='latest')


# Decorate -- the column has already been filled at this point
plt.title('LotFrontage after replacing missing value')
plt.xlabel('LotFrontage')
plt.ylabel('SalePrice')
plt.legend(loc='upper left');
plt.show()

In [None]:
# 3. GarageFinish

plt.scatter( train['GarageFinish'], train['SalePrice'], alpha=0.5)

# Decorate
plt.title('Garage Finish before replacing missing value')
plt.xlabel('GarageFinish')
plt.ylabel('SalePrice')
plt.show()

In [None]:
train['GarageFinish'].mean()

In [None]:
miss_val=np.where(train['GarageFinish'].isnull(),train['GarageFinish'].mean(),None)

In [None]:
# Pass the series by keyword: recent seaborn releases treat the first
# positional argument as `data`, not as the variable to count.
sns.countplot(x=train['GarageFinish'])

In [None]:
# Let's replace the missing values with the mean value

train['GarageFinish']=train['GarageFinish'].fillna(train['GarageFinish'].mean())

In [None]:
# Blue: GarageFinish after the fill; red: only the rows that were imputed
# (miss_val holds the fill value at those rows and None elsewhere).
plt.scatter(x=train['GarageFinish'],y=train['SalePrice'], color='blue', marker="+", label='old')
plt.scatter(x=miss_val,y=train['SalePrice'],  marker="*", color='red', label='latest')


# Decorate -- the column has already been filled at this point
plt.title('Garage Finish after replacing missing value')
plt.xlabel('GarageFinish')
plt.ylabel('SalePrice')
plt.legend(loc='upper left');
plt.show()

In [None]:
# 4. GarageYrBlt

plt.scatter(train['GarageYrBlt'], train['SalePrice'], alpha=0.5)

# Decorate
plt.title('GarageYrBlt before replacing missing value')
plt.xlabel('GarageYrBlt')
plt.ylabel('SalePrice')
plt.show()

In [None]:
train['GarageYrBlt'].mode()

In [None]:
train['GarageYrBlt'].mean()

In [None]:
plt.scatter(train['YearBuilt'], train['SalePrice'], alpha=0.5)

In [None]:
plt.scatter(train['GarageYrBlt'],train['YearBuilt'])

##### From the above graph it seems that the garage was usually built in the same year as the house, so let's use the house's construction year (YearBuilt) for the missing garage years.

In [None]:
train[train['GarageYrBlt'].isnull()][['GarageYrBlt','YearBuilt']]

In [None]:
miss_val=np.zeros(train.shape[0])

In [None]:
indx=list(train[train['GarageYrBlt'].isnull()][['GarageYrBlt','YearBuilt']].index)

In [None]:
miss_val==0

In [None]:
# Fill every missing GarageYrBlt with the construction year of the house.
# A single .loc assignment writes straight into `train`; the previous chained
# form train['GarageYrBlt'][i] = ... assigns through an intermediate object,
# which pandas warns about and which is not guaranteed to update the frame.
garage_fill = train.loc[indx, 'YearBuilt']

# Keep the filled values for the comparison plot (index labels double as
# positions here, exactly as in the element-wise version).
miss_val[indx] = garage_fill.to_numpy()
train.loc[indx, 'GarageYrBlt'] = garage_fill
   

In [None]:
miss_val=np.where(miss_val==0, None, miss_val)

In [None]:
plt.scatter(x=train['GarageYrBlt'],y=train['SalePrice'], color='blue', marker="+", label='old')
plt.scatter(x=miss_val,y=train['SalePrice'],  marker="*", color='red', label='latest')


# Decorate
plt.title('GarageYrBlt after replacing missing value')
plt.xlabel('GarageYrBlt')
plt.ylabel('SalePrice')
plt.legend(loc='upper left');
plt.show()

In [None]:
train[train_num].isnull().sum().sort_values(ascending=False)

In [None]:
# 'GarageType'

train.GarageType.value_counts()

In [None]:
# GarageType against SalePrice as loaded; the fill happens in a later cell.
plt.scatter(train['GarageType'],train['SalePrice'], alpha=0.5)

# Decorate -- nothing has been replaced yet, so this is the "before" view
plt.title('GarageType before replacing missing value')
plt.xlabel('GarageType')
plt.ylabel('SalePrice')
plt.show()

In [None]:
train['GarageType'].mean()

In [None]:
miss_val=np.where(train['GarageType'].isnull(),train['GarageType'].mode()[0] ,None)

In [None]:
# lets replace with mode()

train['GarageType']=train['GarageType'].fillna(train['GarageType'].mode()[0])

In [None]:
plt.scatter(x=train['GarageType'],y=train['SalePrice'], color='blue', marker="+", label='old')
plt.scatter(x=miss_val,y=train['SalePrice'],  marker="*", color='red', label='latest')


# Decorate
plt.title('GarageType after replacing missing value')
plt.xlabel('GarageType')
plt.ylabel('SalePrice')
plt.legend(loc='upper left');
plt.show()

In [None]:
## 'GarageQual'

# GarageQual against SalePrice as loaded; the fill happens in a later cell.
plt.scatter(train['GarageQual'],train['SalePrice'], alpha=0.5)

# Decorate -- nothing has been replaced yet, so this is the "before" view
plt.title('GarageQual before replacing missing value')
plt.xlabel('GarageQual')
plt.ylabel('SalePrice')
plt.show()

In [None]:
train.GarageQual.value_counts()

In [None]:
# lets replace with mode()

train['GarageQual']=train['GarageQual'].fillna(train['GarageQual'].mode()[0])

In [None]:
# GarageCond

train.GarageCond.value_counts()

In [None]:
train['GarageCond']=train['GarageCond'].fillna(train['GarageCond'].mode()[0])

In [None]:
# BsmtExposure

train.BsmtExposure.value_counts()

In [None]:
train['BsmtExposure']=train['BsmtExposure'].fillna(train['BsmtExposure'].mode()[0])

In [None]:
# BsmtFinType2

train.BsmtFinType2.value_counts()

In [None]:
train['BsmtFinType2']=train['BsmtFinType2'].fillna(train['BsmtFinType2'].mode()[0])

In [None]:
# BsmtCond

train.BsmtCond.value_counts()

In [None]:
train['BsmtCond']=train['BsmtCond'].fillna(train['BsmtCond'].mode()[0])

In [None]:
train[train_num].isnull().sum().sort_values(ascending=False)

In [None]:
# BsmtQual

train['BsmtQual'].value_counts()

In [None]:
train['BsmtQual'].mean()

In [None]:
train['BsmtQual']=train['BsmtQual'].fillna(train['BsmtQual'].mean())

In [None]:
# BsmtFinType1

train['BsmtFinType1'].value_counts()

In [None]:
train['BsmtFinType1'].mean()

In [None]:
train['BsmtFinType1']=train['BsmtFinType1'].fillna(train['BsmtFinType1'].mean())

In [None]:
# MasVnrArea

# MasVnrArea against SalePrice as loaded; the fill happens in a later cell.
plt.scatter(train['MasVnrArea'], train['SalePrice'], alpha=0.5)

# Decorate -- nothing has been replaced yet, so this is the "before" view
plt.title('MasVnrArea before replacing missing value')
plt.xlabel('MasVnrArea')
plt.ylabel('SalePrice')
plt.show()

In [None]:
train['MasVnrArea'].mean()

In [None]:
train['MasVnrArea']=train['MasVnrArea'].fillna(train['MasVnrArea'].mean())

In [None]:
train[train_num].isnull().sum().sort_values(ascending=False)

#### Done with the numerical missing values; let's look at the categorical missing values

In [None]:
train[train_cat].isnull().sum().sort_values(ascending=False)

In [None]:
miss_feat= list(train.columns[train.isnull().any()])

In [None]:
miss_feat

In [None]:
## replace all the missing values with the text 'missing'
# miss_feat lists every column of train that still contains at least one null.
# NOTE(review): miss_feat is not restricted to categorical columns, so any
# numeric column that still has NaN at this point would also receive the
# string 'missing' -- confirm that is intended.

for col in miss_feat:
    # keep existing values, substitute 'missing' only where the value is null
    train[col]=np.where(train[col].isnull(),'missing', train[col])

In [None]:
train.isnull().sum().sort_values(ascending=False)

#### All the missing values are handled

In [None]:
train.shape

#### Now let's combine some of the features into a single feature.

In [None]:
column=train.columns

In [None]:
column

In [None]:
def similar_feat(arr, st):
    """Return the entries of ``arr`` whose text contains the substring ``st``.

    Parameters
    ----------
    arr : iterable of str
        Candidate names (for example the column index of a DataFrame).
    st : str
        Substring to look for.

    Returns
    -------
    list
        Matching entries, in the order they appear in ``arr``.
    """
    return [name for name in arr if st in name]

In [None]:
basement=similar_feat(column,'Bsmt')
print(basement)

In [None]:
def print_value_count(feat):
    """Print the number of distinct values and the value counts of each column.

    Parameters
    ----------
    feat : iterable of str
        Column names to look up in the module-level ``train`` DataFrame.
    """
    for name in feat:
        series = train[name]
        print('the different value of {} is {}'.format(name, series.nunique()))
        print(series.value_counts(),'\n')
        

In [None]:
print_value_count(basement)

#### From the above analysis we can conclude that all the quality and condition features can be combined into a single feature, say 'BsmtScore'.

In [None]:
def add_feat(feat, feat_list, df=None):
    """Create column ``feat`` as the row-wise sum of the columns in ``feat_list``.

    The accumulator is built on the frame's own index, so the function works
    for any number of rows and any index (the earlier version hard-coded 1460
    rows with a default 0..1459 index and produced NaN for other indexes).

    Parameters
    ----------
    feat : str
        Name of the column to create (or overwrite).
    feat_list : iterable of str
        Names of the columns to add together.
    df : pandas.DataFrame, optional
        Frame to modify in place. Defaults to the module-level ``train``.

    Returns
    -------
    None
        The frame is modified in place.
    """
    if df is None:
        df = train

    # Start from zero on the frame's own index so additions align row by row.
    total = pd.Series(0, index=df.index)
    for ele in feat_list:
        total = total + df[ele]

    df[feat] = total

In [None]:
add_feat('BsmtScore',['BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',  'BsmtFinType2', 'BsmtFullBath', 'BsmtHalfBath'])

In [None]:
train[['BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',  'BsmtFinType2', 'BsmtFullBath', 'BsmtHalfBath','BsmtScore']]

In [None]:
# For the features related to basement area, we can observe that (BsmtFinSF1 + BsmtFinSF2) + BsmtUnfSF = TotalBsmtSF.

# So keeping only BsmtUnfSF and TotalBsmtSF would be enough; keeping any two of these features should suffice.

In [None]:
train[['BsmtFinSF1','BsmtFinSF2','BsmtUnfSF','TotalBsmtSF']]

In [None]:
# Let's drop the remaining features for Basement.
# The quality/condition/bath columns are now summarised by BsmtScore, and the
# two finished-area columns are redundant given BsmtUnfSF and TotalBsmtSF,
# which are deliberately kept (see the note above on the area identity).

train.drop(['BsmtFinSF1','BsmtFinSF2','BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'BsmtFullBath', 'BsmtHalfBath'], axis=1 , inplace=True)

In [None]:
# Similary Garage have also many number of features

garage=similar_feat(column,'Garage')
print(garage)

In [None]:
def print_value_count(feat):
    """Print the number of distinct values and the value counts of each column.

    This repeats the definition given earlier in the notebook; the behaviour
    is the same.

    Parameters
    ----------
    feat : iterable of str
        Column names to look up in the module-level ``train`` DataFrame.
    """
    for name in feat:
        series = train[name]
        print('the different value of {} is {}'.format(name, series.nunique()))
        print(series.value_counts(),'\n')
        

In [None]:
print_value_count(garage)

#### Here, too, we can combine some features such as ['GarageType', 'GarageFinish', 'GarageCars', 'GarageQual', 'GarageCond'] into a single feature, say GarageScore

In [None]:
add_feat('GarageScore',['GarageType', 'GarageFinish', 'GarageCars', 'GarageQual', 'GarageCond'])

In [None]:
train[['GarageType', 'GarageFinish', 'GarageCars', 'GarageQual', 'GarageCond','GarageScore']]

In [None]:
train.drop(['GarageType', 'GarageFinish', 'GarageCars', 'GarageQual', 'GarageCond'],axis=1, inplace=True)

In [None]:
train.columns

In [None]:
# combine ExterCond and ExterQual to one feature

add_feat('ExtrScore',['ExterCond', 'ExterQual'])

In [None]:
train[['ExterCond', 'ExterQual','ExtrScore']]

In [None]:
# combine OverallQual and OverallCond to one feature

add_feat('OvrallScore',['OverallQual', 'OverallCond'])

In [None]:
train[['OverallQual', 'OverallCond','OvrallScore']]

In [None]:
# Drop the raw columns that were folded into the combined scores. ExterCond
# (not the freshly built ExtrScore) is the column to remove, so both new
# scores, ExtrScore and OvrallScore, are kept.
train.drop(['OverallQual', 'OverallCond','ExterQual','ExterCond'], axis=1, inplace=True)

In [None]:
train.columns

In [None]:
# Similarly, Lot also has several related features
# NOTE(review): `column` was captured before the drops above, so it may list
# columns that no longer exist in train -- confirm it is still adequate here.

Lot=similar_feat(column,'Lot')
print(Lot)

In [None]:
def print_value_count(feat):
    """Print the number of distinct values and the value counts of each column.

    This repeats the definition given earlier in the notebook; the behaviour
    is the same.

    Parameters
    ----------
    feat : iterable of str
        Column names to look up in the module-level ``train`` DataFrame.
    """
    for name in feat:
        series = train[name]
        print('the different value of {} is {}'.format(name, series.nunique()))
        print(series.value_counts(),'\n')
        

In [None]:
print_value_count(Lot)

In [None]:
## these above features cannot be combine

In [None]:
train.columns

In [None]:
train.Fireplaces.value_counts()

In [None]:
train_cat=list(train.columns[train.dtypes=='object'])
train_num=list(train.columns[train.dtypes!='object'])

## Remove constant features

In [None]:
# Checking ZERO variance for categorical features

# to find variables that contain only 1 label/value
# we use the nunique() method from pandas, which returns the number
# of different values in a variable.

constant_feat_cat = [feat for feat in train[train_cat].columns if train[train_cat][feat].nunique() == 1]

constant_feat_cat

In [None]:
# Checking ZERO variance for numerical features


# A standard deviation of exactly 0 means the column holds a single value.
# This check only works for numeric features, so it is applied to the
# train_num columns only.

# (the categorical columns are checked separately with nunique())


constant_feat_num = [feat for feat in train[train_num].columns if train[train_num][feat].std() == 0]

constant_feat_num

## Quasi-constant feature selection (remove features having very low variance)

In [None]:
len(train_num)

In [None]:
# method 1:

sel = VarianceThreshold(threshold=0.05)  

sel.fit(train[train_num])  # fit finds the features with low variance

In [None]:
sel.get_support()

In [None]:
# Collect every column rejected by the variance filter. Taking only element
# [0] kept a single column and raised IndexError when none was rejected;
# a (possibly empty) list is accepted by train.drop just the same.
col_to_drop = list(train[train_num].columns[~sel.get_support()])

In [None]:
col_to_drop

In [None]:
train.drop(col_to_drop,axis=1,inplace=True)

In [None]:
# Method 2: find the columns which have more than 99% same value.

def find_low_var_col(df, threshold):
    """Return the columns dominated by a single value.

    A column is reported when its most frequent non-null value accounts for
    strictly more than ``threshold`` percent of all rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect.
    threshold : float
        Percentage (0-100) of rows the most frequent value must exceed.

    Returns
    -------
    list
        Names of the quasi-constant columns, in column order.
    """
    n_rows = df.shape[0]
    if n_rows == 0:
        # No rows: nothing can dominate, and this avoids a division by zero.
        return []

    low_var_col = []
    for col in df.columns:
        # value_counts ignores nulls, like mode(); an all-null column yields
        # an empty result and is skipped instead of raising on mode()[0].
        counts = df[col].value_counts()
        if counts.empty:
            continue

        if (counts.max() / n_rows) * 100 > threshold:
            low_var_col.append(col)

    return low_var_col

In [None]:
train_low_var=find_low_var_col(train, 99)

In [None]:
train_low_var

In [None]:
train.drop(train_low_var,axis=1,inplace=True)

In [None]:
train.shape

### Check for duplicate features

In [None]:
train_cat=list(train.columns[train.dtypes=='object'])
train_num=list(train.columns[train.dtypes!='object'])

In [None]:
# check for duplicated features in the training set
def find_duplicate_feat(df):
    """Return the names of columns that are exact copies of an earlier column.

    Each column is compared (with ``Series.equals``) against every column to
    its right; a later column that matches is reported once.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are compared pairwise.

    Returns
    -------
    list
        Names of the duplicated columns, in the order they were detected.
    """
    names = list(df.columns)
    dupes = []

    for position, first in enumerate(names):
        reference = df[first]
        for other in names[position + 1:]:
            # columns already flagged are not compared again
            if other in dupes:
                continue
            if reference.equals(df[other]):
                dupes.append(other)

    return dupes

In [None]:
train_num_dup=find_duplicate_feat(train[train_num])

In [None]:
train_num_dup

In [None]:
train_cat_dup=find_duplicate_feat(train[train_cat])

In [None]:
train_cat_dup

## Correlation feature selection

In [None]:
# Correlation is only defined for numeric columns; selecting them explicitly
# avoids the error DataFrame.corr raises on text columns in recent pandas.
# The matrix is computed once and reused for both the values and the mask.
corr_matrix = train.select_dtypes(include='number').corr()

plt.figure(figsize=(40,40))
# mask the upper triangle so every pair is shown only once
sns.heatmap(corr_matrix, annot=True, mask=np.triu(corr_matrix))
# size the y-axis from the matrix itself instead of a hard-coded 40 rows
plt.ylim(len(corr_matrix), 0)

In [None]:
# find list of high correlated features with other features. to overcome multicollinearity

def high_corr_feat(df=None, target='SalePrice', pos_threshold=0.8, neg_threshold=-0.4):
    """List features that are strongly correlated with an earlier feature.

    For every pair of numeric columns the later one (in column order) is
    reported when their correlation is above ``pos_threshold`` or below
    ``neg_threshold``. The target column is left out of the comparison, so it
    can neither be reported itself nor cause a feature to be reported.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame to analyse. Defaults to the module-level ``train``.
    target : str, optional
        Name of the dependent variable to exclude, if present.
    pos_threshold : float, optional
        Correlations strictly above this value count as "high".
    neg_threshold : float, optional
        Correlations strictly below this value count as "high".
        NOTE(review): -0.4 is much looser than the positive cut-off of 0.8;
        it is kept from the original code -- confirm the asymmetry is wanted.

    Returns
    -------
    list
        Names of the columns to drop, in column order.
    """
    if df is None:
        df = train

    # Only numeric columns can be correlated; text columns would make
    # DataFrame.corr raise in recent pandas releases.
    numeric = df.select_dtypes(include='number')
    if target in numeric.columns:
        numeric = numeric.drop(columns=target)

    corr_matrix = numeric.corr()
    columns = corr_matrix.columns

    feat = []
    for i in range(1, len(columns)):
        for j in range(i):
            value = corr_matrix.iloc[i, j]
            if value > pos_threshold or value < neg_threshold:
                feat.append(columns[i])
                break  # one strong partner is enough to flag this column

    return feat

In [None]:
train_hgh_corr_col=high_corr_feat()

In [None]:
train_hgh_corr_col

In [None]:
train.drop(train_hgh_corr_col, axis=1, inplace=True)

In [None]:
train.head(2)

In [None]:
# drop dependent feature.

y=train['SalePrice']

train=train.drop('SalePrice', axis=1)

In [None]:
# function to create distribution, histogram, Q-Q plot and boxplot


def diagnostic_plots(df):
    """Draw four diagnostic panels for every numeric column of ``df``.

    For each numeric (integer/float) column one figure is shown with, from
    left to right: a density histogram with a KDE curve, a 30-bin count
    histogram, a normal Q-Q plot and a box plot.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose numeric columns are plotted.

    Returns
    -------
    None
        Figures are displayed with ``plt.show()`` and then closed.
    """
    numeric_cols = df.select_dtypes(include='number').columns

    for ele in numeric_cols:
        values = df[ele]

        # one row of four panels per variable
        fig, axes = plt.subplots(1, 4, figsize=(16, 4))

        # distribution: density histogram + KDE, the documented replacement
        # for the deprecated sns.distplot
        sns.histplot(values, kde=True, stat='density', ax=axes[0])
        axes[0].set_title('Distribution')

        # histogram
        sns.histplot(values, bins=30, ax=axes[1])
        axes[1].set_title('Histogram')

        # Q-Q plot against the normal distribution
        stats.probplot(values, dist="norm", plot=axes[2])
        axes[2].set_ylabel('Variable quantiles')

        # boxplot
        sns.boxplot(y=values, ax=axes[3])
        axes[3].set_title('Boxplot')

        plt.show()
        # release the figure so a long list of columns does not pile up memory
        plt.close(fig)

    

In [None]:
diagnostic_plots(train)

In [None]:

def print_skew(df):
    """Print the skewness of every numeric column of ``df``, one per line.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect; non-numeric columns are ignored.
    """
    numeric = df._get_numeric_data()
    for name, values in numeric.items():
        print(name, ' ', values.skew())

In [None]:
print_skew(train)

In [None]:
# Remove skewness

# A roughly symmetric distribution has a skewness between about -0.5 and 0.5;
# here only columns whose absolute skewness exceeds 0.75 are transformed.

def remove_skew(df, threshold=0.75):
    """Log-transform the strongly skewed numeric columns of ``df`` in place.

    Every numeric column whose absolute skewness is greater than
    ``threshold`` is replaced by ``log(1 + x)``. Columns containing values
    of -1 or less are left untouched, because the logarithm is undefined
    there and would introduce NaN or -inf.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.
    threshold : float, optional
        Absolute skewness above which a column is transformed.

    Returns
    -------
    None
    """
    numeric_cols = df.select_dtypes(include='number').columns

    for ele in numeric_cols:
        if abs(df[ele].skew()) > threshold:
            if df[ele].min() <= -1:
                # log(1 + x) is not defined for x <= -1; keep the raw values
                continue
            # log1p is log(1 + x), computed accurately for small x
            df[ele] = np.log1p(df[ele])


In [None]:
remove_skew(train)

In [None]:
print_skew(train)

In [None]:
diagnostic_plots(train)

## Deal with outliers

In [None]:
def mod_outlier(df, factor=2):
    """Cap the outliers of every numeric column of ``df`` in place.

    Values below ``Q1 - factor * IQR`` or above ``Q3 + factor * IQR`` are
    replaced by the respective bound. Columns whose inter-quartile range is
    zero (or undefined) are skipped: both bounds would coincide with the
    quartile and every value in the column would collapse to one constant.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.
    factor : float, optional
        Multiple of the IQR that defines the fences.

    Returns
    -------
    None
    """
    num_col = df.select_dtypes(include='number').columns

    for col in num_col:
        q1 = df[col].quantile(0.25)
        q3 = df[col].quantile(0.75)
        iqr = q3 - q1

        # skip degenerate columns (IQR of 0, or NaN for an all-null column)
        if not iqr > 0:
            continue

        lower_bound = q1 - (factor * iqr)
        upper_bound = q3 + (factor * iqr)

        df[col] = df[col].clip(lower=lower_bound, upper=upper_bound)


In [None]:
mod_outlier(train)

In [None]:
train=pd.get_dummies(train, drop_first=True, columns=train_cat)

#### Splitting the dataset into the Training set and Test set

In [None]:
X_train, X_test, y_train, y_test = train_test_split(train, y, test_size = 0.2, random_state = 43)

## Feature scaling

In [None]:
#Common for all model

#feature scaling

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


In [None]:
def accuracy(yt, yp):
    """Return the R-squared score of the predictions as a percentage.

    Parameters
    ----------
    yt : array-like
        True target values.
    yp : array-like
        Predicted target values.

    Returns
    -------
    float
        ``r2_score(yt, yp) * 100`` rounded to two decimals.
    """
    r2_percent = metrics.r2_score(yt, yp) * 100
    return round(r2_percent, 2)


model_acc={}

## Random Forest

In [None]:
regressor = RandomForestRegressor(n_estimators = 100, random_state = 43)
regressor.fit(X_train, y_train)

y_pred = (regressor.predict(X_test))

model_acc['Random Forest']=accuracy(y_test, y_pred)

## decision tree

In [None]:
regressor = DecisionTreeRegressor(random_state = 43)
regressor.fit(X_train, y_train)

y_pred = (regressor.predict(X_test))

model_acc['Decision Tree']=accuracy(y_test, y_pred)

## Multiple linear regression

In [None]:
regressor = LinearRegression()
regressor.fit(X_train, y_train)

y_pred = (regressor.predict(X_test))

model_acc['MultipleRegression Module']=accuracy(y_test, y_pred)

## SVM

In [None]:
regressor = SVR(kernel = 'linear')
regressor.fit(X_train, y_train)

y_pred = (regressor.predict(X_test))

model_acc['SVM']=accuracy(y_test, y_pred)

## XGBOOST

In [None]:
regressor = XGBRegressor()
regressor.fit(X_train,y_train)

y_pred = (regressor.predict(X_test))

model_acc['XGBOOST']=accuracy(y_test, y_pred)

In [None]:
model_acc

In [None]:
'The End'