In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Locations of the raw CSV files.
# NOTE(review): the train and test files are read from two different input
# directories ("house-prices-advanced-regression-techniques" vs
# "house-price-prediction") - confirm both hold the same data set.
TRAIN_CSV = "../input/house-prices-advanced-regression-techniques/train.csv"
TEST_CSV = "../input/house-price-prediction/test.csv"

train_data = pd.read_csv(TRAIN_CSV)
test_data = pd.read_csv(TEST_CSV)

In [None]:
train_data.head(10)

In [None]:
test_data.head(10)

In [None]:
train_data.info()

In [None]:
train_data.shape


In [None]:
test_data.shape

In [None]:
train_data.columns

In [None]:
test_data.columns

In [None]:
train_data.describe()

In [None]:
train_data.isnull().sum()

In [None]:
train_data.drop('LotFrontage',axis=1,inplace=True)

In [None]:
test_data.isnull().sum()

In [None]:
test_data.drop('LotFrontage',axis=1,inplace=True)

In [None]:
test_data.drop('SaleType',axis=1,inplace=True)

In [None]:
plt.hist(train_data["SalePrice"], 20, lw = 0.89, ec = 'black')
plt.xlabel('Sale Price')
plt.ylabel('frequency')
plt.title('Distribution of Sale Price of House')


In [None]:
max_sp = train_data['SalePrice'].max()
print("The largest Sale Price:", max_sp,'USD')

In [None]:
cols = ['SalePrice', 'OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'FullBath', 'YearBuilt']
sns.pairplot(train_data[cols], height = 2.5)

In [None]:
train_data.corr()

In [None]:
plt.figure(figsize=(30,15))
a = sns.heatmap(train_data.corr(), annot=True, fmt='.1f')
sns.heatmap(train_data.corr())


In [None]:
test_data['SalePrice'] = 0

In [None]:
data = pd.concat([train_data , test_data] , axis = 0)

In [None]:
data.describe()

In [None]:
data.head()

In [None]:
data.isnull().sum()

In [None]:
data['BsmtFinSF1'] = data['BsmtFinSF1'].fillna(data['BsmtFinSF1'].mean())
data['BsmtFinSF2'] = data['BsmtFinSF2'].fillna(data['BsmtFinSF2'].mean())
data['BsmtUnfSF'] = data['BsmtUnfSF'].fillna(data['BsmtUnfSF'].mean())
data['TotalBsmtSF'] = data['TotalBsmtSF'].fillna(data['TotalBsmtSF'].mean())
data['GarageCars'] = data['GarageCars'].fillna(data['GarageCars'].mean())
data['GarageArea'] = data['GarageArea'].fillna(data['GarageArea'].mean())

In [None]:
data.drop(['Alley' , 'PoolQC' , 'Fence' , 'MiscFeature'], axis = 1 , inplace = True)
data.drop(['GarageYrBlt'], axis = 1 , inplace = True)

In [None]:
data['MSZoning'] = data['MSZoning'].fillna(data['MSZoning'].mode()[0])
data['MasVnrType'] = data['MasVnrType'].fillna(data['MasVnrType'].mode()[0])
data['BsmtQual'] = data['BsmtQual'].fillna(data['BsmtQual'].mode()[0])
data['BsmtCond'] = data['BsmtCond'].fillna(data['BsmtCond'].mode()[0])
data['MasVnrArea'] = data['MasVnrArea'].fillna(data['MasVnrArea'].mode()[0])
data['BsmtFullBath'] = data['BsmtFullBath'].fillna(data['BsmtFullBath'].mode()[0])
data['BsmtHalfBath'] = data['BsmtHalfBath'].fillna(data['BsmtHalfBath'].mode()[0])
data['KitchenQual'] = data['KitchenQual'].fillna(data['KitchenQual'].mode()[0])
data['BsmtFinType2'] = data['BsmtFinType2'].fillna(data['BsmtFinType2'].mode()[0])
data['FireplaceQu'] = data['FireplaceQu'].fillna(data['FireplaceQu'].mode()[0])
data['BsmtExposure'] = data['BsmtExposure'].fillna(data['BsmtExposure'].mode()[0])
data['BsmtFinType1'] = data['BsmtFinType1'].fillna(data['BsmtFinType1'].mode()[0])

data['Exterior2nd'] = data['Exterior2nd'].fillna(data['Exterior2nd'].mode()[0])
data['Electrical'] = data['Electrical'].fillna(data['Electrical'].mode()[0])
data['Functional'] = data['Functional'].fillna(data['Functional'].mode()[0])

data['GarageType'] = data['GarageType'].fillna(data['GarageType'].mode()[0])
data['GarageFinish'] = data['GarageFinish'].fillna(data['GarageFinish'].mode()[0])
data['GarageQual'] = data['GarageQual'].fillna(data['GarageQual'].mode()[0])
data['GarageCond'] = data['GarageCond'].fillna(data['GarageCond'].mode()[0])
data['SaleType'] = data['SaleType'].fillna(data['SaleType'].mode()[0])
data['Utilities'] = data['Utilities'].fillna(data['Utilities'].mode()[0])
data['Exterior1st'] = data['Exterior1st'].fillna(data['Exterior1st'].mode()[0])

In [None]:
data.info()

In [None]:
# Categorical fields handed to `category` for one-hot encoding.
# The order is kept as-is because it determines the order of the resulting
# indicator columns.
columns = [
    'MSZoning', 'Street', 'LotShape', 'LandContour', 'Utilities',
    'LotConfig', 'LandSlope', 'Neighborhood',
    'Condition2', 'BldgType', 'Condition1', 'HouseStyle',
    'SaleType', 'SaleCondition', 'ExterCond',
    'ExterQual', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure',
    'BsmtFinType1', 'BsmtFinType2',
    'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
    'Heating', 'HeatingQC', 'CentralAir',
    'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu',
    'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive',
]

In [None]:
# Number of categorical fields listed in `columns`.
len(columns)

In [None]:
def category(multcolumns, frame=None):
    """One-hot encode the given categorical fields and return the widened frame.

    Each field in ``multcolumns`` is replaced by indicator columns named
    ``<field>_<level>``; the first level of every field is omitted
    (``drop_first=True``). Prefixing with the field name keeps indicators
    that come from different fields but share a level label from ending up
    under the same column name, so no feature is lost if duplicate column
    labels are removed afterwards.

    Parameters
    ----------
    multcolumns : iterable of str
        Names of the columns to encode.
    frame : pandas.DataFrame, optional
        Frame to encode. Defaults to the module-level ``data`` frame.

    Returns
    -------
    pandas.DataFrame
        The columns that were not encoded (original order) followed by the
        indicator columns, in the order the fields were given. The input
        frame is left unmodified, so calling this again with the same
        arguments gives the same result.

    Raises
    ------
    KeyError
        If a name in ``multcolumns`` is not a column of the frame.
    """
    source = data if frame is None else frame
    fields = list(multcolumns)

    # One indicator block per field, each labelled with its field name.
    indicator_blocks = [
        pd.get_dummies(source[field], prefix=field, drop_first=True)
        for field in fields
    ]

    # drop() returns a new frame, so the caller's frame is not touched.
    untouched = source.drop(columns=fields)
    return pd.concat([untouched, *indicator_blocks], axis=1)

In [None]:
data = category(columns)

In [None]:
data = data.loc[:,~data.columns.duplicated()]

In [None]:
train_data = data.iloc[:1460,:]
test_data = data.iloc[1460:,:]

In [None]:
test_data.drop(['SalePrice'] , axis = 1 , inplace = True)

In [None]:
y_train = train_data['SalePrice']
x_train = train_data.drop(['SalePrice'] , axis = 1)

In [None]:
from sklearn.preprocessing import StandardScaler
scalar = StandardScaler()
x_train = scalar.fit_transform(x_train)
test_data = scalar.transform(test_data)

In [None]:
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(x_train,y_train)

In [None]:
#y_tr = lr.predict(x_train)
reg.coef_

In [None]:

reg.intercept_

In [None]:
reg.score(x_train,y_train)

In [None]:
result = reg.predict(x_train)

In [None]:
plt.figure(figsize = (6,4))
plt.plot(y_train, 'o',color = 'blue', label = 'Actual Values')
plt.plot(result, color = 'red', label = 'Predicted Values')
plt.legend()

In [None]:
test_data = pd.DataFrame()
test_data['Actual Values'] = y_train
test_data['Predicted_values'] = result
test_data