In [1]:
# Importing all the necessary libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
pd.options.display.float_format = '{:.2f}'.format
import warnings as wa
wa.filterwarnings('ignore')

from statsmodels.stats.outliers_influence import variance_inflation_factor as VIF
import statsmodels.api as sm
import scipy.stats as ss

from sklearn.preprocessing import PowerTransformer

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

from sklearn.linear_model import LinearRegression

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor

from mlxtend.feature_selection import SequentialFeatureSelector as sfs

from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae

In [2]:
# data import

In [3]:
# Load the uncleaned CSV into `base`; the path is relative to the notebook's working directory.
base = pd.read_csv('../input/la-unclean/LA_Unclean.csv')

In [4]:
# Work on a copy so the raw frame `base` stays untouched (DataFrame.copy is deep by default).
df = base.copy()
df.head()

In [5]:
# Summary statistics of the numeric columns, one row per column.
df.describe(include=np.number).T

# bathrooms and calendar_updated have 100% null values.
# Most numerical variables are skewed.

In [6]:
# Summary statistics of the object (string) columns, one row per column.
df.describe(include=object).T

# listing_url, name, description, picture_url, host_url, host_name, host_since, host_location, host_about,
# host_thumbnail_url, host_picture_url, amenities, first_review, last_review and license
# have too many unique values to have any impact on the model.

# host_response_rate, host_acceptance_rate and price are numerical values stored as strings,
# hence we treat them first.

In [7]:
# Remove the currency symbol and thousands separators from price and convert to float.
# regex=False is stated explicitly: read as a regular expression, '$' is the
# end-of-string anchor rather than a literal dollar sign, and the default value
# of `regex` differs between pandas versions.
df['price'] = (
    df['price']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)

In [8]:
# Strip the percent sign (and any commas) from both host rate columns and
# convert them to float.
for rate_col in ('host_response_rate', 'host_acceptance_rate'):
    stripped = df[rate_col].str.replace('%','').str.replace(',','')
    df[rate_col] = stripped.astype(float)

In [9]:
# Dropping the columns that contain 100% null values
df = df.drop(columns=['calendar_updated', 'bathrooms'])

In [10]:
# Removing irrelevant columns (identifiers, URLs, free text, dates and
# high-cardinality host/location fields).
irrelevant_cols = [
    'id', 'listing_url', 'scrape_id', 'last_scraped', 'name', 'description',
    'neighborhood_overview', 'picture_url', 'host_id', 'host_url', 'host_name',
    'host_since', 'host_location', 'host_about', 'host_thumbnail_url',
    'host_picture_url', 'host_neighbourhood', 'host_listings_count',
    'host_verifications', 'host_has_profile_pic', 'neighbourhood',
    'neighbourhood_cleansed', 'property_type', 'calendar_last_scraped',
    'first_review', 'last_review', 'license', 'host_identity_verified',
]
df = df.drop(columns=irrelevant_cols)

In [11]:
df.shape

In [12]:
# list of columns

In [13]:
df.columns.tolist()

In [14]:
# Data Pre_processing

In [15]:
# Treating Null values

In [16]:
# Columns that still contain nulls, with their null counts.
null_counts = df.isnull().sum()
null_counts[null_counts > 0]

In [17]:
# Per-column overview: dtype, percentage of nulls (2 decimals) and number of
# distinct values, restricted to columns that have at least some nulls.
data = pd.DataFrame({
    'Dtype': df.dtypes,
    'Null': np.round(df.isnull().sum() / len(df) * 100, 2),
    'Unique': df.nunique(),
})
data = data.loc[data['Null'] > 0]

In [18]:
# Categorical Variables

In [19]:
# host_response_time

In [20]:
# Percentage share of each host_response_time category. value_counts() skips
# nulls while the denominator is the full column length, so the shares need not
# add up to 100.
(df.host_response_time.value_counts() / len(df.host_response_time)) * 100

In [21]:
# Proportion imputation: each missing host_response_time is replaced by a
# category drawn at random with the category shares observed in the non-null rows.
#  - the shares are computed from the data instead of being hard-coded;
#  - the generator is seeded so a re-run reproduces the same fill;
#  - only the missing rows are written, through a boolean mask, so the result
#    does not depend on the frame having a default 0..n-1 index.
response_share = df['host_response_time'].value_counts(normalize=True)
response_missing = df['host_response_time'].isnull()
response_rng = np.random.RandomState(42)
df.loc[response_missing, 'host_response_time'] = response_rng.choice(
    response_share.index.to_numpy(),
    p=response_share.to_numpy(),
    size=int(response_missing.sum()),
)

In [22]:
# host_is_superhost: fill missing values with the most frequent category.
# The result is assigned back to the column; calling fillna(inplace=True) on an
# attribute-accessed column acts on a temporary object under pandas
# Copy-on-Write and would leave `df` unchanged.
df['host_is_superhost'] = df['host_is_superhost'].fillna(df['host_is_superhost'].mode()[0])

In [23]:
# bathrooms_text: fill missing values with the most frequent category.
# Assigned back explicitly because fillna(inplace=True) on an attribute-accessed
# column does not update `df` under pandas Copy-on-Write.
df['bathrooms_text'] = df['bathrooms_text'].fillna(df['bathrooms_text'].mode()[0])

In [24]:
# Numerical Variable

In [25]:
# Distribution of every numeric column on a two-column grid of subplots.
numeric_cols = df.select_dtypes(np.number).columns
# Keep the original 21-row layout, but grow it when there are more than 42
# numeric columns so plt.subplot never receives an out-of-range position.
grid_rows = max(21, (len(numeric_cols) + 1) // 2)
plt.figure(figsize=(10,50))
for count, i in enumerate(numeric_cols, start=1):
    plt.subplot(grid_rows, 2, count)
    sns.distplot(df[i])
    plt.xticks(rotation=90)
# Lay the grid out once, after all panels exist, instead of on every iteration.
plt.tight_layout()

In [26]:
# The numerical variables are skewed hence median imputation will have better result.

In [27]:
# Numeric columns with some, but fewer than 10%, missing values
# (shown as the percentage of rows that are null).
numeric_null_pct = df.select_dtypes(np.number).isnull().sum() / len(df) * 100
numeric_null_pct = numeric_null_pct[numeric_null_pct > 0]
numeric_null_pct[numeric_null_pct.values < 10]

In [28]:
# Imputing variables with less than 5% missing values with median.

In [29]:
# Median imputation, assigned back explicitly: fillna(inplace=True) on an
# attribute-accessed column does not update `df` under pandas Copy-on-Write.
df['host_total_listings_count'] = df['host_total_listings_count'].fillna(df['host_total_listings_count'].median())

In [30]:
# Median imputation, assigned back explicitly: fillna(inplace=True) on an
# attribute-accessed column does not update `df` under pandas Copy-on-Write.
df['beds'] = df['beds'].fillna(df['beds'].median())

In [31]:
# Median imputation using the column's OWN median (the previous version filled
# minimum_minimum_nights with the median of minimum_maximum_nights).
# Assigned back explicitly because fillna(inplace=True) on an attribute-accessed
# column does not update `df` under pandas Copy-on-Write.
df['minimum_minimum_nights'] = df['minimum_minimum_nights'].fillna(df['minimum_minimum_nights'].median())

In [32]:
# Median imputation, assigned back explicitly: fillna(inplace=True) on an
# attribute-accessed column does not update `df` under pandas Copy-on-Write.
df['maximum_minimum_nights'] = df['maximum_minimum_nights'].fillna(df['maximum_minimum_nights'].median())

In [33]:
# Median imputation, assigned back explicitly: fillna(inplace=True) on an
# attribute-accessed column does not update `df` under pandas Copy-on-Write.
df['minimum_maximum_nights'] = df['minimum_maximum_nights'].fillna(df['minimum_maximum_nights'].median())

In [34]:
# Median imputation, assigned back explicitly: fillna(inplace=True) on an
# attribute-accessed column does not update `df` under pandas Copy-on-Write.
df['minimum_nights_avg_ntm'] = df['minimum_nights_avg_ntm'].fillna(df['minimum_nights_avg_ntm'].median())

In [35]:
# Median imputation, assigned back explicitly: fillna(inplace=True) on an
# attribute-accessed column does not update `df` under pandas Copy-on-Write.
df['maximum_nights_avg_ntm'] = df['maximum_nights_avg_ntm'].fillna(df['maximum_nights_avg_ntm'].median())

In [36]:
# Median imputation, assigned back explicitly: fillna(inplace=True) on an
# attribute-accessed column does not update `df` under pandas Copy-on-Write.
df['maximum_maximum_nights'] = df['maximum_maximum_nights'].fillna(df['maximum_maximum_nights'].median())

In [37]:
# Columns that still contain nulls after the median imputation above.
remaining_nulls = df.isnull().sum()
remaining_nulls[remaining_nulls > 0]

In [38]:
# Imputing numerical variables with more than 5% missing values with logical imputation

In [39]:
# The host_response_rate and host_acceptance_rate can be imputed with logical
# imputation using host_response_time.

# host_response_rate: fill missing values with the median rate of the rows in
# the same host_response_time category.
# transform() returns a result aligned to df's own index; groupby(...).apply()
# can return a (group key, row label) MultiIndex, which breaks the assignment
# back to the column.
df['host_response_rate'] = (
    df.groupby('host_response_time')['host_response_rate']
    .transform(lambda x: x.fillna(x.median()))
)

In [40]:
# host_acceptance_rate: fill missing values with the median rate of the rows in
# the same host_response_time category.
# transform() keeps the result aligned to df's index, unlike groupby(...).apply(),
# which can return a MultiIndex that breaks the assignment back to the column.
df['host_acceptance_rate'] = (
    df.groupby('host_response_time')['host_acceptance_rate']
    .transform(lambda x: x.fillna(x.median()))
)

In [41]:
# The bedrooms,review_scores_accuracy, review_scores_rating, review_scores_cleanliness,
# review_scores_checkin, review_scores_communication, review_scores_location ,review_scores_value and reviews_per_month 
# can be imputed with logical imputation using room_type

In [42]:
# Fill missing values with the median of the same room_type.
# transform() keeps the result aligned to df's index, unlike groupby(...).apply(),
# which can return a MultiIndex that breaks the assignment back to the column.
df['bedrooms'] = df.groupby('room_type')['bedrooms'].transform(lambda x: x.fillna(x.median()))

In [43]:
# Fill missing values with the median of the same room_type.
# transform() keeps the result aligned to df's index, unlike groupby(...).apply(),
# which can return a MultiIndex that breaks the assignment back to the column.
df['review_scores_accuracy'] = df.groupby('room_type')['review_scores_accuracy'].transform(lambda x: x.fillna(x.median()))

In [44]:
# Fill missing values with the median of the same room_type.
# transform() keeps the result aligned to df's index, unlike groupby(...).apply(),
# which can return a MultiIndex that breaks the assignment back to the column.
df['review_scores_rating'] = df.groupby('room_type')['review_scores_rating'].transform(lambda x: x.fillna(x.median()))

In [45]:
# Fill missing values with the median of the same room_type.
# transform() keeps the result aligned to df's index, unlike groupby(...).apply(),
# which can return a MultiIndex that breaks the assignment back to the column.
df['review_scores_cleanliness'] = df.groupby('room_type')['review_scores_cleanliness'].transform(lambda x: x.fillna(x.median()))

In [46]:
# Fill missing values with the median of the same room_type.
# transform() keeps the result aligned to df's index, unlike groupby(...).apply(),
# which can return a MultiIndex that breaks the assignment back to the column.
df['review_scores_checkin'] = df.groupby('room_type')['review_scores_checkin'].transform(lambda x: x.fillna(x.median()))

In [47]:
# Fill missing values with the median of the same room_type.
# transform() keeps the result aligned to df's index, unlike groupby(...).apply(),
# which can return a MultiIndex that breaks the assignment back to the column.
df['review_scores_communication'] = df.groupby('room_type')['review_scores_communication'].transform(lambda x: x.fillna(x.median()))

In [48]:
# Fill missing values with the median of the same room_type.
# transform() keeps the result aligned to df's index, unlike groupby(...).apply(),
# which can return a MultiIndex that breaks the assignment back to the column.
df['review_scores_location'] = df.groupby('room_type')['review_scores_location'].transform(lambda x: x.fillna(x.median()))

In [49]:
# Fill missing values with the median of the same room_type.
# transform() keeps the result aligned to df's index, unlike groupby(...).apply(),
# which can return a MultiIndex that breaks the assignment back to the column.
df['review_scores_value'] = df.groupby('room_type')['review_scores_value'].transform(lambda x: x.fillna(x.median()))

In [50]:
# Fill missing values with the median of the same room_type.
# transform() keeps the result aligned to df's index, unlike groupby(...).apply(),
# which can return a MultiIndex that breaks the assignment back to the column.
df['reviews_per_month'] = df.groupby('room_type')['reviews_per_month'].transform(lambda x: x.fillna(x.median()))

In [51]:
# Feature Engineering

# Creating 2 new variable from the amenities and bathroom_text columns

In [52]:
# Replace the amenities string with the number of comma-separated pieces it
# contains, stored as an integer.
# NOTE(review): an amenity whose own text contains a comma is counted more than
# once, and an empty list still counts as 1 - confirm this is acceptable.
amenity_pieces = df['amenities'].str.split(',')
df['amenities'] = amenity_pieces.str.len().astype(int)

In [53]:
# Creating a new variable bathrooms_text with the numerical part from bathrooms_text

df.bathrooms_text.value_counts()

In [54]:
# Keep only the first whitespace-separated token of bathrooms_text.
# Vectorised string methods replace the per-row loop: its chained assignment
# `df.bathrooms_text[i] = ...` is slow, assumes a 0..n-1 index, and does not
# write through to `df` under pandas Copy-on-Write.
df['bathrooms_text'] = df['bathrooms_text'].str.split().str[0]

In [55]:
# Entries whose first token is a word rather than a number ('Half-bath',
# 'Shared', 'Private') are counted as half a bathroom, then the column is cast
# to float.
# Assigned back explicitly because replace(inplace=True) on an attribute-accessed
# column does not update `df` under pandas Copy-on-Write.
df['bathrooms_text'] = (
    df['bathrooms_text']
    .replace(to_replace=['Half-bath', 'Shared', 'Private'], value='0.5')
    .astype(float)
)

In [56]:
df.shape

In [57]:
df.isnull().sum()

### Checking for Outliers

In [58]:
# One box per numeric column on a single tall figure, with vertical tick labels.
fig, ax = plt.subplots(figsize=(15, 20))
df.boxplot(ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()

From the above boxplot we can clearly see that there are outliers in almost all the columns.
For the moment we will not treat the outliers; we will first see the impact the outliers have on the model and then choose a better method to treat them.

## Exploratory Data Analysis

### Univariate analysis

#### Categorical variable

In [59]:
# One pie chart per categorical column.
for i in df.select_dtypes(include=object):
    # Slice sizes and labels must come from the same Series: value_counts() is
    # ordered by frequency (and skips nulls) while unique() is ordered by first
    # appearance, so pairing the two mislabels the slices.
    category_counts = df[i].value_counts()
    plt.figure(figsize=(10,8))
    plt.pie(x=category_counts, labels=category_counts.index, autopct='%.2f')
    plt.title(i)
    plt.show()

#### Numerical variable

In [60]:
# Distribution plot of each numerical variable, one figure per column.

for num_col in df.select_dtypes(np.number).columns:
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.distplot(df[num_col], ax=ax)
    ax.set_title(num_col)
    ax.set_xlabel(num_col)
    ax.set_ylabel('Density')
    plt.show()

In [61]:
# Box plot of each numerical variable, one figure per column.

for num_col in df.select_dtypes(np.number).columns:
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.boxplot(y=df[num_col], ax=ax)
    ax.set_title(num_col)
    plt.show()

### Bivariate analysis

In [62]:
# Listing counts per room_type, split by instant_bookable.

plt.figure(figsize=(10, 8))
sns.countplot(data=df, x='room_type', hue='instant_bookable')
plt.show()

In [63]:
# We can see that the majority of the business is done in the Entire home/apt segment and in Private rooms

In [64]:
# Listing counts per room_type, split by host_response_time.

plt.figure(figsize=(10, 8))
sns.countplot(data=df, x='room_type', hue='host_response_time')
plt.show()

In [65]:
# Clearly we can see that most hosts in the Entire home/apt segment respond within an hour of the booking request

#### Multivariate analysis

In [66]:
# Acceptance rate by host_response_time, with separate boxes for superhosts
# and non-superhosts.

plt.figure(figsize=(15, 8))
sns.boxplot(data=df, x='host_response_time', y='host_acceptance_rate', hue='host_is_superhost')
plt.legend(loc='lower right')
plt.show()

In [67]:
# Most hosts who respond to a booking within an hour tend to have a high acceptance rate and are more likely to be superhosts.

In [68]:
# Listing coordinates coloured by neighbourhood_group_cleansed.
# NOTE(review): latitude is on the x-axis and longitude on the y-axis, so the
# picture is transposed relative to a conventional map - confirm this is intended.
plt.figure(figsize=(10, 8))
sns.scatterplot(data=df, x='latitude', y='longitude', hue='neighbourhood_group_cleansed')
plt.show()

In [69]:
# We can see that some of the hosts who are not located in the City of Los Angeles are classified as being in the City of Los Angeles.

In [70]:
# Listing coordinates coloured by room_type; the figure is also written to
# 'Lat-Long-Room.png' in the working directory before being shown.

plt.figure(figsize=(10, 8))
sns.scatterplot(data=df, x='latitude', y='longitude', hue='room_type')
plt.savefig('Lat-Long-Room.png')
plt.show()

In [71]:
# The major part of the City of Los Angeles is covered with Entire home/apt listings, and as we move away from the
# city center we get to see a mixture of room types

#### Correlation matrix

In [72]:
# Heatmap of the strong pairwise correlations (|r| > 0.5) between numeric columns.
# The matrix is computed once, and only on the numeric columns: `df` still holds
# string columns at this point and DataFrame.corr() raises on those in
# pandas >= 2.0.
corr_matrix = df.select_dtypes(np.number).corr()
plt.figure(figsize=(30,25))
# where() blanks every cell that does not satisfy the condition.
sns.heatmap(corr_matrix.where((corr_matrix > 0.5) | (corr_matrix < -0.5)), annot = True)
plt.show()

### Statistical Analysis

In [73]:
# Checking the dependency of Target on the independent variables.Categorical Variables with Target

In [74]:
# Normality test for price

# Null: The data is normally distributed
# Alternate: The data is not normally distributed

In [75]:
# Jarque-Bera normality test on the target column.
jb_result = ss.jarque_bera(df['price'])
j, p = jb_result
print('The test statistics: ',j,'\n')
print("The p_value: ",p)

Since the p-value is less than alpha(i.e. 0.05) we reject the null hypothesis. Hence we can conclude price is not normally distributed. Hence we can use the non-parametric test like Mann-Whitney and Kruskal-Wallis test for checking dependency.

In [76]:
# Categorical Variables with Target

In [77]:
df.select_dtypes(include=object).describe()

In [78]:
# 1. host_response_time vs price

In [79]:
# Since it is Categorical vs numerical and has more than 2 categories, using Kruskal-Wallis test 

# Null: The host_response_time and price are independent
# Alternate: The host_response_time and price are dependent

# alpha=0.05 (Assume)

In [80]:
df.host_response_time.value_counts()

In [81]:
# Price samples for each host_response_time category.
df1 = df.loc[df['host_response_time'] == 'within an hour', 'price']
df2 = df.loc[df['host_response_time'] == 'within a few hours', 'price']
df3 = df.loc[df['host_response_time'] == 'within a day', 'price']
df4 = df.loc[df['host_response_time'] == 'a few days or more', 'price']

In [82]:
# Kruskal-Wallis H-test across the four response-time price samples.
price_groups = (df1, df2, df3, df4)
k, p = ss.kruskal(*price_groups)
print('The test statistics: ',k,'\n')
print("The p_value: ",p)

Since the p-value is less than alpha(i.e. 0.05) we reject the null hypothesis. Hence we can conclude price is dependent on host_response_time.

In [83]:
# 2.host_is_superhost vs price

In [84]:
# Since it is Categorical vs numerical and has only 2 category, using Mann-Whitney test.

# Null: The host_is_superhost and price are independent
# Alternate: The host_is_superhost and price are dependent

# alpha = 0.05

In [85]:
df.host_is_superhost.value_counts()

In [86]:
# Price samples for non-superhosts ('f') and superhosts ('t').
df1 = df.loc[df['host_is_superhost'] == 'f', 'price']
df2 = df.loc[df['host_is_superhost'] == 't', 'price']

In [87]:
# Mann-Whitney U test between the two price samples.
# The alternative is stated explicitly: the hypothesis above is two-sided
# (independent vs. dependent) and the default of `alternative` differs between
# SciPy versions.
m, p = ss.mannwhitneyu(df1, df2, alternative='two-sided')
print('The test statistics: ',m,'\n')
print("The p_value: ",p)

Since the p-value is less than alpha(i.e. 0.05) we reject the null hypothesis. Hence we can conclude price is dependent on host_is_superhost.

In [88]:
# 3.neighbourhood_group_cleansed vs price

# Since it is Categorical vs numerical and has more than 2 categories, using Kruskal-Wallis test 

# Null: The neighbourhood_group_cleansed and price are independent
# Alternate: The neighbourhood_group_cleansed and price are dependent

# alpha=0.05 (Assume)

In [89]:
df.neighbourhood_group_cleansed.value_counts()

In [90]:
# Price samples for each neighbourhood_group_cleansed category.
df1 = df.loc[df['neighbourhood_group_cleansed'] == 'City of Los Angeles', 'price']
df2 = df.loc[df['neighbourhood_group_cleansed'] == 'Other Cities', 'price']
df3 = df.loc[df['neighbourhood_group_cleansed'] == 'Unincorporated Areas', 'price']

In [91]:
# Kruskal-Wallis H-test across the three neighbourhood-group price samples.
price_groups = (df1, df2, df3)
k, p = ss.kruskal(*price_groups)
print('The test statistics: ',k,'\n')
print("The p_value: ",p)

Since the p-value is less than alpha(i.e. 0.05) we reject the null hypothesis. Hence we can conclude price is dependent on neighbourhood_group_cleansed.

In [92]:
# 4.room_type vs price

# Since it is Categorical vs numerical and has more than 2 categories, using Kruskal-Wallis test 

# Null: The room_type and price are independent
# Alternate: The room_type and price are dependent

# alpha=0.05 (Assume)

In [93]:
df.room_type.value_counts()

In [94]:
# Price samples for each room_type category.
df1 = df.loc[df['room_type'] == 'Entire home/apt', 'price']
df2 = df.loc[df['room_type'] == 'Private room', 'price']
df3 = df.loc[df['room_type'] == 'Shared room', 'price']
df4 = df.loc[df['room_type'] == 'Hotel room', 'price']

In [95]:
# Kruskal-Wallis H-test across the four room-type price samples.
price_groups = (df1, df2, df3, df4)
k, p = ss.kruskal(*price_groups)
print('The test statistics: ',k,'\n')
print("The p_value: ",p)

Since the p-value is less than alpha(i.e. 0.05) we reject the null hypothesis. Hence we can conclude price is dependent on room_type.

In [96]:
# 5. has_availability vs price

# Since it is Categorical vs numerical and has only 2 category, using Mann-Whitney test.

# Null: The has_availability and price are independent
# Alternate: The has_availability and price are dependent

# alpha = 0.05

In [97]:
df.has_availability.value_counts()

In [98]:
# Price samples for listings without ('f') and with ('t') availability.
df1 = df.loc[df['has_availability'] == 'f', 'price']
df2 = df.loc[df['has_availability'] == 't', 'price']

In [99]:
# Mann-Whitney U test between the two price samples.
# The alternative is stated explicitly: the hypothesis above is two-sided
# (independent vs. dependent) and the default of `alternative` differs between
# SciPy versions.
m, p = ss.mannwhitneyu(df1, df2, alternative='two-sided')
print('The test statistics: ',m,'\n')
print("The p_value: ",p)

Since the p-value is less than alpha(i.e. 0.05) we reject the null hypothesis. Hence we can conclude price is dependent on has_availability.

In [100]:
# 6.instant_bookable vs price

# Since it is Categorical vs numerical and has only 2 category, using Mann-Whitney test.

# Null: The instant_bookable and price are independent
# Alternate: The instant_bookable and price are dependent

# alpha = 0.05

In [101]:
df.instant_bookable.value_counts()

In [102]:
# Price samples for listings that are not ('f') and are ('t') instantly bookable.
df1 = df.loc[df['instant_bookable'] == 'f', 'price']
df2 = df.loc[df['instant_bookable'] == 't', 'price']

In [103]:
# Mann-Whitney U test between the two price samples.
# The alternative is stated explicitly: the hypothesis above is two-sided
# (independent vs. dependent) and the default of `alternative` differs between
# SciPy versions.
m, p = ss.mannwhitneyu(df1, df2, alternative='two-sided')
print('The test statistics: ',m,'\n')
print("The p_value: ",p)

Since the p-value is less than alpha(i.e. 0.05) we reject the null hypothesis. Hence we can conclude price is dependent on instant_bookable.

In [104]:
# Numerical Variables

In [105]:
# Numeric-only view of the data (target included) for the correlation tests.
df_num = df.select_dtypes(include=np.number)
df_num.head()

In [106]:
# For every numeric feature: check normality of the feature and of the target
# with Jarque-Bera, then test the feature/price relationship with Spearman
# (if either is non-normal) or Pearson (if both are normal) at alpha = 0.05.
a = df_num.drop(columns = ['price'])
b = df_num.price

# The target's normality result is the same for every feature, so it is
# computed once before the loop.
n = b
t2, p2 = ss.jarque_bera(n)

for z, i in enumerate(a.columns, start=1):
    e = a[i]
    print('\n',z,'Independent Variable =',i, ' : ', 'Target Variable = Price')
    print('\n Hypothesis for normality')
    print('\n\tH0: The data is normally distributed')
    print('\tHA: The data is not normally distributed')
    t1, p1 = ss.jarque_bera(e)
    not_normal = p1 < 0.05 or p2 < 0.05

    if not_normal:
        print('\n Inference : The data is not normally distributed, hence using non parametric test - SpearMan Correlation.')
        print('\nHypothesis for SpearMan Correlation test')
        corr_test = ss.spearmanr
        reject_msg = ('\n Inference : Accepting to reject null hypothesis, the variable -',
                      'is a significant feature to predict the target column - Price')
        accept_msg = ('\n Inference : Accept null hypothesis, the variable -',
                      'is not a significant feature to predict the target column - Price')
    else:
        print('The data is normally distributed, hence using Parametric test - Pearson Correlation.')
        print('\nHypothesis for pearson test')
        corr_test = ss.pearsonr
        reject_msg = ('\n Inference : Accepting to reject null hypothesis, the variable -',
                      'is a significant feature to predict the target column - Price.')
        accept_msg = ('\n  Inference : Accept null hypothesis, the variable -',
                      'is not a significant feature to predict the target column - Price')

    print('\n\tH0: There is no relationship between', i ,'and Price.')
    print('\tHA: There is a relationship between', i ,'and Price.')
    corr_stat, corr_p = corr_test(e, n)
    head, tail = reject_msg if corr_p < 0.05 else accept_msg
    print(head, i, tail)

### Encoding

In [107]:
# Split the data into two frames: `obj` with the object-dtype (categorical)
# columns and `num` with the numeric columns.

obj = df.select_dtypes(include=object)
num = df.select_dtypes(include=np.number)

In [108]:
# One-hot encode the categorical columns, dropping the first level of each.
# The dtype is fixed to an integer type: pandas >= 2.0 returns boolean dummies
# by default, and mixing bool with float columns turns a later `.values` call
# into an object array that the numeric routines downstream cannot use.
obj_dumm = pd.get_dummies(obj, drop_first=True, dtype=np.uint8)

In [109]:
# Final dataset after encoding: dummy columns first, numeric columns after,
# joined side by side on the shared row index.

encoded_parts = [obj_dumm, num]
df_final = pd.concat(encoded_parts, axis='columns')

In [110]:
df_final

In [111]:
import tensorflow as tf


In [112]:
df_final.info()

# Checking Multicollinearity

In [249]:
# Annotated heatmap of all pairwise correlations in the encoded dataset.
final_corr = df_final.corr()
plt.figure(figsize=(20, 12))
sns.heatmap(final_corr, annot=True)

**We can see that there is a lot of multicollinearity in the data**

# Dropping the columns with high multicollinearity, using the VIF number and keeping the threshold at 11

In [250]:
# We can try to reduce the multicollinearity present in the data by keeping
# only variables with VIF < 11.

# Target plus the features excluded from the VIF calculation.
vif_excluded = [
    'price', 'maximum_nights_avg_ntm', 'longitude', 'calculated_host_listings_count',
    'review_scores_accuracy', 'review_scores_checkin', 'minimum_nights_avg_ntm',
    'review_scores_communication', 'review_scores_location', 'review_scores_value',
    'host_response_rate', 'latitude', 'review_scores_cleanliness', 'availability_60',
    'review_scores_rating', 'has_availability_t', 'host_acceptance_rate',
    'availability_90', 'bedrooms',
]
X = df_final.drop(columns=vif_excluded)
y = df_final['price']

# VIF of each retained column, sorted from highest to lowest.
vif = [VIF(X.values, col_pos) for col_pos in range(X.shape[1])]
vif_df = (
    pd.DataFrame({'VIF_index': vif}, index=X.columns)
    .sort_values(by=['VIF_index'], ascending=False)
)
vif_df

# Check for outliers

In [311]:
df_final.boxplot()

**As there are a lot of outliers in the data, transforming all the features is practically not possible, and dropping the columns would lead to a loss of more than 90% of the data, we transform the target variable instead.**

# Using Power transformation on Target variable

In [254]:
# Fit a PowerTransformer (default settings) on the price column and store the
# transformed target as a new column `price_PT`.
# NOTE(review): the transformer is fitted on all rows, before the train/test
# split below - confirm this is acceptable for the evaluation.
price_frame = df_final[['price']]
pt = PowerTransformer().fit(price_frame)
df_final['price_PT'] = pt.transform(price_frame)

In [255]:
# Distribution of the target after the power transformation.

sns.distplot(df_final['price_PT'])

# Final Features and target variable

In [256]:
# Feature matrix: everything except both target columns and the features
# excluded for multicollinearity. Target: the power-transformed price.
model_excluded = [
    'price', 'price_PT', 'maximum_nights_avg_ntm', 'longitude',
    'calculated_host_listings_count', 'review_scores_accuracy', 'review_scores_checkin',
    'minimum_nights_avg_ntm', 'review_scores_communication', 'review_scores_location',
    'review_scores_value', 'host_response_rate', 'latitude', 'review_scores_cleanliness',
    'availability_60', 'review_scores_rating', 'has_availability_t',
    'host_acceptance_rate', 'availability_90', 'bedrooms',
]
X = df_final.drop(columns=model_excluded)
y = df_final['price_PT']

# spliting the data set to train and test

In [257]:
# Hold out 20% of the rows as the test set (fixed seed for a repeatable split).
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y,
    test_size=0.20,
    random_state=100,
)

In [258]:
X_train_full.shape,y_train_full.shape

In [259]:
X_test.shape,y_test.shape

# Spliting the training data into training and validation

In [260]:
# Split the remaining rows into training and validation sets using
# train_test_split's default proportions (fixed seed for a repeatable split).
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full,
    random_state=100,
)

# Scaling the data before using in ANN

In [261]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same scaling to the training, validation and test features.
scaler = StandardScaler().fit(X_train)
X_train, X_valid, X_test = (
    scaler.transform(split) for split in (X_train, X_valid, X_test)
)

In [262]:
import tensorflow as tf
from tensorflow import keras
# Seed NumPy's and TensorFlow's global random generators with the same fixed
# value before any model is built.
np.random.seed(42)
tf.random.set_seed(42)

In [263]:
X_train.shape

# Building ANN

# Sequential API method

In [350]:
# Discard any model left over from an earlier run and reset the Keras session.
# pop() with a default is a no-op when `model` does not exist yet, so this cell
# also runs on a fresh kernel (a bare `del model` raises NameError there).
globals().pop('model', None)
keras.backend.clear_session()

In [351]:
# Fully connected regressor: eight hidden layers of 1000 ReLU units followed by
# a single linear output unit. The first layer declares the input shape.
first_hidden = keras.layers.Dense(1000, activation="relu", input_shape=X_train.shape[1:])
other_hidden = [keras.layers.Dense(1000, activation="relu") for layer_no in range(7)]
output_layer = keras.layers.Dense(1)

model = keras.models.Sequential([first_hidden, *other_hidden, output_layer])

In [352]:
model.summary()

In [353]:
# Train on mean squared error with Adam and report RMSE as an extra metric.
# `metrics` is passed as a list, the form the Keras API documents; a bare
# metric object is not accepted by every Keras version.
model.compile(loss="mean_squared_error",
              optimizer='adam',
              metrics=[tf.keras.metrics.RootMeanSquaredError(name="root_mean_squared_error", dtype=None)])

In [354]:
# Stop training after 15 epochs without improvement in the validation loss
# (the callback's default monitor) and roll back to the best weights seen.
early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
)

In [355]:
# Train for at most 200 epochs, validating on the held-out validation split
# after each epoch; early stopping may end training sooner.
model_history = model.fit(
    X_train,
    y_train,
    epochs=200,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping_cb],
)

In [356]:
# Evaluate on the test split. NOTE(review): the model is compiled with an extra
# metric, so evaluate() presumably returns the loss together with that metric
# rather than a single MSE value - confirm before using `mse_test` as a scalar.
mse_test = model.evaluate(X_test, y_test)

In [271]:
#model_history.history

In [272]:
# Learning curves: one line per recorded quantity in the training history,
# with the y-axis limited to [0, 1].
history_frame = pd.DataFrame(model_history.history)
history_ax = history_frame.plot(figsize=(8, 5))
history_ax.grid(True)
history_ax.set_ylim(0, 1)

plt.show()

# Functional API

In [342]:
# Discard the previous model and reset the Keras session before building the
# functional-API model. pop() with a default is a no-op when `model` is not
# defined, so re-running this cell does not raise NameError as `del model` would.
globals().pop('model', None)
keras.backend.clear_session()

In [343]:
# Functional-API regressor: five stacked hidden layers of 1000 ReLU units; the
# raw input and the outputs of the 3rd and 5th hidden layers are concatenated
# and fed to a single linear output unit.
input_ = keras.layers.Input(shape=X_train.shape[1:])

hidden_outputs = []
previous = input_
for layer_no in range(5):
    previous = keras.layers.Dense(1000, activation="relu")(previous)
    hidden_outputs.append(previous)

concat = keras.layers.concatenate([input_, hidden_outputs[2], hidden_outputs[4]])
output = keras.layers.Dense(1)(concat)
model = keras.models.Model(inputs=[input_], outputs=[output])

In [344]:
model.summary()

In [345]:
# Render a diagram of the model's layer graph.
# NOTE(review): pydot is imported but not referenced directly here; presumably
# plot_model relies on it being installed - confirm.
import pydot
keras.utils.plot_model(model)

In [346]:
# Train on mean squared error with Adam and report RMSE as an extra metric.
# `metrics` is passed as a list, the form the Keras API documents; a bare
# metric object is not accepted by every Keras version.
model.compile(loss="mean_squared_error",
              optimizer='adam',
              metrics=[tf.keras.metrics.RootMeanSquaredError(name="root_mean_squared_error", dtype=None)])

In [347]:
# Stop training after 15 epochs without improvement in the validation loss
# (the callback's default monitor) and roll back to the best weights seen.
early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
)

In [348]:
# Train for at most 200 epochs, validating on the held-out validation split
# after each epoch; early stopping may end training sooner.
model_history = model.fit(
    X_train,
    y_train,
    epochs=200,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping_cb],
)

In [349]:
# Evaluate on the test split. NOTE(review): the model is compiled with an extra
# metric, so evaluate() presumably returns the loss together with that metric
# rather than a single MSE value - confirm before using `mse_test` as a scalar.
mse_test = model.evaluate(X_test, y_test)

In [310]:
# Learning curves: one line per recorded quantity in the training history,
# with the y-axis limited to [0, 1].
history_frame = pd.DataFrame(model_history.history)
history_ax = history_frame.plot(figsize=(8, 5))
history_ax.grid(True)
history_ax.set_ylim(0, 1)

plt.show()

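**We can say that with both types of API (Sequential and Functional) we get the same error of about 0.41, which is acceptable. Note that this figure is labelled MAE here, whereas the metric compiled into both models is RMSE, computed on the power-transformed price.**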