In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import shutil
import math
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import seaborn as sns

# Set the TF1-compat logger's verbosity to INFO.
tf_v1_logging = tf.compat.v1.logging
tf_v1_logging.set_verbosity(tf_v1_logging.INFO)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the input directory.
for root_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(root_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Show which TensorFlow version this kernel is running.
print(tf.__version__)

In [None]:
# Read the challenge's train and test CSV files into DataFrames.
train_csv_path = '/kaggle/input/house-price-prediction-challenge/train.csv'
test_csv_path = '/kaggle/input/house-price-prediction-challenge/test.csv'
df_test = pd.read_csv(test_csv_path)
df_train_eval = pd.read_csv(train_csv_path)

# Preview the first rows of the training frame.
df_train_eval.head()

In [None]:
# Summary statistics for the loaded training frame.
df_train_eval.describe()

In [None]:
# Split ADDRESS once at its first comma; the remainder (column 1) becomes CITY,
# and the original ADDRESS column is discarded.
new = df_train_eval['ADDRESS'].str.split(',', n=1, expand=True)
df_train_eval = df_train_eval.assign(CITY=new[1]).drop(columns='ADDRESS')
df_train_eval.head()

In [None]:
# Split the test set's ADDRESS once at its first comma and keep the remainder as CITY.
new1 = df_test['ADDRESS'].str.split(',', n=1, expand=True)
# Use the test split (new1). The training split would attach training-set cities
# to test rows by index alignment.
df_test['CITY'] = new1[1]
df_test.drop(columns='ADDRESS', inplace=True)
df_test.head()

In [None]:
# Split the labelled data into a training part and an evaluation part.
# Each row draws a uniform random number; rows below 0.8 go to training.
# The seed is fixed so the split is the same on every run.
np.random.seed(1)
mask = np.random.rand(df_train_eval.shape[0]) < 0.8
df_train, df_eval = df_train_eval.loc[mask], df_train_eval.loc[~mask]

df_train.describe()

In [None]:
# Pairwise relationship plots for the columns of the full training frame.
sns.pairplot(df_train_eval)

In [None]:
# Correlation heatmap over numeric columns only. The frame still contains string
# columns (e.g. POSTED_BY, BHK_OR_RK, CITY), and DataFrame.corr() raises on
# non-numeric data in pandas >= 2.0; select_dtypes works on old and new pandas.
sns.heatmap(df_train_eval.select_dtypes(include='number').corr())

# **Feature Engineering**

In [None]:
#Create Pandas Input Function
def create_input_fn(dataframe, Epochs):
    """Build a TF1-compat pandas input function over `dataframe`.

    Args:
        dataframe: pandas DataFrame holding the feature columns plus the
            'TARGET(PRICE_IN_LACS)' label column.
        Epochs: value forwarded as `num_epochs` (number of passes over the
            data; the callers in this notebook pass None for training and 1
            for evaluation).

    Returns:
        The input function produced by `pandas_input_fn`, yielding shuffled
        batches of 512 rows as (features, label).
    """
    label_name = 'TARGET(PRICE_IN_LACS)'
    return tf.compat.v1.estimator.inputs.pandas_input_fn(
        # Keep the label out of the feature dict so the target can never leak
        # into the model through a feature column.
        x = dataframe.drop(columns=[label_name]),
        y = dataframe[label_name],
        batch_size = 512,
        num_epochs = Epochs,
        shuffle = True,
        queue_capacity = 1000,
        num_threads = 1
    )

In [None]:
#Defining the feature columns
def feature_cols():
    """Return the list of TensorFlow feature columns used by the estimator.

    The list contains, in order: numeric SQUARE_FT, bucketized LONGITUDE and
    LATITUDE, one-hot columns for POSTED_BY, BHK_OR_RK, UNDER_CONSTRUCTION,
    RERA, READY_TO_MOVE and RESALE, numeric BHK_NO., a 10-dimensional
    embedding of the hashed LONGITUDE x LATITUDE bucket cross, and a one-hot
    hashed CITY column. Vocabularies for the last four one-hot flags are read
    from the notebook-level `df_train_eval` frame.
    """
    fc = tf.feature_column

    def bucketize(column_name, edges):
        # Numeric column discretised at the given boundary values.
        return fc.bucketized_column(
            fc.numeric_column(column_name),
            boundaries = edges.tolist())

    def one_hot(column_name, vocabulary):
        # Vocabulary-based categorical column wrapped as an indicator column.
        return fc.indicator_column(
            fc.categorical_column_with_vocabulary_list(column_name, vocabulary))

    long_buc = bucketize('LONGITUDE', np.arange(-38, 60, 4))
    lat_buc = bucketize('LATITUDE', np.arange(-122, 153, 10))

    columns = [fc.numeric_column('SQUARE_FT'), long_buc, lat_buc]
    columns.append(one_hot('POSTED_BY', ['Owner', 'Dealer', 'Builder']))
    columns.append(one_hot('BHK_OR_RK', ['BHK', 'RK']))

    # These flags take their vocabulary from the values present in the data.
    for flag_name in ('UNDER_CONSTRUCTION', 'RERA', 'READY_TO_MOVE', 'RESALE'):
        columns.append(one_hot(flag_name, df_train_eval[flag_name].unique()))

    columns.append(fc.numeric_column('BHK_NO.'))

    # Location cross: hash the (longitude bucket, latitude bucket) pair and embed it.
    location_cross = fc.crossed_column([long_buc, lat_buc], hash_bucket_size=400)
    columns.append(fc.embedding_column(location_cross, dimension = 10))

    city_hashed = fc.categorical_column_with_hash_bucket('CITY', hash_bucket_size=4500)
    columns.append(fc.indicator_column(city_hashed))

    return columns


# **Train And Evaluate**

In [None]:
def serving_input_receiver_fn():
    """Describe the inputs a served/exported model accepts.

    Creates one placeholder of shape [None] for every column of the
    notebook-level `df_train_eval` frame except the label
    'TARGET(PRICE_IN_LACS)'. Numeric columns keep their pandas/numpy dtype;
    every other column is fed as tf.string.

    The previous version referenced an undefined name (`cols`), used
    `tf.placeholder` (not available in TF2), and declared a single float
    placeholder, which cannot feed the string and integer feature columns.

    Returns:
        tf.estimator.export.ServingInputReceiver whose features and receiver
        tensors are both the per-column placeholders.
    """
    label_name = 'TARGET(PRICE_IN_LACS)'

    feature_placeholders = {}
    for column_name, column_dtype in df_train_eval.dtypes.items():
        if column_name == label_name:
            continue  # the label is never a serving input
        if pd.api.types.is_numeric_dtype(column_dtype):
            placeholder_dtype = tf.as_dtype(column_dtype)
        else:
            placeholder_dtype = tf.string
        feature_placeholders[column_name] = tf.compat.v1.placeholder(
            placeholder_dtype, [None])

    # Inputs are passed to the model unchanged; copy so the two dicts are independent.
    features = dict(feature_placeholders)

    return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)

In [None]:
def train_eval(output_dir, num_steps):
    """Train a linear regressor on `df_train` and evaluate it on `df_eval`.

    Args:
        output_dir: directory handed to the estimator as `model_dir`.
        num_steps: value used as `max_steps` for training.
    """
    def eval_rmse(labels, predictions):
        # Extra evaluation metric: RMSE between labels and float64-cast predictions.
        predicted = tf.cast(predictions['predictions'], tf.float64)
        rmse_metric = tf.compat.v1.metrics.root_mean_squared_error(labels, predicted)
        return {'rmse': rmse_metric}

    ftrl = tf.compat.v1.train.FtrlOptimizer(learning_rate=0.02)
    base_estimator = tf.compat.v1.estimator.LinearRegressor(
        model_dir = output_dir,
        feature_columns = feature_cols(),
        optimizer = ftrl)
    estimator = tf.compat.v1.estimator.add_metrics(base_estimator, eval_rmse)

    training_spec = tf.estimator.TrainSpec(
        input_fn = create_input_fn(df_train, None),
        max_steps = num_steps)

    # NOTE(review): this exporter is constructed but never passed to the
    # EvalSpec below, so train_and_evaluate does not use it.
    exporter = tf.estimator.LatestExporter('exporter', serving_input_receiver_fn)

    evaluation_spec = tf.estimator.EvalSpec(
        input_fn = create_input_fn(df_eval, 1),
        steps = None,
        start_delay_secs = 1,
        throttle_secs = 5)

    tf.estimator.train_and_evaluate(estimator, training_spec, evaluation_spec)

In [None]:
# Train into a dedicated sub-directory. Pointing rmtree at '/kaggle/working'
# itself would wipe the notebook's whole working/output directory, not just
# the previous model files.
outputdir = '/kaggle/working/trained_model'

shutil.rmtree(outputdir, ignore_errors=True)  # clear the model directory so every run starts afresh
tf.compat.v1.summary.FileWriterCache.clear()
train_eval(outputdir, 2000)