In [None]:
import pandas as pd

# Load the training set; the cells below only use the two coordinate columns
# and the price_per_m2 target.
traindata = pd.read_csv('files/csv/traindata.csv')
# Take an explicit copy of the feature columns: X has columns reassigned
# later, and assigning into a frame derived from another frame by selection
# raises pandas' SettingWithCopyWarning.
X = traindata[['lat', 'lng']].copy()
Y = traindata['price_per_m2']

In [None]:
import random

def get_lat(lat, max_offset=0.000406):
    """Return ``lat`` shifted by a uniformly distributed random offset.

    Parameters
    ----------
    lat : float
        Original value (presumably a latitude in degrees -- TODO confirm).
    max_offset : float, optional
        Half-width of the jitter interval; the offset is drawn from
        ``[-max_offset, max_offset]``. Defaults to the value that was
        previously hard-coded.

    Returns
    -------
    float
        ``lat`` plus the random offset.

    Notes
    -----
    Uses the module-level ``random`` generator, so results are only
    reproducible if ``random.seed`` was called beforehand.
    """
    return lat + random.uniform(-max_offset, max_offset)

def get_lng(lng, max_offset=0.000306):
    """Return ``lng`` shifted by a uniformly distributed random offset.

    Parameters
    ----------
    lng : float
        Original value (presumably a longitude in degrees -- TODO confirm).
    max_offset : float, optional
        Half-width of the jitter interval; the offset is drawn from
        ``[-max_offset, max_offset]``. Defaults to the value that was
        previously hard-coded.

    Returns
    -------
    float
        ``lng`` plus the random offset.

    Notes
    -----
    Uses the module-level ``random`` generator, so results are only
    reproducible if ``random.seed`` was called beforehand.
    """
    return lng + random.uniform(-max_offset, max_offset)

# Seed Python's RNG so the random jitter -- and therefore the split and the
# trained model that depend on it -- is the same on every Restart & Run All.
random.seed(1234)

# Jitter both coordinate columns. `assign` builds a new frame instead of
# writing columns into a frame that was derived from `traindata` by selection,
# which is what raises pandas' SettingWithCopyWarning. The keyword arguments
# are evaluated in order, so all `lat` offsets are drawn before any `lng` one.
X = X.assign(
    lat=X['lat'].apply(get_lat),
    lng=X['lng'].apply(get_lng),
)

# traindata_unique = traindata.groupby(['lat', 'lng']).quantile(q=0.5).reset_index()
# traindata_unique.sort_values(by='price_per_m2')['price_per_m2'].sum(axis=0)
# X = traindata_unique[['lat', 'lng']].to_numpy()
# Y = traindata_unique['price_per_m2'].to_numpy()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.model_selection import cross_val_score, train_test_split

# Hold out 20% of the rows for validation; the fixed random_state makes the
# partition identical between runs.
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

In [None]:
# Learn the standardisation statistics from the training features only, then
# apply them to those same features.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Shrink the target by a constant factor of 1e4 (presumably to keep the
# regression target in a small numeric range -- TODO confirm). Predictions
# made by a model trained on this target must be multiplied by 1e4 again.
Y_train_scaled = Y_train.div(1e4)

In [None]:
# No executed cell above defines `traindata_unique` (its only definition is in
# a commented-out line), so build it here: one row per distinct (lat, lng)
# pair carrying the median price_per_m2 of that location.
traindata_unique = (
    traindata
    .groupby(['lat', 'lng'], as_index=False)['price_per_m2']
    .median()
)

# Min-max scale the coordinates and divide the price by a fixed 2e5.
# `assign` returns a new frame, so `traindata_unique` keeps its raw values and
# no column is written into a slice of another frame. Each lambda only reads
# the column it replaces, so the order of the keyword arguments is irrelevant.
scaled = traindata_unique.assign(
    lat=lambda df: (df['lat'] - df['lat'].min()) / (df['lat'].max() - df['lat'].min()),
    lng=lambda df: (df['lng'] - df['lng'].min()) / (df['lng'].max() - df['lng'].min()),
    price_per_m2=lambda df: df['price_per_m2'] / 2e5,
)
scaled.describe()

In [None]:
# Shuffle every row (frac=1) with a fixed seed, then renumber the index from 0.
scaled = (
    scaled
    .sample(frac=1, random_state=10)
    .reset_index(drop=True)
)

In [None]:
# Min-max normalise the target and both coordinate columns of `traindata`
# in place; each column is rescaled with its own minimum and maximum.
for column in ('price_per_m2', 'lat', 'lng'):
    column_min = traindata[column].min()
    column_max = traindata[column].max()
    traindata[column] = (traindata[column] - column_min) / (column_max - column_min)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.activations import linear, relu, sigmoid

import logging
# Fix TensorFlow's global seed so results are consistent between runs.
tf.random.set_seed(1234)

# Quieten TensorFlow: only errors from its logger, no AutoGraph messages.
tf_logger = logging.getLogger("tensorflow")
tf_logger.setLevel(logging.ERROR)
tf.autograph.set_verbosity(0)

In [None]:
# A single linear unit fed by the two input features: a linear regression
# expressed as a Keras model.
model = Sequential(name="my_model")
model.add(tf.keras.Input(shape=(2,)))
model.add(Dense(1, activation="linear"))

In [None]:
# Learning rate starts at 0.01 and is multiplied by 0.9 per 1000 optimizer steps.
decay_config = {
    "initial_learning_rate": 0.01,
    "decay_steps": 1000,
    "decay_rate": 0.9,
}
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(**decay_config)

# Plain SGD driven by the decaying schedule above.
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)

# Train against mean squared error using the optimizer configured above.
mse_loss = tf.keras.losses.MeanSquaredError()
model.compile(optimizer=optimizer, loss=mse_loss)

# Keras receives the target as an (n_samples, 1) column array.
train_targets = Y_train_scaled.to_numpy().reshape(-1,1)

# Fit for 50 epochs on the standardised features and the scaled target.
history = model.fit(X_train_scaled, train_targets, epochs=50)

In [None]:
# Validation features must go through the SAME transformation the model was
# trained on. Fitting a second StandardScaler on the validation split would
# use different means and variances, so the model would see inputs on a
# different scale than during training.
scaler_val = scaler  # alias kept so the name `scaler_val` still resolves

# Store the result under a new name rather than overwriting X_val, so the raw
# validation frame stays available and re-running this cell is harmless.
X_val_scaled = scaler.transform(X_val)

In [None]:
# Map training row 8 back from standardised units to the original coordinates.
row_scaled = X_train_scaled[8].reshape(1,2)
X_train_ori = scaler.inverse_transform(row_scaled)
X_train_ori

In [None]:
# Unscaled target of the training row at position 8 (positional lookup, since
# Y_train keeps the shuffled index labels from the split).
Y_train.iloc[8]

In [None]:
# Predict for training row 8, then undo the 1e4 target scaling used in training.
scaled_prediction = model.predict(X_train_scaled[8].reshape(1,2))
res = scaled_prediction * 1e4
res

In [None]:
# Difference between every price in `traindata_unique` and the single
# prediction held in `res`. Requires `traindata_unique` to have been defined
# by an earlier cell.
unique_prices = traindata_unique['price_per_m2'].to_numpy().reshape(-1,1)
unique_prices - res

In [None]:
# Compare the model's prediction with the true value for one training row.
#
# * X_train is a DataFrame, so `X_train[20]` would look up a COLUMN named 20;
#   the model also expects standardised inputs, so take the row from
#   X_train_scaled instead.
# * The model was fitted on the target divided by 1e4, so the raw output is
#   multiplied by 1e4 to return to the original units.
# * Y_train keeps the shuffled index labels from the split, so the matching
#   target is fetched by position with `.iloc`, not by label.
sample_position = 20
prediction = model.predict(X_train_scaled[sample_position].reshape(1, 2)) * 1e4
actual = Y_train.iloc[sample_position]

print(f"prediction: {prediction.flatten()}")
print(f"real: {actual}")
print(f"prediction error: {prediction.flatten() - actual}")