In [None]:
import numpy as np
import pandas as pd
import pandas_profiling
import os
from tensorflow import keras

In [None]:
class pre_processing_data():
    """Load train/valid/test CSV files and expose standardized feature arrays.

    The train and validation files must contain ``id`` and ``price`` columns;
    the test file must contain ``id``. ``price`` is used as the target and
    every remaining column is treated as a numeric feature.

    After ``do_pre_proccessing_data()`` the instance exposes ``train_data``,
    ``valid_data`` and ``test_data`` (standardized NumPy feature arrays) and
    ``train_targets`` / ``valid_targets`` (price arrays). The loaded frames
    (``*_data_org``) are left untouched.
    """

    def __init__(self, train_data_path, valid_data_path, test_data_path):
        """Remember the three CSV paths; nothing is read until ``load_data``."""
        self.train_data_path = train_data_path
        self.valid_data_path = valid_data_path
        self.test_data_path = test_data_path

    def load_data(self):
        """Read the three CSV files into ``*_data_org`` DataFrames."""
        self.train_data_org = pd.read_csv(self.train_data_path)
        self.valid_data_org = pd.read_csv(self.valid_data_path)
        self.test_data_org = pd.read_csv(self.test_data_path)

    def drop_id_price(self):
        """Return (train, valid, test) feature arrays without ``id``/``price``.

        The columns are dropped on copies, so the original frames keep their
        ``price`` column and ``get_price_target`` works in any call order.
        """
        train_features = self.train_data_org.drop(columns=['id', 'price'])
        valid_features = self.valid_data_org.drop(columns=['id', 'price'])
        test_features = self.test_data_org.drop(columns=['id'])
        return (train_features.to_numpy(),
                valid_features.to_numpy(),
                test_features.to_numpy())

    @staticmethod
    def _as_float(array):
        """Return ``array`` as-is if it is floating point, else a float64 copy."""
        if np.issubdtype(array.dtype, np.floating):
            return array
        return array.astype(np.float64)

    def mean_std(self):
        """Standardize all three splits with the training mean and std.

        The statistics come from the training split only and are kept on the
        instance as ``feature_mean`` / ``feature_std``.
        """
        # Integer arrays cannot hold the result of the in-place float
        # arithmetic below, so make sure every split is floating point first.
        self.train_data = self._as_float(self.train_data)
        self.valid_data = self._as_float(self.valid_data)
        self.test_data = self._as_float(self.test_data)

        mean = self.train_data.mean(axis=0)
        self.train_data -= mean
        std = self.train_data.std(axis=0)
        # A constant column has std 0; dividing by 1 instead keeps it at 0
        # after centering rather than turning it into NaN.
        std = np.where(std == 0, 1.0, std)
        self.train_data /= std

        self.valid_data -= mean
        self.valid_data /= std

        self.test_data -= mean
        self.test_data /= std

        self.feature_mean = mean
        self.feature_std = std

    def get_price_target(self):
        """Return the ``price`` column of the train and validation frames as arrays."""
        train_prices = self.train_data_org['price'].to_numpy()
        valid_prices = self.valid_data_org['price'].to_numpy()
        return train_prices, valid_prices

    def do_pre_proccessing_data(self):
        """Run the full pipeline: load, extract targets, extract features, standardize."""
        self.load_data()
        self.train_targets, self.valid_targets = self.get_price_target()
        self.train_data, self.valid_data, self.test_data = self.drop_id_price()
        self.mean_std()

    def show_shape(self):
        """Print the shapes of the three processed feature arrays."""
        l_train = self.train_data.shape
        l_valid = self.valid_data.shape
        l_test = self.test_data.shape
        print(f"l_train:{l_train}, l_valid:{l_valid}, l_test:{l_test}")

    def get_data_shape1(self, input_data):
        """Return the size of axis 1 of ``input_data`` (the feature count for a 2-D array)."""
        return input_data.shape[1]

In [None]:
# Read the three CSV splits, then build the price targets and the
# standardized feature arrays that the rest of the notebook uses.
all_data = pre_processing_data(
    train_data_path="train-v3.csv",
    valid_data_path="valid-v3.csv",
    test_data_path="test-v3.csv",
)
all_data.do_pre_proccessing_data()


In [None]:
from tensorflow.keras import models
from tensorflow.keras import layers
#import tensorflow as tf
#tf.config.experimental.list_physical_devices('GPU')
# Configure the Session used by Keras

def build_model(input_shape1):
    """Build and compile a fully connected regression network.

    Args:
        input_shape1: number of input features (width of one sample).

    Returns:
        A compiled Sequential model: three 128-unit ReLU layers followed by a
        single linear output unit, trained with Adam on MSE and reporting MAE.
    """
    model = models.Sequential()
    # Only the first layer declares the input width; later layers take their
    # input size from the previous layer's output.
    model.add(layers.Dense(128, activation='relu', input_shape=(input_shape1,)))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(128, activation='relu'))
    # No activation on the output: the network predicts an unbounded scalar.
    model.add(layers.Dense(1, kernel_initializer='normal'))
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

In [None]:
import numpy as np
# K-fold cross-validation over the training split.
# Each fold holds out one contiguous slice of `num_val_samples` rows for
# validation and trains a fresh model on the remaining rows. When the row
# count is not divisible by k, the trailing remainder rows are never held out
# and stay in the training portion of every fold.
k = 4
num_val_samples = len(all_data.train_data) // k
num_epochs = 5
all_mae_histories = []
# The input width is the same for every fold, so compute it once.
length = all_data.get_data_shape1(all_data.train_data)
for i in range(k):
    print('processing fold #', i)
    val_start = i * num_val_samples
    val_stop = (i + 1) * num_val_samples

    # Held-out slice for this fold.
    val_data = all_data.train_data[val_start:val_stop]
    val_targets = all_data.train_targets[val_start:val_stop]
    # Report shapes only; printing the arrays themselves floods the output.
    print(val_data.shape)

    # Everything before and after the held-out slice is used for training.
    partial_train_data = np.concatenate(
        [all_data.train_data[:val_start], all_data.train_data[val_stop:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [all_data.train_targets[:val_start], all_data.train_targets[val_stop:]],
        axis=0)
    print(partial_train_data.shape, partial_train_targets.shape)

    print('processing fold build mode#', i)
    model = build_model(length)
    history = model.fit(partial_train_data, partial_train_targets,
                        validation_data=(val_data, val_targets),
                        epochs=num_epochs, batch_size=1024, verbose=1)
    print('processing fold build mode ok#', i)

    # One validation-MAE value per epoch for this fold.
    mae_history = history.history['val_mae']
    all_mae_histories.append(mae_history)
    

In [None]:
# Average the validation MAE across folds, epoch by epoch.
# Every fold's history is already appended inside the cross-validation loop,
# so nothing is appended here: appending the last `history` again would
# double-count the final fold and grow the list each time this cell is re-run.
average_mae_history = [
    np.mean(epoch_maes) for epoch_maes in zip(*all_mae_histories)]

In [None]:
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
# Plot the fold-averaged validation MAE against the 1-based epoch number.
epoch_numbers = range(1, len(average_mae_history) + 1)
plt.plot(epoch_numbers, average_mae_history)
plt.ylabel('Validation MAE')
plt.xlabel('Epochs')
plt.show()

In [None]:
# Number of leading epochs sliced off the averaged MAE curve before smoothing.
# NOTE(review): this equals num_epochs (5) as set in the cross-validation
# cell, so the slice taken below is empty with the current settings.
ommit_observed_mae = 5
def smooth_curve(points, factor=0.9):
  """Return an exponentially smoothed copy of `points` as a list.

  The first value is kept as-is; each later value is blended with the
  previous smoothed value as `previous * factor + point * (1 - factor)`.
  An empty input yields an empty list.
  """
  result = []
  for position, value in enumerate(points):
    if position == 0:
      # Nothing to blend with yet: the curve starts at the first point.
      blended = value
    else:
      blended = result[-1] * factor + value * (1 - factor)
    result.append(blended)
  return result

# Drop the first `ommit_observed_mae` epochs before smoothing, but only when
# at least two points would remain; otherwise keep the whole curve so the
# plot is not empty (e.g. when the omit count equals the number of epochs).
remaining_points = len(average_mae_history) - ommit_observed_mae
skipped_epochs = ommit_observed_mae if remaining_points >= 2 else 0
smooth_mae_history = smooth_curve(average_mae_history[skipped_epochs:])

# Label the x axis with the real epoch numbers, continuing after the
# skipped epochs instead of restarting at 1.
first_epoch = skipped_epochs + 1
plt.plot(range(first_epoch, first_epoch + len(smooth_mae_history)), smooth_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()

In [None]:
from tensorflow.keras import backend as K
# Memory clean-up: reset Keras' global state before the final model is built,
# so the models created during cross-validation above no longer hold resources.
K.clear_session()

In [None]:
# Train a fresh model on the whole training split for a fixed number of
# epochs, then score it on the validation split.
get_observed_mae = 5
# ommit_observed_mae + get_observed_mae
best_epochs_num = 200
print(best_epochs_num)

length = all_data.train_data.shape[1]
model = build_model(length)
model.fit(
    all_data.train_data,
    all_data.train_targets,
    batch_size=1024,
    epochs=best_epochs_num,
    verbose=0,
)

# evaluate() returns the loss (MSE) followed by the compiled metric (MAE).
final_scores = model.evaluate(all_data.valid_data, all_data.valid_targets, verbose=0)
valid_mse_socre, valid_mae_score = final_scores

In [None]:
# Report the final model's mean absolute error on the validation split.
print(valid_mae_score)

In [None]:
# Predict with the final model on the standardized test features.
yy = model.predict(all_data.test_data)
# Bare expression: displays the predictions as the cell output.
yy

In [None]:
# Write the test-set predictions to test.csv as comma-separated values.
# No header row or id column is written.
np.savetxt('test.csv', yy, delimiter = ',')