In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install tensorflow==1.15.0

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
%matplotlib inline

import tensorflow as tf
from tqdm import tqdm
# Show which TensorFlow build was actually imported.
print(tf.__version__)

# Compact NumPy display: three decimals and no scientific notation.
np.set_printoptions(suppress=True, precision=3)

In [None]:
# Directory holding the competition files.
path = "/kaggle/input/walmart-recruiting-store-sales-forecasting/"

# Column names imposed on each file (header=0 discards the files' own header row).
train_columns = ['Store','Dept','Date','weeklySales','isHoliday']
feature_columns = ['Store','Date','Temperature','Fuel_Price','MarkDown1','MarkDown2','MarkDown3','MarkDown4',
                   'MarkDown5','CPI','Unemployment','IsHoliday']
store_columns = ['Store','Type','Size']

dataset = pd.read_csv(path + "train.csv.zip", names=train_columns, sep=',', header=0)

features = pd.read_csv(path + "features.csv.zip", names=feature_columns, sep=',', header=0)
features = features.drop(columns=['IsHoliday'])

stores = pd.read_csv(path + "stores.csv", names=store_columns, sep=',', header=0)

# Left-join store attributes, then the per-store/per-date features. No `on=` is
# given, so pandas joins on the column names the two frames share.
dataset = dataset.merge(stores, how='left')
dataset = dataset.merge(features, how='left')

dataset.head()

In [None]:
# Total weekly sales per (Dept, Date, Store), then pivot Store into columns so
# each row is one (Dept, Date) pair and each column one store.
weekly_totals = dataset.groupby(['Dept', 'Date', 'Store'])['weeklySales'].sum()
sales = weekly_totals.unstack()
sales

## sales_complete is the subset of the sales table whose rows contain no missing values







In [None]:
'''
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
data = scaler.fit_transform(sales).astype(np.float32)
sales_scaled = pd.DataFrame(data=data, columns=sales.columns, index=sales.index)

sales_complete = sales_scaled[sales_scaled.isna().sum(axis=1) == 0]
print(sales_complete.shape)
sales_complete
'''
# The string above is a disabled variant that standardised the table first.
# Active path: keep only the rows in which no store column is NaN.
has_gap = sales.isna().any(axis=1)
sales_complete = sales[~has_gap]
print(sales_complete.shape)
sales_complete

In [None]:
# Keep the first 4416 rows and the first 42 columns of the complete table.
# .copy() detaches the result from sales_complete.
n_rows_kept = 4416
n_cols_kept = 42
sales_new = sales_complete.iloc[:n_rows_kept, :n_cols_kept].copy()
sales_new.shape

In [None]:
# Plain NumPy matrix of the selected sales block (rows and columns as in sales_new).
# NOTE(review): to_numpy() may return a view of the frame's data rather than a
# copy — confirm before relying on sales_new staying untouched downstream.
data_new = sales_new.to_numpy()

In [None]:
# Split the data into n_subsets pieces. Subset k takes the k-th block of
# sub_size consecutive rows and a window of cols_per_subset columns that starts
# col_shift columns further right for every subset.
full_size = len(data_new)
n_subsets = 6
sub_size = full_size // n_subsets

cols_per_subset = 12   # width of each subset's column window
col_shift = 3          # how far the window moves between consecutive subsets

subsets = []
# Iterate over n_subsets (not a literal 6) so changing the setting above
# actually changes the number of pieces produced.
for k in range(n_subsets):
    row_start = k * sub_size
    col_start = k * col_shift
    # Basic slicing: each piece is a view into data_new, not a copy.
    subsets.append(data_new[row_start:row_start + sub_size,
                            col_start:col_start + cols_per_subset])

print("Split data into", n_subsets, "subsets. Size of one subset:", sub_size)

In [None]:
# Apply GAIN on the last dataset.
# The entries of `subsets` are slices (views) of data_new, so take a copy:
# any later in-place processing of Data then cannot alter data_new, and
# re-running this cell always starts again from the raw values.
Data = subsets[5].copy()
# Data = raw_data

In [None]:
#%% System Parameters
mb_size = 128       # 1. Mini batch size
p_miss = 0.2        # 2. Missing rate
p_hint = 0.9        # 3. Hint rate
alpha = 10          # 4. Loss hyperparameter (weight of the MSE term in the generator loss)
train_rate = 0.8    # 5. Train rate (fraction of rows used for training)

# Parameters: number of samples (rows) and number of features (columns) in Data
No, Dim = Data.shape

# Hidden state dimensions
H_Dim1 = H_Dim2 = Dim
k = 0.99  # pruning percentage
print(Dim)

In [None]:
# regularization
#regularizer = tf.contrib.layers.l2_regularizer(scale=0.001)
#regularizer = tf.contrib.layers.l1_regularizer(scale=0.5)
#regularizer=tf.contrib.layers.l1_l2_regularizer(scale_l1=0.001,scale_l2=0.001,scope=None)

In [None]:
# Normalization (0 to 1)
def normalization(Data, Dim=12, miss_rate=None):
    """Min-max scale the first ``Dim`` columns and draw a random observation mask.

    Parameters
    ----------
    Data : 2-D array-like of numbers
        Rows are samples, columns are features. The input is left unmodified.
    Dim : int, default 12
        Number of leading columns to scale and to generate mask columns for.
    miss_rate : float, optional
        Probability with which an entry is marked missing. When omitted, the
        notebook-level ``p_miss`` setting is used.

    Returns
    -------
    scaled : np.ndarray
        Float copy of ``Data``. Each of its first ``Dim`` columns has had its
        minimum subtracted and was then divided by ``(column max + 1e-6)``.
        Any further columns are passed through unchanged.
    Missing : np.ndarray of shape (len(Data), Dim)
        1.0 where an entry is kept as observed, 0.0 where it is marked missing.
    """
    rate = p_miss if miss_rate is None else miss_rate

    # Work on a float copy: the caller's array stays intact (it may be a view of
    # a larger matrix), and integer input is not truncated by the division.
    scaled = np.array(Data, dtype=float)
    n_rows = len(scaled)

    block = scaled[:, :Dim]            # view into `scaled`, updated in place
    block -= block.min(axis=0)
    block /= block.max(axis=0) + 1e-6  # epsilon avoids 0/0 on constant columns

    #%% Missing introducing
    # Size the mask from the data itself rather than from a global row count,
    # so mask and data always agree in length.
    draws = np.random.uniform(0., 1., size=(n_rows, Dim))
    Missing = 1. * (draws > rate)
    return scaled, Missing

In [None]:
#%% Train Test Division    
def train_test(Data, Missing, train_frac=None):
    """Randomly split the rows of ``Data`` and ``Missing`` into train and test parts.

    Parameters
    ----------
    Data : np.ndarray
        2-D array with one sample per row.
    Missing : np.ndarray
        Mask array with the same number of rows as ``Data``.
    train_frac : float, optional
        Fraction of rows assigned to the training part. When omitted, the
        notebook-level ``train_rate`` setting is used.

    Returns
    -------
    tuple
        ``(trainX, testX, trainM, testM, Train_No, Test_No)``. Both arrays are
        indexed with the same row permutation, so every mask row stays paired
        with its data row.

    Raises
    ------
    ValueError
        If ``Data`` and ``Missing`` have a different number of rows.
    """
    n_rows = len(Data)
    if len(Missing) != n_rows:
        raise ValueError("Data and Missing must have the same number of rows")

    frac = train_rate if train_frac is None else train_frac

    # Permute the rows actually passed in, not a global row count.
    order = np.random.permutation(n_rows)

    Train_No = int(n_rows * frac)
    Test_No = n_rows - Train_No

    train_rows = order[:Train_No]
    test_rows = order[Train_No:]

    # Train / Test Features
    trainX = Data[train_rows, :]
    testX = Data[test_rows, :]

    # Train / Test Missing Indicators
    trainM = Missing[train_rows, :]
    testM = Missing[test_rows, :]

    return trainX, testX, trainM, testM, Train_No, Test_No

In [None]:
#%% Necessary Functions

# 1. Xavier Initialization Definition
def xavier_init(size):
    """Return a random-normal tensor of shape ``size`` for weight initialisation.

    The standard deviation is ``1 / sqrt(size[0] / 2)``, i.e. ``sqrt(2 / fan_in)``
    where ``fan_in`` is the first entry of ``size``.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)
    
# Hint Vector Generation
def sample_M(m, n, p):
    """Draw an ``m`` x ``n`` float array of 0./1. flags.

    Each entry is 1. when an independent uniform(0, 1) draw exceeds ``p``,
    otherwise 0.
    """
    draws = np.random.uniform(0., 1., size=[m, n])
    return 1. * (draws > p)

In [None]:
#%% GAIN Architecture   
   
#%% 1. Input Placeholders
# 1.1. Data Vector
def input_placeholder():
    """Create the four float32 placeholders of shape ``[None, Dim]`` used by the graph.

    Returns, in this order:
      X      - data vector
      M      - mask vector
      H      - hint vector
      New_X  - X with missing values
    """
    shape = [None, Dim]
    # Created in the same order as they are returned.
    X, M, H, New_X = [tf.placeholder(tf.float32, shape=shape) for _ in range(4)]
    return X, M, H, New_X


In [None]:
#%% 2. Discriminator
# Width of the discriminator's second hidden layer.
# NOTE(review): 37 replaces the symmetric H_Dim2 width used by the earlier,
# commented-out definition — confirm where this value comes from.
D_H_DIM2 = 37

# Layer 1: concatenated data + hint (2 * Dim inputs) -> H_Dim1 units.
# NOTE(review): the add_to_collection calls register the weight variables
# themselves under REGULARIZATION_LOSSES; nothing in the visible cells reads
# that collection — confirm this is intended.
D_W1 = tf.Variable(xavier_init([Dim*2, H_Dim1]), name="D_W1")     # Data + Hint as inputs
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, D_W1)
D_b1 = tf.Variable(tf.zeros(shape = [H_Dim1]), name="D_b1")

# Layer 2: its input width must equal layer 1's output width (H_Dim1). This was
# a literal 12, which only matches while Dim == 12.
D_W2 = tf.Variable(xavier_init([H_Dim1, D_H_DIM2]), name="D_W2")
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, D_W2)
D_b2 = tf.Variable(tf.zeros(shape = [D_H_DIM2]), name="D_b2")

# Layer 3: back to one output per feature.
D_W3 = tf.Variable(xavier_init([D_H_DIM2, Dim]), name="D_W3")
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, D_W3)
D_b3 = tf.Variable(tf.zeros(shape = [Dim]), name="D_b3")       # Output is multi-variate

# Variables updated by the discriminator's optimizer.
theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

In [None]:
#%% 3. Generator
# Width of the generator's second hidden layer.
# NOTE(review): 37 replaces the symmetric H_Dim2 width used by the earlier,
# commented-out definition — confirm where this value comes from.
G_H_DIM2 = 37

# Layer 1: concatenated data + mask (2 * Dim inputs) -> H_Dim1 units.
# NOTE(review): the add_to_collection calls register the weight variables
# themselves under REGULARIZATION_LOSSES; nothing in the visible cells reads
# that collection — confirm this is intended.
G_W1 = tf.Variable(xavier_init([Dim*2, H_Dim1]), name="G_W1")     # Data + Mask as inputs (Random Noises are in Missing Components)
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, G_W1)
G_b1 = tf.Variable(tf.zeros(shape = [H_Dim1]), name="G_b1")

# Layer 2: its input width must equal layer 1's output width (H_Dim1). This was
# a literal 12, which only matches while Dim == 12.
G_W2 = tf.Variable(xavier_init([H_Dim1, G_H_DIM2]), name="G_W2")
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, G_W2)
G_b2 = tf.Variable(tf.zeros(shape = [G_H_DIM2]), name="G_b2")

# Layer 3: back to one output per feature.
G_W3 = tf.Variable(xavier_init([G_H_DIM2, Dim]), name="G_W3")
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, G_W3)
G_b3 = tf.Variable(tf.zeros(shape = [Dim]), name="G_b3")

# Variables updated by the generator's optimizer.
theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

In [None]:
#%% GAIN Function

#%% 1. Generator
def generator(new_x,m):
    """Generator network: two ReLU layers followed by a sigmoid output layer.

    ``new_x`` and ``m`` are concatenated along axis 1 and passed through the
    G_W1/G_b1, G_W2/G_b2 and G_W3/G_b3 layers. The sigmoid keeps every output
    in [0, 1].
    """
    stacked = tf.concat([new_x, m], axis=1)  # Mask + Data Concatenate
    hidden1 = tf.nn.relu(tf.matmul(stacked, G_W1) + G_b1)
    hidden2 = tf.nn.relu(tf.matmul(hidden1, G_W2) + G_b2)
    return tf.nn.sigmoid(tf.matmul(hidden2, G_W3) + G_b3)
    
#%% 2. Discriminator
def discriminator(new_x, h):
    """Discriminator network: two ReLU layers followed by a sigmoid output layer.

    ``new_x`` and ``h`` are concatenated along axis 1 and passed through the
    D_W1/D_b1, D_W2/D_b2 and D_W3/D_b3 layers. Returns one probability in
    [0, 1] per input component.
    """
    stacked = tf.concat([new_x, h], axis=1)  # Hint + Data Concatenate
    hidden1 = tf.nn.relu(tf.matmul(stacked, D_W1) + D_b1)
    hidden2 = tf.nn.relu(tf.matmul(hidden1, D_W2) + D_b2)
    logits = tf.matmul(hidden2, D_W3) + D_b3
    return tf.nn.sigmoid(logits)

In [None]:
#%% 3. Other functions
# Random sample generator for Z
def sample_Z(m, n):
    """Return an ``m`` x ``n`` array of uniform noise drawn from [0, 0.01)."""
    noise_low, noise_high = 0., 0.01
    return np.random.uniform(noise_low, noise_high, size=[m, n])

# Mini-batch generation
def sample_idx(m, n):
    """Return the first ``n`` entries of a random permutation of ``range(m)``.

    The result holds distinct indices; it has fewer than ``n`` entries when
    ``m < n``.
    """
    return np.random.permutation(m)[:n]

In [None]:
#%% Structure
# Placeholders: X = data, M = mask, H = hint, New_X = data with missing entries filled in.
X,M,H,New_X=input_placeholder()
#%% Graph wiring
# Generator: output for every component, computed from New_X and the mask
G_sample = generator(New_X,M)

# Combine with original data: keep New_X where M == 1, use the generator output where M == 0
Hat_New_X = New_X * M + G_sample * (1-M)

# Discriminator: per-component probability computed from the combined data and the hint
D_prob = discriminator(Hat_New_X, H)

#%% Loss
# Discriminator: cross-entropy between D_prob and the mask M (1e-8 keeps log() finite).
D_loss1 = -tf.reduce_mean(M * tf.log(D_prob + 1e-8) + (1-M) * tf.log(1. - D_prob + 1e-8)) 
# Generator adversarial term: only components with M == 0 contribute.
G_loss1 = -tf.reduce_mean((1-M) * tf.log(D_prob + 1e-8))
# Squared error between New_X and G_sample on components with M == 1, divided by mean(M).
MSE_train_loss = tf.reduce_mean((M * New_X - M * G_sample)**2) / tf.reduce_mean(M)

D_loss = D_loss1
# alpha weights the MSE term against the adversarial term.
G_loss = G_loss1 + alpha * MSE_train_loss 

#%% MSE Performance metric
# Squared error between X and G_sample on components with M == 0, divided by mean(1 - M).
MSE_test_loss = tf.reduce_mean(((1-M) * X - (1-M)*G_sample)**2) / tf.reduce_mean(1-M)

#%% Solver
# Separate Adam optimizers (default settings); each updates only its own network's variables.
D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

In [None]:
from tqdm import tqdm
import matplotlib
import pickle
import matplotlib.pyplot as plt
from tensorflow.contrib.model_pruning.python import pruning
from tensorflow.contrib.model_pruning.python.layers import layers
import time
%matplotlib inline

In [None]:
# Scale the selected subset and draw its missingness mask. Data is rebound to
# the function's return value.
Data, Missing = normalization(Data)

# Shuffle the rows and split data and mask into train and test parts.
split = train_test(Data, Missing)
trainX, testX, trainM, testM, Train_No, Test_No = split

In [None]:
# Sessions
# The session is left open on purpose so the trained variables stay available
# after this cell has run.

sess = tf.Session()
sess.run(tf.global_variables_initializer())

#%% Iterations
# Per-iteration RMSE values, both measured on the current training mini-batch:
#   train_losses - components with M == 1 (reconstruction of New_X)
#   test_losses  - components with M == 0 (compared against X)
train_losses = []
test_losses = []

#%% Start Iterations
t=time.time()
for it in tqdm(range(1000)):

    #%% Inputs
    mb_idx = sample_idx(Train_No, mb_size)
    n_mb = len(mb_idx)   # equals mb_size unless Train_No is smaller
    X_mb = trainX[mb_idx,:]

    Z_mb = sample_Z(n_mb, Dim)
    # Use the mask rows that belong to the sampled data rows. Taking
    # trainM[:mb_size] instead paired every batch with the masks of the first
    # mb_size rows, so a row's mask changed with its batch position and values
    # marked missing for it were fed in as observed in other iterations.
    M_mb = trainM[mb_idx,:]
    H_mb1 = sample_M(n_mb, Dim, 1-p_hint)
    H_mb = M_mb * H_mb1

    New_X_mb = M_mb * X_mb + (1-M_mb) * Z_mb  # Missing Data Introduce

    # One discriminator step, then one generator step, on the same mini-batch.
    _, D_loss_curr = sess.run([D_solver, D_loss1], feed_dict = {M: M_mb, New_X: New_X_mb, H: H_mb})
    _, G_loss_curr, MSE_train_loss_curr, MSE_test_loss_curr = sess.run([G_solver, G_loss1, MSE_train_loss, MSE_test_loss],
                                                                       feed_dict = {X: X_mb, M: M_mb, New_X: New_X_mb, H: H_mb})

    #%% Intermediate Losses
    train_losses.append(np.sqrt(MSE_train_loss_curr))
    test_losses.append(np.sqrt(MSE_test_loss_curr))
t1=time.time()
print("Time cost before pruning: ",t1-t)
#%% Final Loss
# Evaluate on the held-out rows: fill the masked entries with noise and compare
# the generator output with the true values on those entries.

Z_mb = sample_Z(Test_No, Dim)
M_mb = testM
X_mb = testX

New_X_mb = M_mb * X_mb + (1-M_mb) * Z_mb  # Missing Data Introduce

# H is not fed here: neither MSE_test_loss nor G_sample depends on the hint.
MSE_final, Sample = sess.run([MSE_test_loss, G_sample], feed_dict = {X: testX, M: testM, New_X: New_X_mb})

print('Final Test RMSE: ' + str(np.sqrt(MSE_final)))
#print("Sparsity of layers", sess.run(tf.contrib.model_pruning.get_weight_sparsity()))



In [None]:
# Plot training curve
# Scatter both per-iteration RMSE series on one set of axes, save the figure
# to disk, then display it.
fig, ax = plt.subplots()
ax.plot(train_losses, 'b.', label="Training Loss", alpha=0.5)
ax.plot(test_losses, 'g.', label="Test Loss", alpha=0.5)
ax.set_title("Training baseline GAIN from Scratch")
ax.set_xlabel("Number of Mini-Batches")
ax.set_ylabel("RMSE")
#ax.set_ylim(0.075,0.3)
ax.legend()
fig.savefig("GAIN_Baseline.png")
plt.show()