In [46]:
%%bash
export PROJECT=$(gcloud config list project --format "value(core.project)")
echo "Your current GCP Project Name is: "${PROJECT}


Your current GCP Project Name is: wavesproject


In [47]:
import os
PROJECT = "wavesproject"  # Replace with your project name
BUCKET_NAME="wavesbucket-2"

REGION="us-central1"

'''
gs://wavesbucket-2
https://console.cloud.google.com/storage/browser/wavesbucket-2

https://storage.googleapis.com/wavesbucket-2/wavesAI.csv

https://storage.googleapis.com/wavesbucket-2/waves-validation-AI.csv
'''

os.environ["PROJECT"] = PROJECT
os.environ["BUCKET_NAME"] = BUCKET_NAME
os.environ["REGION"] = REGION
os.environ["TFVERSION"] = "2.3"
os.environ["PYTHONVERSION"] = "3.7"
#Ensure to remove temporary files
!rm -f /tmp/waves_data/waves-validation-AI.csv
!rm -f /tmp/waves_data/wavesAI.csv
!head -5 /tmp/waves_data/waves-validation-AI.csv

head: cannot open '/tmp/waves_data/waves-validation-AI.csv' for reading: No such file or directory


In [48]:
%%bash

if ! gsutil ls | grep -q gs://${BUCKET_NAME}; then
    gsutil mb -l ${REGION} gs://${BUCKET_NAME}
fi

In [49]:
%%bash

gsutil cp  gs://$BUCKET_NAME/wavesAI.csv .
gsutil cp  gs://$BUCKET_NAME/waves-validation-AI.csv .

Copying gs://wavesbucket-2/wavesAI.csv...
/ [1 files][  3.0 KiB/  3.0 KiB]                                                
Operation completed over 1 objects/3.0 KiB.                                      
Copying gs://wavesbucket-2/waves-validation-AI.csv...
/ [1 files][  2.0 KiB/  2.0 KiB]                                                
Operation completed over 1 objects/2.0 KiB.                                      


In [50]:
%%bash
mkdir -p trainer
touch trainer/__init__.py

In [51]:
%%writefile trainer/util.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
from six.moves import urllib
import tempfile

import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler

# Storage directory
DATA_DIR = os.path.join(tempfile.gettempdir(), 'waves_data')

# Download options.
DATA_URL = (
    'https://storage.googleapis.com/wavesbucket-2'
    )
TRAINING_FILE = 'wavesAI.csv'
EVAL_FILE = 'waves-validation-AI.csv'
TRAINING_URL = '%s/%s' % (DATA_URL, TRAINING_FILE)
EVAL_URL = '%s/%s' % (DATA_URL, EVAL_FILE)

# These are the features in the dataset.

_CSV_COLUMNS = [
    'ECN', 'ATT1', 'ATT2', 'ATT3', 'ATT4','ATT5',
    'ATT6', 'ATT7', 'ATT8', 'ATT9', 'ATT10','ATT11',
    'ATT12', 'ATT13', 'ATT14', 'ATT15','ATT16','ATT17',
    'ATT18','ATT19','ATT20','gender','workclass','marital_status',
    'occupation','relationship','race','native_country','income_bracket',
    'ATT_CODE'
]

# This is the label (target) we want to predict.
_LABEL_COLUMN = 'ATT_CODE'

# These are columns we will not use as features for training. There are many
# reasons not to use certain attributes of data for training. Perhaps their
# values are noisy or inconsistent, or perhaps they encode bias that we do not
# want our model to learn. For a deep dive into the features of this Census
# dataset and the challenges they pose, see the Introduction to ML Fairness
# Notebook: https://colab.research.google.com/github/google/eng-edu/blob
# /master/ml/cc/exercises/intro_to_fairness.ipynb

UNUSED_COLUMNS = ['gender']

_CATEGORICAL_TYPES = {
    'workclass': pd.api.types.CategoricalDtype(categories=[
        'Federal-gov', 'Local-gov', 'Never-worked', 'Private', 'Self-emp-inc',
        'Self-emp-not-inc', 'State-gov', 'Without-pay'
    ]),
    'marital_status': pd.api.types.CategoricalDtype(categories=[
        'Divorced', 'Married-AF-spouse', 'Married-CIV-spouse',
        'Married-spouse-absent', 'Never-married','Married', 'UnMarried,','Separated', 'Widowed'
    ]),
    'occupation': pd.api.types.CategoricalDtype([
        'Adm-clerical', 'Armed-Forces', 'Craft-repair', 'Exec-managerial',
        'Farming-fishing', 'Handlers-cleaners', 'Machine-op-inspct',
        'Other-service', 'Priv-house-serv', 'Prof-specialty', 'Protective-serv',
        'Sales', 'Prof,','Tech-support', 'Transport-moving'
    ]),
    'relationship': pd.api.types.CategoricalDtype(categories=[
        'Husband', 'Not-in-family', 'Other-relative', 'Own-child', 'Unmarried',
        'Wife'
    ]),
    'race': pd.api.types.CategoricalDtype(categories=[
        'Amer-Indian-Eskimo', 'Asian-Pac-Islander', 'Asian','Black', 'Other', 'White'
    ]),
    'native_country': pd.api.types.CategoricalDtype(categories=[
        'Cambodia', 'Canada', 'China', 'Columbia', 'Cuba', 'Dominican-Republic',
        'Ecuador', 'El-Salvador', 'England', 'France', 'Germany', 'Greece',
        'Guatemala', 'Haiti', 'Holand-Netherlands', 'Honduras', 'Hong',
        'Hungary',
        'India', 'Iran', 'Ireland', 'Italy', 'Jamaica', 'Japan', 'Laos',
        'Mexico',
        'Nicaragua', 'Outlying-US(Guam-USVI-etc)', 'Peru', 'Philippines',
        'Poland',
        'Portugal', 'Puerto-Rico', 'Scotland', 'South', 'Taiwan', 'Thailand',
        'Trinadad&Tobago', 'United-States', 'Vietnam', 'Yugoslavia'
    ]),
    'income_bracket': pd.api.types.CategoricalDtype(categories=[
        '<=50K', '>50K'
    ])
}


def _download_and_clean_file(filename, url):
    """Downloads data from url, and makes changes to match the CSV format.

    The CSVs may use spaces after the comma delimters (non-standard) or include
    rows which do not represent well-formed examples. This fUSEDction strips out
    some of these problems.

    Args:
      filename: filename to save url to
      url: URL of resource to download
    """
    temp_file, _ = urllib.request.urlretrieve(url)
    with tf.io.gfile.GFile(temp_file, 'r') as temp_file_object:
        with tf.io.gfile.GFile(filename, 'w') as file_object:
            for line in temp_file_object:
                line = line.strip()
                line = line.replace(', ', ',')
                if not line or ',' not in line:
                    continue
                if line[-1] == '.':
                    line = line[:-1]
                line += '\n'
                file_object.write(line)
    tf.io.gfile.remove(temp_file)


def download(data_dir):
    """Downloads census data if it is not already present.

    Args:
      data_dir: directory where we will access/save the census data
    """
    print ('In download data_dir is ',data_dir)
    
    tf.io.gfile.makedirs(data_dir)

    training_file_path = os.path.join(data_dir,TRAINING_FILE )
    
    print ("Returning training file path in download(datadir) method  ",training_file_path)   
    
    
    if not tf.io.gfile.exists(training_file_path):
        _download_and_clean_file(training_file_path, TRAINING_URL)

    eval_file_path = os.path.join(data_dir, EVAL_FILE)
    if not tf.io.gfile.exists(eval_file_path):
        _download_and_clean_file(eval_file_path, EVAL_URL)

    print ("Returning training file path in download(datadir) method  ",training_file_path)     
    return training_file_path, eval_file_path


def preprocess(dataframe):
    """Converts categorical features to numeric. Removes unused columns.

    Args:
      dataframe: Pandas dataframe with raw data

    Returns:
      Dataframe with preprocessed data
    """
    
    #Remove gender to take care of bias
    dataframe = dataframe.drop(columns=UNUSED_COLUMNS)
    
    '''
    [
    'ECN', 'ATT1', 'ATT2', 'ATT3', 'ATT4','ATT5',
    'ATT6', 'ATT7', 'ATT8', 'ATT9', 'ATT10','ATT11',
    'ATT12', 'ATT13', 'ATT14', 'ATT15','ATT16','ATT17',
    'ATT18','ATT19','ATT20','workclass','marital_status',
    'occupation','relationship','race','native_country','income_bracket',
    'ATT_CODE'
    ]
    '''

    # Convert integer valued (numeric) columns to floating point
    numeric_columns = dataframe.select_dtypes(['int64']).columns
    
    numeric_columns=['ECN', 'ATT1', 'ATT2', 'ATT3', 'ATT4','ATT5',
    'ATT6', 'ATT7', 'ATT8', 'ATT9', 'ATT10','ATT11',
    'ATT12', 'ATT13', 'ATT14', 'ATT15','ATT16','ATT17',
    'ATT18','ATT19','ATT20','ATT_CODE']
    
    print(numeric_columns)
    dataframe[numeric_columns] = dataframe[numeric_columns].astype('float32')

    # Convert categorical columns to numeric
    cat_columns = dataframe.select_dtypes(['object']).columns
    
    cat_columns=['workclass','marital_status',
    'occupation','relationship','race','native_country','income_bracket']
    
    
    dataframe[cat_columns] = dataframe[cat_columns].apply(lambda x: x.astype(
        _CATEGORICAL_TYPES[x.name]))
    dataframe[cat_columns] = dataframe[cat_columns].apply(lambda x: x.cat.codes)
    
    return dataframe


def standardize(dataframe):
    """Scales numerical columns using their means and standard deviation to get
    z-scores: the mean of each numerical column becomes 0, and the standard
    deviation becomes 1. This can help the model converge during training.

    Args:
      dataframe: Pandas dataframe

    Returns:
      Input dataframe with the numerical columns scaled to z-scores
    """
   
    dtypes = list(zip(dataframe.dtypes.index, map(str, dataframe.dtypes)))
   
    # Normalize numeric columns.
    for column, dtype in dtypes:
        if dtype == 'float32':
            dataframe[column] -= dataframe[column].mean()
            dataframe[column] /= dataframe[column].std()
            
                    
    """
    scaler = MinMaxScaler()
    
    for column, dtype in dtypes:
        if dtype == 'float32':
              dataframe[column] = np.asarray(dataframe[column]).astype('float32').reshape((-1, 1))
   
              dataframe[column]=scaler.fit_transform(dataframe[column].values)
           
    """
    dataframe=dataframe.replace(np.nan, 0)
    return dataframe


def load_data():
    """Loads data into preprocessed (train_x, train_y, eval_y, eval_y)
    dataframes.

    Returns:
      A tuple (train_x, train_y, eval_x, eval_y), where train_x and eval_x are
      Pandas dataframes with features for training and train_y and eval_y are
      numpy arrays with the corresponding labels.
    """
    # Download Census dataset: Training and eval csv files.
    training_file_path, eval_file_path = download(DATA_DIR)
    
    #training_file_path='waves.csv'
    
    #eval_file_path='waves-validation.csv'

    # This census data uses the value '?' for missing entries. We use
    # na_values to
    # find ? and set it to NaN.
    # https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv
    # .html
    
    print(training_file_path)
    print(eval_file_path)
    train_df = pd.read_csv(training_file_path, names=_CSV_COLUMNS,header=0,
                           na_values='?')
    eval_df = pd.read_csv(eval_file_path, names=_CSV_COLUMNS, header=0, na_values='?')
    
    
    
    print (train_df.iloc[0:1,:])
    
    train_df = preprocess(train_df)
    eval_df = preprocess(eval_df)

    # Split train and eval data with labels. The pop method copies and removes
    # the label column from the dataframe.
    train_x, train_y = train_df, train_df.pop(_LABEL_COLUMN)
    eval_x, eval_y = eval_df, eval_df.pop(_LABEL_COLUMN)

    # Join train_x and eval_x to normalize on overall means and standard
    # deviations. Then separate them again.
    all_x = pd.concat([train_x, eval_x], keys=['train', 'eval'])
    all_x = standardize(all_x)
    train_x, eval_x = all_x.xs('train'), all_x.xs('eval')

    # Reshape label columns for use with tf.data.Dataset
   
    #train_y=standardize(train_y)
    #eval_y=standardize(eval_y)
    '''
    all_y = pd.concat([train_y, eval_y], keys=['trainy', 'evaly'])
    print(all_y)
    print(eval_x)
    all_y = standardize(all_y)
    train_y, eval_y = all_y.xs('trainy'), all_x.xs('evaly')
    '''
    
    train_y = np.asarray(train_y).astype('float32').reshape((-1, 1))
    eval_y = np.asarray(eval_y).astype('float32').reshape((-1, 1))
    #Need to standardise this as well  
    scaler = MinMaxScaler()
    train_y=scaler.fit_transform(train_y)
    eval_y=scaler.fit_transform(eval_y)
    
    
    

    return train_x, train_y, eval_x, eval_y

Overwriting trainer/util.py


In [52]:

! cat /tmp/waves_data/wavesAI.csv

cat: /tmp/waves_data/wavesAI.csv: No such file or directory


In [53]:
%%writefile trainer/model.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf


def input_fn(features, labels, shuffle, num_epochs, batch_size):
    """Generates an input function to be used for model training.

    Args:
      features: numpy array of features used for training or inference
      labels: numpy array of labels for each example
      shuffle: boolean for whether to shuffle the data or not (set True for
        training, False for evaluation)
      num_epochs: number of epochs to provide the data for
      batch_size: batch size for training

    Returns:
      A tf.data.Dataset that can provide data to the Keras model for training or
        evaluation
    """
    
    #Reshape for LSTM 3D tensor
    features = features.reshape(features.shape[0], features.shape[1], 1)
    
    
    
    if labels is None:
        inputs = features
    else:
        inputs = (features, labels)
        
        
    
    dataset = tf.data.Dataset.from_tensor_slices(inputs)

    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(features))

    # We call repeat after shuffling, rather than before, to prevent separate
    # epochs from blending together.
    dataset = dataset.repeat(num_epochs)
    dataset = dataset.batch(batch_size)
    return dataset


def create_keras_model(input_dim, learning_rate):
    """Creates Keras Model for Binary Classification.

    The single output node + Sigmoid activation makes this a Logistic
    Regression.

    Args:
      input_dim: How many features the input has
      learning_rate: Learning rate for training

    Returns:
      The compiled Keras model (still needs to be trained)
    
    """
    """
    session = tf.keras.backend.get_session()

    init = tf.global_variables_initializer()
    session.run(init)
    """
    tf.config.experimental_run_functions_eagerly(True)
    
    
    Dense = tf.keras.layers.Dense
    model = tf.keras.Sequential(
        [
             tf.keras.layers.LSTM(units=30, input_shape=(input_dim,1)),
             Dense(1, activation=tf.nn.sigmoid)
        ])

    # Custom Optimizer:
    # https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer
    optimizer = tf.keras.optimizers.RMSprop(lr=learning_rate)

    # Compile Keras model
    model.compile(
        loss='mae', optimizer=optimizer, metrics=['accuracy'])
    return model

Overwriting trainer/model.py


In [54]:
%%writefile trainer/task.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os

from . import model
from . import util

import tensorflow as tf


def get_args():
    """Argument parser.

    Returns:
      Dictionary of arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--job-dir',
        type=str,
        required=True,
        help='local or GCS location for writing checkpoints and exporting '
             'models')
    parser.add_argument(
        '--num-epochs',
        type=int,
        default=30,
        help='number of times to go through the data, default=20')
    parser.add_argument(
        '--batch-size',
        default=4,
        type=int,
        help='number of records to read during each training step, default=128')
    parser.add_argument(
        '--learning-rate',
        default=.01,
        type=float,
        help='learning rate for gradient descent, default=.01')
    parser.add_argument(
        '--verbosity',
        choices=['DEBUG', 'ERROR', 'FATAL', 'INFO', 'WARN'],
        default='INFO')
    args, _ = parser.parse_known_args()
    return args


def train_and_evaluate(args):
    """Trains and evaluates the Keras model.

    Uses the Keras model defined in model.py and trains on data loaded and
    preprocessed in util.py. Saves the trained model in TensorFlow SavedModel
    format to the path defined in part by the --job-dir argument.

    Args:
      args: dictionary of arguments - see get_args() for details
    """

    train_x, train_y, eval_x, eval_y = util.load_data()
    
    # dimensions
    num_train_examples, input_dim = train_x.shape
    num_eval_examples = eval_x.shape[0]

    # Create the Keras Model
    keras_model = model.create_keras_model(
        input_dim=input_dim, learning_rate=args.learning_rate)

  

  
    
    # Pass a numpy array by passing DataFrame.values
    training_dataset = model.input_fn(
        features=train_x.values,
        labels=train_y,
        shuffle=True,
        num_epochs=args.num_epochs,
        batch_size=args.batch_size)

    # Pass a numpy array by passing DataFrame.values
    validation_dataset = model.input_fn(
        features=eval_x.values,
        labels=eval_y,
        shuffle=False,
        num_epochs=args.num_epochs,
        batch_size=num_eval_examples)

    # Setup Learning Rate decay.
    lr_decay_cb = tf.keras.callbacks.LearningRateScheduler(
        lambda epoch: args.learning_rate + 0.02 * (0.5 ** (1 + epoch)),
        verbose=True)

    # Setup TensorBoard callback.
    tensorboard_cb = tf.keras.callbacks.TensorBoard(
        os.path.join(args.job_dir, 'keras_tensorboard'),
        histogram_freq=0)
    
    
    #init = tf.global_variables_initializer()
    #tf.config.experimental_run_functions_eagerly(True)
    
    
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    first_graph = tf.Graph()
    first_session = tf.compat.v1.Session(config=config)

    with first_session.as_default():
      # Train model
        keras_model.fit(
            training_dataset,
            steps_per_epoch=int(num_train_examples / args.batch_size),
            epochs=args.num_epochs,
            validation_data=validation_dataset,
            validation_steps=1,
            verbose=1,
            callbacks=[lr_decay_cb, tensorboard_cb])

    keras_model.run_eagerly =True
    
    export_path = os.path.join(args.job_dir, 'keras_export')
    tf.keras.models.save_model(keras_model, export_path)
    print('Model exported to: {}'.format(export_path))



if __name__ == '__main__':
    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
        first_graph = tf.Graph()
        first_session = tf.compat.v1.Session(config=config)
        with first_session.as_default(): 
            args = get_args()
            tf.compat.v1.logging.set_verbosity(args.verbosity)
            train_and_evaluate(args)

Overwriting trainer/task.py


In [55]:
#Ensure to remove temporary files
!rm -f /tmp/waves_data/waves-validation-AI.csv
!rm -f /tmp/waves_data/wavesAI.csv
!head -5 /tmp/waves_data/waves-validation-AI.csv

head: cannot open '/tmp/waves_data/waves-validation-AI.csv' for reading: No such file or directory


In [56]:
import pandas as pd
_CSV_COLUMNS = [
        'ECN', 'ATT1', 'ATT2', 'ATT3', 'ATT4', 'ATT5',
        'ATT6', 'ATT7', 'ATT8', 'ATT9', 'ATT10', 'ATT11',
        'ATT12', 'ATT13', 'ATT14', 'ATT15', 'ATT16', 'ATT17', 
         'ATT18', 'ATT19', 'ATT20','gender','workclass','marital_status',
         'occupation','relationship','race','native_country','income_bracket',
         'ATT_CODE'
    ]
train_df = pd.read_csv("wavesAI.csv", names=_CSV_COLUMNS,header=0,skiprows=1, na_values='?')
    

print (train_df.iloc[0:1,:])



   ECN  ATT1  ATT2  ATT3  ATT4  ATT5  ATT6  ATT7  ATT8  ATT9  ...  ATT20  \
0    1    90    40    20    67    43    56    78    27    78  ...     55   

   gender    workclass        marital_status         occupation  relationship  \
0  'Male'  'State-gov'  'Married-civ-spouse'  'Priv-house-serv'     'Husband'   

      race  native_country  income_bracket  ATT_CODE  
0  'Black'            'US'          '>50K'     11035  

[1 rows x 30 columns]


In [57]:
%%bash

MODEL_DIR=output
gcloud ai-platform local train \
    --module-name trainer.task \
    --package-path trainer/ \
    --job-dir $MODEL_DIR \
    -- \
    --train-files $TRAIN_DATA \
    --eval-files $EVAL_DATA \
    --train-steps 50 \
    --eval-steps 20

In download data_dir is  /tmp/waves_data
Returning training file path in download(datadir) method   /tmp/waves_data/wavesAI.csv
Returning training file path in download(datadir) method   /tmp/waves_data/wavesAI.csv
/tmp/waves_data/wavesAI.csv
/tmp/waves_data/waves-validation-AI.csv
   ECN  ATT1  ATT2  ...  native_country  income_bracket  ATT_CODE
0    0    70    20  ...        'Canada'         '<=50K'     10005

[1 rows x 30 columns]
['ECN', 'ATT1', 'ATT2', 'ATT3', 'ATT4', 'ATT5', 'ATT6', 'ATT7', 'ATT8', 'ATT9', 'ATT10', 'ATT11', 'ATT12', 'ATT13', 'ATT14', 'ATT15', 'ATT16', 'ATT17', 'ATT18', 'ATT19', 'ATT20', 'ATT_CODE']
['ECN', 'ATT1', 'ATT2', 'ATT3', 'ATT4', 'ATT5', 'ATT6', 'ATT7', 'ATT8', 'ATT9', 'ATT10', 'ATT11', 'ATT12', 'ATT13', 'ATT14', 'ATT15', 'ATT16', 'ATT17', 'ATT18', 'ATT19', 'ATT20', 'ATT_CODE']

Epoch 00001: LearningRateScheduler reducing learning rate to 0.02.
Epoch 1/30

Epoch 00002: LearningRateScheduler reducing learning rate to 0.015.
Epoch 2/30

Epoch 00003: Learnin

2021-01-04 02:45:48.612959: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 2200140000 Hz
2021-01-04 02:45:48.613621: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55ac5956a140 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2021-01-04 02:45:48.613660: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2021-01-04 02:45:48.613939: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2021-01-04 02:45:48.616440: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
Instructions for updating:
Use `tf.config.run_functions_eagerly` instead of the experimental version.
2021-01-04 02:45:49.133075: I tensorflow/core/profiler

In [58]:
%%bash

ls output/keras_export/


assets
saved_model.pb
variables


In [59]:

# Copy your model file to Cloud Storage
!echo $BUCKET_NAME
!gsutil cp  output/keras_export/saved_model.pb  gs://$BUCKET_NAME
   




wavesbucket-2
Copying file://output/keras_export/saved_model.pb [Content-Type=application/octet-stream]...
/ [1 files][936.5 KiB/936.5 KiB]                                                
Operation completed over 1 objects/936.5 KiB.                                    


In [60]:
from trainer import util
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

'''
_, _, eval_x, eval_y = util.load_data()
'''

#preprocess data
eval_df = pd.read_csv('waves-validation-AI.csv', names=_CSV_COLUMNS, header=0, na_values='?')
eval_df = util.preprocess(eval_df)

#split features and labels
eval_x, eval_y = eval_df, eval_df.pop('ATT_CODE')

#standardise features
eval_x = util.standardize(eval_x)

#Take samples
prediction_input = eval_x.sample(5)
prediction_input=prediction_input.replace(np.nan,0)
prediction_targets = np.array(eval_y[prediction_input.index])


#standardise labels
scaler = MinMaxScaler()
prediction_targets=scaler.fit_transform(prediction_targets.reshape((-1,1)))
print(prediction_targets)
yy=scaler.inverse_transform(np.array(prediction_targets).reshape(-1,1))
print(yy)

['ECN', 'ATT1', 'ATT2', 'ATT3', 'ATT4', 'ATT5', 'ATT6', 'ATT7', 'ATT8', 'ATT9', 'ATT10', 'ATT11', 'ATT12', 'ATT13', 'ATT14', 'ATT15', 'ATT16', 'ATT17', 'ATT18', 'ATT19', 'ATT20', 'ATT_CODE']
[[0.88888884]
 [0.        ]
 [0.44444466]
 [0.7777777 ]
 [1.        ]]
[[40025.   ]
 [32025.   ]
 [36025.004]
 [39025.   ]
 [41025.   ]]


In [61]:
print(prediction_input)
print(prediction_targets)

        ECN      ATT1      ATT2  ATT3  ATT4      ATT5  ATT6  ATT7      ATT8  \
8  0.693375 -1.752915  1.954340   0.0   0.0 -0.060432   0.0   0.0  0.403786   
0 -1.525426  0.932402 -0.781736   0.0   0.0 -0.060432   0.0   0.0  0.403786   
4 -0.416025  0.484849  0.000000   0.0   0.0 -0.785619   0.0   0.0  0.403786   
7  0.416025 -1.305362  0.000000   0.0   0.0 -0.060432   0.0   0.0 -1.238733   
9  0.970725  0.932402  0.000000   0.0   0.0 -1.510807   0.0   0.0  0.403786   

       ATT9  ...  ATT18  ATT19  ATT20  workclass  marital_status  occupation  \
8  0.490854  ...    0.0    0.0    0.0         -1              -1          -1   
0  0.490854  ...    0.0    0.0    0.0         -1              -1          -1   
4 -1.760197  ...    0.0    0.0    0.0         -1              -1          -1   
7 -1.760197  ...    0.0    0.0    0.0         -1              -1          -1   
9  0.490854  ...    0.0    0.0    0.0         -1              -1          -1   

   relationship  race  native_country  incom

In [62]:
import json
import numpy as np
with open('test.json', 'w') as json_file:
  for row in prediction_input.values.tolist():
    x=np.array(row)
    print (x.shape)
    x=x.reshape(-1,1)  # no of timesteps/features
    json.dump(x.tolist(), json_file)
    json_file.write('\n')

(28,)
(28,)
(28,)
(28,)
(28,)


In [63]:

%%bash

cat test.json

[[0.6933752298355103], [-1.7529150247573853], [1.9543399810791016], [0.0], [0.0], [-0.06043217331171036], [0.0], [0.0], [0.4037860929965973], [0.49085432291030884], [0.0], [-1.9277489185333252], [0.4021998345851898], [-0.28867536783218384], [0.6859626770019531], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [-1.0], [-1.0], [-1.0], [-1.0], [-1.0], [-1.0], [-1.0]]
[[-1.5254255533218384], [0.9324015378952026], [-0.7817359566688538], [0.0], [0.0], [-0.06043217331171036], [0.0], [0.0], [0.4037860929965973], [0.49085432291030884], [0.0], [0.38554978370666504], [0.4021998345851898], [-0.28867536783218384], [0.6859626770019531], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [-1.0], [-1.0], [-1.0], [-1.0], [-1.0], [-1.0], [-1.0]]
[[-0.41602516174316406], [0.4848487675189972], [0.0], [0.0], [0.0], [-0.7856193780899048], [0.0], [0.0], [0.4037860929965973], [-1.7601969242095947], [0.0], [0.38554978370666504], [0.4021998345851898], [-0.28867536783218384], [0.6859626770019531], [0.0], [0.0], [0.0], [0.0], [

In [64]:

%%bash

gcloud ai-platform local predict \
    --model-dir output/keras_export/ \
    --json-instances ./test.json

DENSE
[0.8874058723449707]
[0.2321588099002838]
[0.9001837968826294]
[0.22583416104316711]
[0.9040400981903076]


If the signature defined in the model is not serving_default then you must specify it via --signature-name flag, otherwise the command may fail.
Instructions for updating:
non-resource variables are not supported in the long term
2021-01-04 02:49:50.202494: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 2200140000 Hz
2021-01-04 02:49:50.203465: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55ccb0290cd0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2021-01-04 02:49:50.203505: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2021-01-04 02:49:50.203646: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.

In [88]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

#this are expected values
print ("Expected values")
scaler = MinMaxScaler()
prediction_targets = np.array(eval_y[prediction_input.index])
print (prediction_targets)
#print (prediction_targets[0])
prediction_targets=scaler.fit_transform(prediction_targets.reshape((-1,1)))


#Predicted values
print("\nPredicted values")
#print(scaler.inverse_transform(np.array(0.8874058723449707).reshape(-1,1)))

p=[]


x=scaler.inverse_transform(np.array(0.8874058723449707).reshape(-1,1))
p.append(x.astype('int64'))

x=scaler.inverse_transform(np.array(0.2321588099002838).reshape(-1,1))
p.append(x.astype('int64'))

x=scaler.inverse_transform(np.array(0.9001837968826294).reshape(-1,1))
p.append(x.astype('int64'))

x=scaler.inverse_transform(np.array(0.22583416104316711).reshape(-1,1))
p.append(x.astype('int64'))

x=scaler.inverse_transform(np.array(0.9040400981903076).reshape(-1,1))
p.append(x.astype('int64'))

print(np.concatenate(p).ravel())

Expected values
[40025. 32025. 36025. 39025. 41025.]

Predicted values
[40011 34114 40126 34057 40161]


In [89]:
%%bash
export TRAIN_DATA=gs://$BUCKET_NAME/wavesAI.csv
export EVAL_DATA=gs://$BUCKET_NAME/waves-validation-AI.csv
         

In [90]:
%%bash

gsutil cp test.json gs://$BUCKET_NAME/data/test.json

Copying file://test.json [Content-Type=application/json]...
/ [1 files][  1.7 KiB/  1.7 KiB]                                                
Operation completed over 1 objects/1.7 KiB.                                      


In [91]:

%%bash

export TEST_JSON=gs://$BUCKET_NAME/data/test.json

In [92]:
%%bash

JOB_ID=waves$(date -u +%y%m%d_%H%M%S)
OUTPUT_PATH=gs://$BUCKET_NAME/$JOB_ID
gcloud ai-platform jobs submit training $JOB_ID \
    --job-dir $OUTPUT_PATH \
    --runtime-version $TFVERSION \
    --python-version $PYTHONVERSION \
    --module-name trainer.task \
    --package-path trainer/ \
    --region $REGION \
    -- \
    --train-files $TRAIN_DATA \
    --eval-files $EVAL_DATA \
    --train-steps 10 \
    --eval-steps 5 \
    --verbosity DEBUG

jobId: waves210104_033002
state: QUEUED


Job [waves210104_033002] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ai-platform jobs describe waves210104_033002

or continue streaming the logs with the command

  $ gcloud ai-platform jobs stream-logs waves210104_033002


In [98]:
os.environ["JOB_ID"] = "waves210104_033002" # Replace with your job id

os.environ["MODEL_NAME"] = "wavesAIcentral"

/bin/bash: os.environ[JOB_ID]: command not found
/bin/bash: os.environ[MODEL_NAME]: command not found


In [None]:
import tensorflow as tf
print (tf.__version__)

In [93]:
%%bash
gcloud ai-platform jobs describe waves210104_033002

createTime: '2021-01-04T03:30:04Z'
endTime: '2021-01-04T03:38:52Z'
etag: _65ajbilDt8=
jobId: waves210104_033002
startTime: '2021-01-04T03:37:52Z'
state: SUCCEEDED
trainingInput:
  args:
  - --train-files
  - --eval-files
  - --train-steps
  - '10'
  - --eval-steps
  - '5'
  - --verbosity
  - DEBUG
  jobDir: gs://wavesbucket-2/waves210104_033002
  packageUris:
  - gs://wavesbucket-2/waves210104_033002/packages/45c58e1be43be11c68b5b815f445887e9990ee8dd40a2e93219b05b3d04664b0/trainer-0.0.0.tar.gz
  pythonModule: trainer.task
  pythonVersion: '3.7'
  region: us-central1
  runtimeVersion: '2.3'
trainingOutput:
  consumedMLUnits: 0.06



View job in the Cloud Console at:
https://console.cloud.google.com/mlengine/jobs/waves210104_033002?project=wavesproject

View logs at:
https://console.cloud.google.com/logs?resource=ml_job%2Fjob_id%2Fwaves210104_033002&project=wavesproject


In [None]:
#can create only in central region us-central1
MODEL_NAME= "wavesAIcentral"
os.environ["TFVERSION"] = "2.3"
os.environ["PYTHONVERSION"] = "3.7"

!gcloud ai-platform models create $MODEL_NAME --regions=$REGION


In [None]:
%%bash
TFVERSION="2.3"

echo $TFVERSION
echo $PYTHONVERSION
MODEL_NAME="wavesAIcentral"

OUTPUT_PATH=gs://$BUCKET_NAME/$JOB_ID
echo $MODEL_NAME

MODEL_BINARIES=$OUTPUT_PATH/keras_export/
echo $MODEL_BINARIES

gcloud ai-platform versions create v1 \
--model $MODEL_NAME \
--origin $MODEL_BINARIES \
--runtime-version $TFVERSION \
--framework='TensorFlow'


In [108]:
%%bash

gcloud ai-platform models list

NAME            DEFAULT_VERSION_NAME
ChurnPredictor  V1
waves
wavesAIcentral  v1
wavescentral    v1
wavescentral2   v1


Using endpoint [https://ml.googleapis.com/]


In [None]:
gcloud ai-platform predict \
--model $MODEL_NAME \
--version v1 \
--json-instances ./test.json

In [None]:
!echo $MODEL_NAME
import tensorflow as tf
#tf.enable_eager_execution()
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
first_graph = tf.Graph()
first_session = tf.compat.v1.Session(config=config)

with  first_session.as_default():
    !gcloud ai-platform predict \
   --model $MODEL_NAME \
   --json-instances ./test.json \
   --version v1 




In [None]:
%%bash
cat ./test.json | head -1
MODEL_NAME='wavesAIcentral'
echo $MODEL_NAME

gcloud ai-platform predict \
--model $MODEL_NAME \
--json-instances ./test.json \
--version v1 

In [None]:
from trainer import util
_, _, eval_x, eval_y = util.load_data()
num_wit_examples = 5
test_examples = np.hstack((eval_x[:num_wit_examples].values,eval_y[:num_wit_examples].reshape(-1,1)))

In [None]:
import witwidget
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder
VERSION_NAME='v1'
MODEL_NAME='wavesAIcentral'
config_builder = (WitConfigBuilder(test_examples.tolist(), eval_x.columns.tolist() + ['ATT_CODE'])
  .set_ai_platform_model(PROJECT, MODEL_NAME, VERSION_NAME)
  .set_target_feature('ATT_CODE')
  .set_label_vocab(['Attention Code']))
WitWidget(config_builder, height=800)