In [1]:
%%javascript
function clear_other_cells () {
document.getElementById('file_menu').parentNode.style.display = 'none';
document.getElementById('logout').style.display = 'none';
document.getElementById('notebook_name').style.display = 'none';
var this_cell = $(element).closest('.cell').data('cell');
Jupyter.notebook.get_cells().forEach(function (cell) {
if (cell.cell_type === 'code' && cell !== this_cell) {
cell.clear_output();
}
Jupyter.notebook.set_dirty(true);
});
};
$([IPython.events]).on('notebook_loaded.Notebook', clear_other_cells());
$(window).on("beforeunload", function(){
Jupyter.notebook.kernel.restart();
});


<IPython.core.display.Javascript object>

<ul style="list-style-type:circle;font-size:14px;line-height:27px;">
    <li><b>from streamanalytix.python.dataset import Dataset:</b> Load Dataset class from streamanalytix API.</li>
	<li><b>Dataset(source_name):</b> Creates object of dataset class for given source.
		<ul>
			<li><b>Dataset.get_dataframe():</b> Read data source and return a pandas dataframe. </li>
		</ul>
	</li>
	<li><b>from streamanalytix.utilities import sax_utils:</b> Load sax_util script. It has following functions
		<ul>
			<li><b>sax_utils.save_and_download_model(model_name, model_object):</b>This method lets the user to save and download the trained model with StreamAnalytix. This model can than be used for training and/or scoring purpose as part of the StreamAnalytix pipeline</li>
				<ul>
					<li><b>model_name:</b> Name of the model. Accepted String value only. e.g. <i>"DecisionTreeModel"</i></li>
					<li><b>model_object:</b> Object of the trained model</li>
				</ul>
			<li><b>sax_utils.get_h2o_cluster_url(cluster_name):</b>This method lets the user to get h2o cluster url by providing cluster name</li>
				<ul>
					<li><b>cluster_name:</b> Name of the cluster. Accepted String value only. e.g. <i>"TrainingCluster"</i></li>
				</ul>
			<li><b>sax_utils.upload_and_register_h2o_model(model_object, model_name, model_type, project_name, project_version):</b>This method lets the user to upload and register h2o model in 'mojo' format in streamanalytix</li>
				<ul>
					<li><b>model_object:</b> Object of trained H2O model</li>
					<li><b>model_name:</b>Name of the model. Accepted String value only. e.g. <i>"H2OTreeModel"</i>
					<li><b>model_type:</b>Type of trained model. Accepted String value only. We support H2O model of types :<i>"DistributedRandomForest"</i>,<i>"GeneralizedLinearModelling"</i>,<i>"IsolationForest"</i>,<i>"GradientBoostingMachine"</i></li>
					<li><b>project_name:</b>Project Name in which model should register. Accepted String value only. e.g. <i>"MyProject"</i></li>
					<li><b>project_version:</b>Version of given project in which model should register</li>
				</ul>
		</ul>
	</li>
</ul>


In [3]:
from streamanalytix.python.dataset import Dataset
from streamanalytix.utilities import sax_utils


dataset_1 = Dataset("HistoricalTransactions")

# you can use pandas to create dataframe as shown below
df = dataset_1.get_dataframe()
import h2o
h2o_server_url = sax_utils.get_h2o_cluster_url("ESINotebookV1")
h2o.connect(url=h2o_server_url)
print(h2o.cluster_status)

Dataframe created
Connecting to H2O server at http://192.168.7.32:54321 ... successful.


0,1
H2O cluster uptime:,1 hour 16 mins
H2O cluster timezone:,America/Los_Angeles
H2O data parsing timezone:,UTC
H2O cluster version:,3.28.1.3
H2O cluster version age:,2 months and 21 days
H2O cluster name:,ESINotebookV1
H2O cluster total nodes:,1
H2O cluster free memory:,833 Mb
H2O cluster total cores:,24
H2O cluster allowed cores:,4


<function cluster_status at 0x7f5ca3d3c398>


In [None]:
creditcard_dataset = h2o.H2OFrame(df)
creditcard_dataset.head(5)

In [None]:
print('Count of frauds')
print(creditcard_dataset.type("isFraud"))
creditcard_dataset['isFraud'] = creditcard_dataset['isFraud'].asfactor()
print(creditcard_dataset.type("isFraud"))
print(df.isFraud.value_counts())
print(df.shape)

In [None]:
# get list of features
features = list(creditcard_dataset.columns)
features = [feature for feature in features if feature not in ('nameOrig','nameDest','isFraud')]
response = 'isFraud'

In [None]:
creditcard_dataset.anyfactor()
creditcard_dataset.columns_by_type(coltype='string')
creditcard_dataset.impute("type", method="mean")

In [None]:
from h2o.estimators.gbm import H2OGradientBoostingEstimator
# split into train and validation sets
train, valid= creditcard_dataset.split_frame(ratios = [.8], seed = 1234)

# try using the `categorical_encoding` parameter:
encoding = "one_hot_explicit"

# initialize the estimator
creditcard_dataset_gbm = H2OGradientBoostingEstimator(categorical_encoding = encoding, seed =1234)

# then train the model
creditcard_dataset_gbm.train(x = features, y = response, training_frame = train, validation_frame = valid)

In [None]:
sax_utils.upload_and_register_h2o_model(creditcard_dataset_gbm, 'FraudDetectionModel', 'GradientBoostingMachine', 'FraudDetectionMobileV1' , 1)

In [None]:
#Model version updated