In [1]:
%%javascript
/**
 * Hide an element if it exists; a missing element is silently ignored so
 * that one absent piece of page chrome cannot abort the rest of the setup.
 */
function hide_node (node) {
    if (node) {
        node.style.display = 'none';
    }
}

/**
 * Hide the File menu (via its parent node), the logout button and the
 * notebook name, then clear the output of every code cell except the cell
 * hosting this script, and mark the notebook as dirty.
 */
function clear_other_cells () {
    var file_menu = document.getElementById('file_menu');
    hide_node(file_menu && file_menu.parentNode);
    hide_node(document.getElementById('logout'));
    hide_node(document.getElementById('notebook_name'));

    // `element` is the output area of the cell that runs this script.
    var this_cell = $(element).closest('.cell').data('cell');
    Jupyter.notebook.get_cells().forEach(function (cell) {
        if (cell.cell_type === 'code' && cell !== this_cell) {
            cell.clear_output();
        }
    });
    // Flag the notebook as modified once, after all outputs are cleared.
    Jupyter.notebook.set_dirty(true);
}

// Run once right away (this is what the original call expression did) ...
clear_other_cells();
// ... and register the function itself, not its return value, so it also
// runs on every later 'notebook_loaded' event.
$([IPython.events]).on('notebook_loaded.Notebook', clear_other_cells);

// Restart the kernel when the page is being closed or reloaded.
$(window).on("beforeunload", function () {
    if (Jupyter.notebook.kernel) {
        Jupyter.notebook.kernel.restart();
    }
});

// Disable autosave (interval 0) once the notebook has loaded.
$([IPython.events]).on('notebook_loaded.Notebook', function () {
    Jupyter.notebook.set_autosave_interval(0);
});


<IPython.core.display.Javascript object>

<ul style="list-style-type:circle;font-size:14px;line-height:27px;">
    <li><b>from streamanalytix.python.dataset import Dataset:</b> Load Dataset class from streamanalytix API.</li>
	<li><b>Dataset(source_name):</b> Creates object of dataset class for given source.
		<ul>
			<li><b>Dataset.get_dataframe():</b> Read data source and return a pandas dataframe. </li>
		</ul>
	</li>
	<li><b>from streamanalytix.utilities import sax_utils:</b> Load the sax_utils script. It has the following functions:
		<ul>
			<li><b>sax_utils.save_and_download_model(model_name, model_object):</b> This method lets the user save and download the trained model with StreamAnalytix. This model can then be used for training and/or scoring purposes as part of the StreamAnalytix pipeline.
				<ul>
					<li><b>model_name:</b> Name of the model. Accepted String value only. e.g. <i>"DecisionTreeModel"</i></li>
					<li><b>model_object:</b> Object of the trained model</li>
				</ul>
			</li>
			<li><b>sax_utils.get_h2o_cluster_url(cluster_name):</b> This method lets the user get the H2O cluster URL by providing the cluster name.
				<ul>
					<li><b>cluster_name:</b> Name of the cluster. Accepted String value only. e.g. <i>"TrainingCluster"</i></li>
				</ul>
			</li>
			<li><b>sax_utils.upload_and_register_h2o_model(model_object, model_name, model_type, project_name, project_version, workspace_name):</b> This method lets the user upload and register an H2O model in 'mojo' format in StreamAnalytix.
				<ul>
					<li><b>model_object:</b> Object of trained H2O model</li>
					<li><b>model_name:</b> Name of the model. Accepted String value only. e.g. <i>"H2OTreeModel"</i></li>
					<li><b>model_type:</b> Type of trained model. Accepted String value only. Supported H2O model types: <i>"DistributedRandomForest"</i>, <i>"GeneralizedLinearModelling"</i>, <i>"IsolationForest"</i>, <i>"GradientBoostingMachine"</i></li>
					<li><b>project_name:</b> Name of the project in which the model should be registered. Accepted String value only. e.g. <i>"MyProject"</i></li>
					<li><b>project_version:</b> Version of the given project in which the model should be registered</li>
					<li><b>workspace_name:</b> Name of the workspace in which the model should be registered. Accepted String value only. e.g. <i>"MyWorkspace"</i></li>
				</ul>
			</li>
		</ul>
	</li>
</ul>


In [None]:
# NOTE(review): presumably the third-party `shutup` package, used to silence
# Python warnings for the session - confirm. If so, deprecation warnings from
# the plotting and modelling cells below are hidden as well.
import shutup
shutup.please()

In [None]:
from dfml.python.dataset import Dataset
from dfml.utilities import dfml_utils


# Create one Dataset object per source used in this notebook:
#   dataset_1 -> "train.csv" (loaded later as master_df)
#   dataset_2 -> "cnc_mill_experiments_worn.csv" (loaded later as worn_df)
#   dataset_3 -> "cnc_mill_experiments_unworn.csv" (loaded later as unworn_df)
dataset_1 = Dataset("train.csv")
dataset_2 = Dataset("cnc_mill_experiments_worn.csv")
dataset_3 = Dataset("cnc_mill_experiments_unworn.csv")

# A pandas DataFrame is obtained from a Dataset with get_dataframe(), e.g.:
# df = dataset_1.get_dataframe()

In [None]:
# Environment check: print the version of the `python3` executable found by the shell.
# NOTE(review): this is the shell's python3, which may differ from the kernel's interpreter - confirm.
!python3 --version

In [None]:
# Environment check: list the installed pip packages whose name contains "scikit".
!pip3 list | grep "scikit"

In [None]:
# NOTE(review): bare `conda` with no sub-command - presumably a leftover check
# that conda is available on the shell PATH; confirm whether this cell is still needed.
!conda

In [None]:
# imports
import numpy as np
import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, OneHotEncoder
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Print the scikit-learn version seen by the kernel, so the output records
# which version trained the model saved at the end of the notebook.
import sklearn
print(sklearn.__version__)

In [None]:
# load data
# master_df  <- dataset_1 ("train.csv"): joined onto the experiment rows below via its 'No' column
# worn_df    <- dataset_2 ("cnc_mill_experiments_worn.csv")
# unworn_df  <- dataset_3 ("cnc_mill_experiments_unworn.csv")
master_df = dataset_1.get_dataframe()
worn_df = dataset_2.get_dataframe()
unworn_df = dataset_3.get_dataframe()

In [None]:
# concat experiments
# Stack the worn and unworn rows into one frame with a fresh 0..n-1 index.
experiments_df = pd.concat([worn_df, unworn_df], ignore_index=True)
# Sorted list of the distinct experiment numbers present in the combined frame;
# the join cell below iterates over it.
experiments_numbers = sorted(experiments_df['exp_num'].unique())
print(experiments_numbers)

In [None]:
# Preview the first rows of the master frame.
master_df.head()

In [None]:
# Preview the first rows of the worn-tool experiment frame.
worn_df.head()

In [None]:
# preprocessing
# Missing 'passed_visual_inspection' values are recorded as 'no'.
# NOTE(review): presumably a missing value means the part never reached visual
# inspection - confirm that 'no' is the right label for those rows.
master_df['passed_visual_inspection'] = master_df['passed_visual_inspection'].fillna('no')

In [None]:
# join experiments df and master df
# Copy the experiment-level columns from master_df onto every row of
# experiments_df, matching experiments_df['exp_num'] against master_df['No'].
master_columns = ['material', 'feedrate', 'clamp_pressure', 'tool_condition',
                  'machining_finalized', 'passed_visual_inspection']

# One row per experiment number; if 'No' is duplicated the first row wins.
master_lookup = master_df.drop_duplicates(subset='No').set_index('No')[master_columns]

# Fail with a readable message if an experiment has no master row, instead of
# leaving NaNs behind or raising an opaque IndexError.
missing_experiments = sorted(set(experiments_df['exp_num'].unique()) - set(master_lookup.index))
if missing_experiments:
    raise ValueError(
        'No row in master_df for experiment number(s): {}'.format(missing_experiments))

# A single vectorized lookup per column replaces the per-experiment boolean masks.
for column in master_columns:
    experiments_df[column] = experiments_df['exp_num'].map(master_lookup[column])



In [None]:
# values replacement
# Normalise two 'Machining_Process' labels: 'Starting' -> 'Prep', 'end' -> 'End'.
experiments_df['Machining_Process'] = experiments_df['Machining_Process'].replace(
    {'Starting': 'Prep', 'end': 'End'})

In [None]:
# Row counts per value of four experiment-level columns, one panel per column.
fig, ax = plt.subplots(2, 2, figsize=(12, 10))
# Name the column with `x=` and pass the frame as `data=`, as the hue plots in
# the next cell do. A Series passed positionally is taken as `data` by newer
# seaborn releases and triggers a warning on older ones.
sns.countplot(x='tool_condition', data=experiments_df, ax=ax[0, 0])
sns.countplot(x='machining_finalized', data=experiments_df, ax=ax[0, 1])
sns.countplot(x='passed_visual_inspection', data=experiments_df, ax=ax[1, 0])
sns.countplot(x='material', data=experiments_df, ax=ax[1, 1])

# Show the plot
plt.show()

In [None]:
# Row counts per tool condition, split by each of two outcome columns
# (one panel per outcome column, left to right).
fig, ax = plt.subplots(1, 2, figsize=(12, 5))
for panel, hue_column in zip(ax, ["passed_visual_inspection", "machining_finalized"]):
    sns.countplot(data=experiments_df, x="tool_condition", hue=hue_column, ax=panel)
# Show the plot
plt.show()

In [None]:
# Row counts per machining process step.
plt.figure(figsize=(12, 5))
# Use `x=` / `data=` rather than a positional Series, which newer seaborn
# releases interpret as `data`.
sns.countplot(x='Machining_Process', data=experiments_df)
plt.title("Machining Process Count")
plt.show()

In [None]:
# Pairwise feedrate / clamp_pressure plot, coloured by tool condition.
# pairplot builds its own figure, so no plt.figure() call precedes it
# (that call only produced an extra, empty figure).
pair_grid = sns.pairplot(experiments_df, hue='tool_condition', vars=["feedrate", "clamp_pressure"])
# Title the whole grid; plt.title would only title the last-drawn subplot.
pair_grid.fig.suptitle("Tool Condition - feedrate/clamp pressure", y=1.05, fontsize=20)
plt.show()

In [None]:
# Pairwise feedrate / clamp_pressure plot, coloured by whether machining was finalized.
# pairplot builds its own figure, so no plt.figure() call precedes it
# (that call only produced an extra, empty figure).
pair_grid = sns.pairplot(experiments_df, hue='machining_finalized', vars=["feedrate", "clamp_pressure"])
# Title the whole grid; plt.title would only title the last-drawn subplot.
pair_grid.fig.suptitle("Machining Finalized - feedrate/clamp pressure", y=1.05, fontsize=20)
plt.show()

In [None]:
# Feature Creation
# For each of the X, Y, Z and S axes, add the absolute gap between the
# commanded and the actual position, velocity and acceleration.
# The loop variable is deliberately not called `ax`, so it does not overwrite
# the matplotlib axes created by the plotting cells with a string.
for axis_name in ['X', 'Y', 'Z', 'S']:
    for quantity in ['Position', 'Velocity', 'Acceleration']:
        commanded = experiments_df[axis_name + '1_Command' + quantity]
        actual = experiments_df[axis_name + '1_Actual' + quantity]
        experiments_df[axis_name + '1_' + quantity + '_Diff'] = (commanded - actual).abs()

In [None]:
# Train a random-forest classifier that predicts 'tool_condition' from the
# per-row measurements, and report its accuracy on a 20% hold-out split.
feature_df = experiments_df.copy()

categorical_features = ['Machining_Process']
# One-hot encode the categorical column directly. The OrdinalEncoder that used
# to run first added nothing, and it raises on categories unseen during fit,
# which defeated handle_unknown='ignore' on the one-hot step.
categorical_transformer = Pipeline(steps=[('onehotEncoder', OneHotEncoder(handle_unknown='ignore'))])
# All remaining columns are passed through to the classifier unchanged.
preprocessor = ColumnTransformer(transformers=[('cat', categorical_transformer, categorical_features)],
                                 remainder='passthrough')

# Seed the forest so the reported score and the saved model are reproducible.
clf_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                               ('classifier', RandomForestClassifier(random_state=0))])

y_series = feature_df['tool_condition']
# Drop the target, the other columns joined in from master_df (except
# feedrate and clamp_pressure, which stay as features) and the experiment id.
x_df = feature_df.drop(['tool_condition', 'machining_finalized', 'passed_visual_inspection', 'material', 'exp_num'],
                       axis=1)
# Stratified split keeps the tool_condition class ratio equal in both parts.
X_train, X_valid, Y_train, Y_valid = train_test_split(x_df, y_series, test_size=0.2, random_state=0,
                                                      stratify=y_series)
print('features')
print(X_train.columns)
clf_pipeline.fit(X_train, Y_train)
y_predict = clf_pipeline.predict(X_valid)
score = accuracy_score(Y_valid, y_predict)
print(score)

In [None]:
# Save and download the fitted pipeline (preprocessing + classifier) under the given model name.
# NOTE(review): the name is hard-coded with a date/run suffix - confirm it should
# not be changed for a new run, and whether the '.pkl' suffix belongs in the model name.
dfml_utils.save_and_download_model('CNCMillToolWearRF1_13Sep_131.pkl', clf_pipeline)