In [1]:
import os
import json
import pandas as pd
import numpy as np

from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [2]:
data_name = 'user_defined'


def output_data(cols, data, target_names, real_min, real_max, y_pred, y_gt):
    """Write a dataset and its predictions to ``./output/<data_name>/test.json``.

    Each argument is stored unchanged under a fixed JSON key, so every
    argument must already be JSON-serializable (for example, convert NumPy
    arrays with ``.tolist()`` before calling).

    Args:
        cols: Stored under the ``"columns"`` key.
        data: Stored under the ``"data"`` key.
        target_names: Stored under the ``"target_names"`` key.
        real_min: Stored under the ``"real_min"`` key.
        real_max: Stored under the ``"real_max"`` key.
        y_pred: Stored under the ``"y_pred"`` key.
        y_gt: Stored under the ``"y_gt"`` key.

    Raises:
        TypeError: If any argument is not JSON-serializable. An existing
            output file is left untouched in that case.
    """
    filename = "./output/" + data_name + "/test.json"

    to_output = {
        'columns': cols,
        'data': data,
        'target_names': target_names,
        'real_min': real_min,
        'real_max': real_max,
        'y_pred': y_pred,
        'y_gt': y_gt,
    }
    # Serialize before opening the file: opening with 'w' truncates it, so a
    # serialization error afterwards would destroy a previously written file.
    payload = json.dumps(to_output)

    # exist_ok avoids the check-then-create race and keeps re-runs safe.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, 'w') as output:
        output.write(payload)

## Read Data
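You can change the code below to read your own data. The cells that follow expect the label to be in a column named `y`; all other columns are used as features.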

In [3]:
# Load the input table: the first CSV row supplies the column names and
# no column is used as the row index.
df = pd.read_csv(
    "./input/synthetic_data.csv",
    header=0,
    index_col=None,
)
# Preview the first rows as this cell's output.
df.head()

Unnamed: 0,x1,x2,x3,x4,x5,y
0,0.368111,-0.766453,-1.0,1.0,-1.0,0
1,0.855474,-0.245013,1.0,1.0,-1.0,0
2,-0.527025,0.626383,-1.0,1.0,1.0,0
3,0.616431,-0.475369,1.0,-1.0,1.0,0
4,0.016539,-0.975422,1.0,1.0,-1.0,1


In [4]:
# --- Prepare data ---
# Every column except the label column 'y' is treated as a feature.
feature_df = df.drop(columns=['y'])
X = feature_df.values
y = df['y'].values
y = y.reshape(len(y))  # keep the labels as a flat 1-D array

# Seed the split so the accuracy printed below is reproducible on
# Restart & Run All (the classifier itself is already seeded).
train, test, train_labels, test_labels = train_test_split(
    X, y, test_size=0.2, random_state=1)

# Take the column names from the same frame X was built from, so they stay
# aligned with the feature matrix even when 'y' is not the last column.
train_df = pd.DataFrame(train, columns=feature_df.columns.values)
test_df = pd.DataFrame(test, columns=feature_df.columns.values)

# --- Train the model ---
clf = MLPClassifier(random_state=1, max_iter=300)
clf.fit(train, train_labels)

# --- Report accuracy on the held-out 20% split ---
print(clf.score(test, test_labels))

0.975




## Output Training Data

In [5]:
# Display names for the target classes.
target_names = ["False", "True"]

# Per-feature minimum and maximum over the full feature matrix X.
min_val = np.min(X, axis=0)
max_val = np.max(X, axis=0)

# The trained model's predictions on the training split.
y_pred = clf.predict(train)

# Feature names come from the same columns X was built from (everything
# except 'y'), so they match the data even if 'y' is not the last column.
feature_names = df.drop(columns=['y']).columns.tolist()

# Export the training split; arrays are converted to plain lists for JSON.
output_data(feature_names, train.tolist(), target_names,
            min_val.tolist(), max_val.tolist(),
            y_pred.tolist(), train_labels.tolist())

In [6]:
# Compute and output a 10-bin histogram for each feature.
dist_list = []

for attr_idx in range(X.shape[1]):
    # Bin each feature column between its own minimum and maximum.
    counts, bin_edges = np.histogram(
        X[:, attr_idx],
        bins=10,
        range=(min_val[attr_idx], max_val[attr_idx]),
    )
    dist_list.append({
        'hist': counts.tolist(),
        'bin_edges': bin_edges.tolist(),
    })

# Create the output directory here so this cell does not rely on
# output_data() having been called first to create it.
histogram_path = './output/' + data_name + '/histogram.json'
os.makedirs(os.path.dirname(histogram_path), exist_ok=True)
with open(histogram_path, 'w') as output:
    output.write(json.dumps({'histogram': dist_list}))