From 4da369451688971cfdd9ba3f3b16a6e3f5f2135c Mon Sep 17 00:00:00 2001 From: Yaw Joseph Etse Date: Sun, 31 Jul 2022 13:51:24 -0400 Subject: [PATCH] feat: adding support to generate models from google sheets --- src/__test__/mock_automl_data.ts | 1074 ++++++++++++++++++++++++++++++ src/automl.test.ts | 107 +++ src/constants.ts | 2 +- src/index.ts | 3 + src/jsonm.test.ts | 1 + src/jsonm.ts | 209 +++++- 6 files changed, 1377 insertions(+), 19 deletions(-) create mode 100644 src/__test__/mock_automl_data.ts create mode 100644 src/automl.test.ts diff --git a/src/__test__/mock_automl_data.ts b/src/__test__/mock_automl_data.ts new file mode 100644 index 0000000..4ed7c8e --- /dev/null +++ b/src/__test__/mock_automl_data.ts @@ -0,0 +1,1074 @@ +export const autoMLdata = { + "outputs": 5, + "rowRange": [ + 1, + 151 + ], + "colRange": [ + 1, + 5 + ], + "data": [ + [ + "sepal_length_cm", + "sepal_width_cm", + "petal_length_cm", + "petal_width_cm", + "plant" + ], + [ + 5.1, + 3.5, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 4.9, + 3, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 4.7, + 3.2, + 1.3, + 0.2, + "Iris-setosa" + ], + [ + 4.6, + 3.1, + 1.5, + 0.2, + "Iris-setosa" + ], + [ + 5, + 3.6, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 5.4, + 3.9, + 1.7, + 0.4, + "Iris-setosa" + ], + [ + 4.6, + 3.4, + 1.4, + 0.3, + "Iris-setosa" + ], + [ + 5, + 3.4, + 1.5, + 0.2, + "Iris-setosa" + ], + [ + 4.4, + 2.9, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 4.9, + 3.1, + 1.5, + 0.1, + "Iris-setosa" + ], + [ + 5.4, + 3.7, + 1.5, + 0.2, + "Iris-setosa" + ], + [ + 4.8, + 3.4, + 1.6, + 0.2, + "Iris-setosa" + ], + [ + 4.8, + 3, + 1.4, + 0.1, + "Iris-setosa" + ], + [ + 4.3, + 3, + 1.1, + 0.1, + "Iris-setosa" + ], + [ + 5.8, + 4, + 1.2, + 0.2, + "Iris-setosa" + ], + [ + 5.7, + 4.4, + 1.5, + 0.4, + "Iris-setosa" + ], + [ + 5.4, + 3.9, + 1.3, + 0.4, + "Iris-setosa" + ], + [ + 5.1, + 3.5, + 1.4, + 0.3, + "Iris-setosa" + ], + [ + 5.7, + 3.8, + 1.7, + 0.3, + "Iris-setosa" + ], + [ + 5.1, + 3.8, + 1.5, + 0.3, + "" + ], + [ + 5.4, + 3.4, + 1.7, + 0.2, + "" + ], + [ + 5.1, + 3.7, + 1.5, + 0.4, + "" + ], + [ + 4.6, + 3.6, + 1, + 0.2, + "" + ], + [ + 5.1, + 3.3, + 1.7, + 0.5, + "" + ], + [ + 4.8, + 3.4, + 1.9, + 0.2, + "" + ], + [ + 5, + 3, + 1.6, + 0.2, + "" + ], + [ + 5, + 3.4, + 1.6, + 0.4, + "" + ], + [ + 5.2, + 3.5, + 1.5, + 0.2, + "" + ], + [ + 5.2, + 3.4, + 1.4, + 0.2, + "" + ], + [ + 4.7, + 3.2, + 1.6, + 0.2, + "" + ], + [ + 4.8, + 3.1, + 1.6, + 0.2, + "" + ], + [ + 5.4, + 3.4, + 1.5, + 0.4, + "" + ], + [ + 5.2, + 4.1, + 1.5, + 0.1, + "" + ], + [ + 5.5, + 4.2, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 4.9, + 3.1, + 1.5, + 0.1, + "Iris-setosa" + ], + [ + 5, + 3.2, + 1.2, + 0.2, + "Iris-setosa" + ], + [ + 5.5, + 3.5, + 1.3, + 0.2, + "Iris-setosa" + ], + [ + 4.9, + 3.1, + 1.5, + 0.1, + "Iris-setosa" + ], + [ + 4.4, + 3, + 1.3, + 0.2, + "Iris-setosa" + ], + [ + 5.1, + 3.4, + 1.5, + 0.2, + "Iris-setosa" + ], + [ + 5, + 3.5, + 1.3, + 0.3, + "Iris-setosa" + ], + [ + 4.5, + 2.3, + 1.3, + 0.3, + "Iris-setosa" + ], + [ + 4.4, + 3.2, + 1.3, + 0.2, + "Iris-setosa" + ], + [ + 5, + 3.5, + 1.6, + 0.6, + "Iris-setosa" + ], + [ + 5.1, + 3.8, + 1.9, + 0.4, + "Iris-setosa" + ], + [ + 4.8, + 3, + 1.4, + 0.3, + "Iris-setosa" + ], + [ + 5.1, + 3.8, + 1.6, + 0.2, + "Iris-setosa" + ], + [ + 4.6, + 3.2, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 5.3, + 3.7, + 1.5, + 0.2, + "Iris-setosa" + ], + [ + 5, + 3.3, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 7, + 3.2, + 4.7, + 1.4, + "Iris-versicolor" + ], + [ + 6.4, + 3.2, + 4.5, + 1.5, + "Iris-versicolor" + ], + [ + 6.9, + 
3.1, + 4.9, + 1.5, + "Iris-versicolor" + ], + [ + 5.5, + 2.3, + 4, + 1.3, + "Iris-versicolor" + ], + [ + 6.5, + 2.8, + 4.6, + 1.5, + "Iris-versicolor" + ], + [ + 5.7, + 2.8, + 4.5, + 1.3, + "Iris-versicolor" + ], + [ + 6.3, + 3.3, + 4.7, + 1.6, + "Iris-versicolor" + ], + [ + 4.9, + 2.4, + 3.3, + 1, + "Iris-versicolor" + ], + [ + 6.6, + 2.9, + 4.6, + 1.3, + "Iris-versicolor" + ], + [ + 5.2, + 2.7, + 3.9, + 1.4, + "Iris-versicolor" + ], + [ + 5, + 2, + 3.5, + 1, + "Iris-versicolor" + ], + [ + 5.9, + 3, + 4.2, + 1.5, + "" + ], + [ + 6, + 2.2, + 4, + 1, + "" + ], + [ + 6.1, + 2.9, + 4.7, + 1.4, + "" + ], + [ + 5.6, + 2.9, + 3.6, + 1.3, + "" + ], + [ + 6.7, + 3.1, + 4.4, + 1.4, + "" + ], + [ + 5.6, + 3, + 4.5, + 1.5, + "" + ], + [ + 5.8, + 2.7, + 4.1, + 1, + "" + ], + [ + 6.2, + 2.2, + 4.5, + 1.5, + "" + ], + [ + 5.6, + 2.5, + 3.9, + 1.1, + "" + ], + [ + 5.9, + 3.2, + 4.8, + 1.8, + "" + ], + [ + 6.1, + 2.8, + 4, + 1.3, + "" + ], + [ + 6.3, + 2.5, + 4.9, + 1.5, + "" + ], + [ + 6.1, + 2.8, + 4.7, + 1.2, + "Iris-versicolor" + ], + [ + 6.4, + 2.9, + 4.3, + 1.3, + "Iris-versicolor" + ], + [ + 6.6, + 3, + 4.4, + 1.4, + "Iris-versicolor" + ], + [ + 6.8, + 2.8, + 4.8, + 1.4, + "Iris-versicolor" + ], + [ + 6.7, + 3, + 5, + 1.7, + "Iris-versicolor" + ], + [ + 6, + 2.9, + 4.5, + 1.5, + "Iris-versicolor" + ], + [ + 5.7, + 2.6, + 3.5, + 1, + "Iris-versicolor" + ], + [ + 5.5, + 2.4, + 3.8, + 1.1, + "Iris-versicolor" + ], + [ + 5.5, + 2.4, + 3.7, + 1, + "Iris-versicolor" + ], + [ + 5.8, + 2.7, + 3.9, + 1.2, + "Iris-versicolor" + ], + [ + 6, + 2.7, + 5.1, + 1.6, + "Iris-versicolor" + ], + [ + 5.4, + 3, + 4.5, + 1.5, + "Iris-versicolor" + ], + [ + 6, + 3.4, + 4.5, + 1.6, + "Iris-versicolor" + ], + [ + 6.7, + 3.1, + 4.7, + 1.5, + "Iris-versicolor" + ], + [ + 6.3, + 2.3, + 4.4, + 1.3, + "Iris-versicolor" + ], + [ + 5.6, + 3, + 4.1, + 1.3, + "" + ], + [ + 5.5, + 2.5, + 4, + 1.3, + "" + ], + [ + 5.5, + 2.6, + 4.4, + 1.2, + "" + ], + [ + 6.1, + 3, + 4.6, + 1.4, + "" + ], + [ + 5.8, + 2.6, + 4, + 1.2, + "" + ], + [ + 5, + 2.3, + 3.3, + 1, + "" + ], + [ + 5.6, + 2.7, + 4.2, + 1.3, + "" + ], + [ + 5.7, + 3, + 4.2, + 1.2, + "" + ], + [ + 5.7, + 2.9, + 4.2, + 1.3, + "" + ], + [ + 6.2, + 2.9, + 4.3, + 1.3, + "" + ], + [ + 5.1, + 2.5, + 3, + 1.1, + "" + ], + [ + 5.7, + 2.8, + 4.1, + 1.3, + "" + ], + [ + 6.3, + 3.3, + 6, + 2.5, + "" + ], + [ + 5.8, + 2.7, + 5.1, + 1.9, + "" + ], + [ + 7.1, + 3, + 5.9, + 2.1, + "" + ], + [ + 6.3, + 2.9, + 5.6, + 1.8, + "" + ], + [ + 6.5, + 3, + 5.8, + 2.2, + "" + ], + [ + 7.6, + 3, + 6.6, + 2.1, + "" + ], + [ + 4.9, + 2.5, + 4.5, + 1.7, + "" + ], + [ + 7.3, + 2.9, + 6.3, + 1.8, + "Iris-virginica" + ], + [ + 6.7, + 2.5, + 5.8, + 1.8, + "Iris-virginica" + ], + [ + 7.2, + 3.6, + 6.1, + 2.5, + "Iris-virginica" + ], + [ + 6.5, + 3.2, + 5.1, + 2, + "Iris-virginica" + ], + [ + 6.4, + 2.7, + 5.3, + 1.9, + "Iris-virginica" + ], + [ + 6.8, + 3, + 5.5, + 2.1, + "Iris-virginica" + ], + [ + 5.7, + 2.5, + 5, + 2, + "Iris-virginica" + ], + [ + 5.8, + 2.8, + 5.1, + 2.4, + "Iris-virginica" + ], + [ + 6.4, + 3.2, + 5.3, + 2.3, + "Iris-virginica" + ], + [ + 6.5, + 3, + 5.5, + 1.8, + "Iris-virginica" + ], + [ + 7.7, + 3.8, + 6.7, + 2.2, + "Iris-virginica" + ], + [ + 7.7, + 2.6, + 6.9, + 2.3, + "Iris-virginica" + ], + [ + 6, + 2.2, + 5, + 1.5, + "Iris-virginica" + ], + [ + 6.9, + 3.2, + 5.7, + 2.3, + "Iris-virginica" + ], + [ + 5.6, + 2.8, + 4.9, + 2, + "Iris-virginica" + ], + [ + 7.7, + 2.8, + 6.7, + 2, + "Iris-virginica" + ], + [ + 6.3, + 2.7, + 4.9, + 1.8, + "Iris-virginica" + ], + [ + 6.7, + 3.3, + 5.7, + 
2.1, + "Iris-virginica" + ], + [ + 7.2, + 3.2, + 6, + 1.8, + "Iris-virginica" + ], + [ + 6.2, + 2.8, + 4.8, + 1.8, + "Iris-virginica" + ], + [ + 6.1, + 3, + 4.9, + 1.8, + "Iris-virginica" + ], + [ + 6.4, + 2.8, + 5.6, + 2.1, + "Iris-virginica" + ], + [ + 7.2, + 3, + 5.8, + 1.6, + "Iris-virginica" + ], + [ + 7.4, + 2.8, + 6.1, + 1.9, + "Iris-virginica" + ], + [ + 7.9, + 3.8, + 6.4, + 2, + "Iris-virginica" + ], + [ + 6.4, + 2.8, + 5.6, + 2.2, + "Iris-virginica" + ], + [ + 6.3, + 2.8, + 5.1, + 1.5, + "Iris-virginica" + ], + [ + 6.1, + 2.6, + 5.6, + 1.4, + "Iris-virginica" + ], + [ + 7.7, + 3, + 6.1, + 2.3, + "Iris-virginica" + ], + [ + 6.3, + 3.4, + 5.6, + 2.4, + "Iris-virginica" + ], + [ + 6.4, + 3.1, + 5.5, + 1.8, + "Iris-virginica" + ], + [ + 6, + 3, + 4.8, + 1.8, + "Iris-virginica" + ], + [ + 6.9, + 3.1, + 5.4, + 2.1, + "Iris-virginica" + ], + [ + 6.7, + 3.1, + 5.6, + 2.4, + "Iris-virginica" + ], + [ + 6.9, + 3.1, + 5.1, + 2.3, + "Iris-virginica" + ], + [ + 5.8, + 2.7, + 5.1, + 1.9, + "Iris-virginica" + ], + [ + 6.8, + 3.2, + 5.9, + 2.3, + "Iris-virginica" + ], + [ + 6.7, + 3.3, + 5.7, + 2.5, + "Iris-virginica" + ], + [ + 6.7, + 3, + 5.2, + 2.3, + "Iris-virginica" + ], + [ + 6.3, + 2.5, + 5, + 1.9, + "Iris-virginica" + ], + [ + 6.5, + 3, + 5.2, + 2, + "Iris-virginica" + ], + [ + 6.2, + 3.4, + 5.4, + 2.3, + "Iris-virginica" + ], + [ + 5.9, + 3, + 5.1, + 1.8, + "Iris-virginica" + ] + ], + "inputs": [ + 1, + 4 + ] +}; \ No newline at end of file diff --git a/src/automl.test.ts b/src/automl.test.ts new file mode 100644 index 0000000..4784a66 --- /dev/null +++ b/src/automl.test.ts @@ -0,0 +1,107 @@ +import { TrainingProgressCallback, TrainingProgressUpdate } from './constants'; +// import { getDate } from '../index'; +import { getDateField, getInputs, getModelOptions, getModelTrainingOptions, splitTrainingPredictionData,getInputsOutputsFromDataset , getSpreadsheetDataset, getModel} from './jsonm'; +import * as JSONM from './index'; +import { ModelTypes } from './model'; +import { toBeWithinRange, } from './jest.test'; +expect.extend({ toBeWithinRange }); +import {autoMLdata} from './__test__/mock_automl_data' +import { Data } from '@jsonstack/data/src/DataSet'; +import { setBackend } from './tensorflow_singleton'; +import * as tf from '@tensorflow/tfjs-node'; +setBackend(tf) + +// request->jsonm->column matrix + + +describe('AutoML Sheets Test',()=>{ + describe('getInputsOutputsFromDataset',()=>{ + const labels = ['col1','col2','col3','col4','col5'] + const dataset = [ + {col1:1, col2:2, col3:3, col4:4, col5: 5}, + {col1:10, col2:20, col3:30, col4:undefined, col5: undefined}, + ] + it('should generate inputs and outputs',()=>{ + const io=getInputsOutputsFromDataset({labels,dataset}); + expect(io.inputs).toMatchObject([ 'col1', 'col2', 'col3' ]); + expect(io.outputs).toMatchObject([ 'col4', 'col5' ]); + }) + it('should use supplied inputs and outputs',()=>{ + const io=getInputsOutputsFromDataset({labels,dataset,inputs:['in1','in2'],outputs:['out1']}); + expect(io.inputs).toMatchObject([ 'in1','in2' ]); + expect(io.outputs).toMatchObject([ 'out1' ]); + + }) + }) + describe('getSpreadsheetDataset',()=>{ + it('should generate json dataset from spreadsheet data',()=>{ + const data =[ + ['col1','col2','col3'], + [1, 2, 3, ], + [10, 20, 30, ] + ]; + const shd = getSpreadsheetDataset(data); + expect(shd.labels).toMatchObject(['col1', 'col2', 'col3']) + expect(shd.vectors).toMatchObject([ [1,2,3], [10,20,30], ]) + expect(shd.dataset).toMatchObject([ {col1: 1, col2: 2, col3: 3,}, {col1: 10, col2: 20, 
col3: 30}, ] ) + }); + it('should work if you supply custom column labels',()=>{ + const data =[ + [1, 2, 3, ], + [10, 20, 30, ] + ]; + const shd1 = getSpreadsheetDataset(data,{columnLabels:['col1', 'col2', 'col3']}); + expect(shd1.labels).toMatchObject(['col1', 'col2', 'col3']) + expect(shd1.vectors).toMatchObject([ [1,2,3], [10,20,30], ]) + }) + it('should work if you do not supply custom column labels',()=>{ + const data =[ + [1, 2, 3, ], + [10, 20, 30, ] + ]; + const shd1 = getSpreadsheetDataset(data,); + expect(shd1.labels).toMatchObject(['column_1', 'column_2', 'column_3' ]) + expect(shd1.vectors).toMatchObject([ [1,2,3], [10,20,30], ]) + expect(shd1.dataset).toMatchObject([ {column_1: 1, column_2: 2, column_3: 3,}, {column_1: 10, column_2: 20, column_3: 30}, ] ) + }); + }) + describe('mock end to end example',()=>{ + + it('should run a basic test from spreadsheet data',async ()=>{ + const on_progress = ({ + completion_percentage, + loss, + epoch, + logs, + status, + defaultLog, + }:TrainingProgressUpdate)=>{ + if(status!=='training') console.log({status,defaultLog}) + } + // const vectors = autoMLdata?.data.concat([]); + // const labels = vectors?.splice(0,1)[0] as string[]; + // const dataset = JSONM.Data.DataSet.reverseColumnMatrix({labels,vectors});\ + //@ts-ignore + const{vectors,labels,dataset}=getSpreadsheetDataset(autoMLdata?.data,{on_progress}); + //@ts-ignore + const {columns,inputs,outputs} = JSONM.getInputsOutputsFromDataset({dataset,labels, on_progress}); + const {trainingData,predictionData} = await splitTrainingPredictionData({ + inputs, + outputs, + data: dataset, + }); + // console.log({trainingData,predictionData}); + const SpreadsheetModel = await getModel({ + type:'prediction', + inputs, + outputs, + dataset:trainingData, + //@ts-ignore + on_progress, + }); + await SpreadsheetModel.trainModel(); + + + },30000) + }) +}); diff --git a/src/constants.ts b/src/constants.ts index 37595b8..eb39584 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -129,7 +129,7 @@ export type TrainingProgressUpdate = { loss: number }; status: string; - defaultLog?: boolean; + defaultLog?: boolean|any; } export function training_on_progress({ completion_percentage, loss, epoch, status, logs, defaultLog=true }:TrainingProgressUpdate):void { diff --git a/src/index.ts b/src/index.ts index 505a468..2341ee2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -10,6 +10,9 @@ export { getModel, getModelFromJSONM, getModelTrainingOptions, + splitTrainingPredictionData, + getInputsOutputsFromDataset, + getSpreadsheetDataset, } from './jsonm' export { getDataSet, diff --git a/src/jsonm.test.ts b/src/jsonm.test.ts index 685ac09..3ec201e 100644 --- a/src/jsonm.test.ts +++ b/src/jsonm.test.ts @@ -64,6 +64,7 @@ describe('JSONM',()=>{ }) describe('splitTrainingPredictionData',()=>{ it('should handle empty inputs', async()=>{ + // deepcode ignore MissingArgument/test: used in jest const {trainingData,predictionData} = await splitTrainingPredictionData(); expect(trainingData).toMatchObject([]); expect(predictionData).toMatchObject([]); diff --git a/src/jsonm.ts b/src/jsonm.ts index 86117fe..cbb317c 100644 --- a/src/jsonm.ts +++ b/src/jsonm.ts @@ -1,5 +1,5 @@ import { ModelX, ModelContext, ModelTypes, ModelConfiguration, } from './model'; -import { Data, Datum, } from '@jsonstack/data'; +import { Data, Datum, DataSet, util as DataSetUtil } from '@jsonstack/data'; import { TrainingProgressCallback, } from './constants'; import { JDS, getDataSet, } from './dataset'; @@ -38,6 +38,31 @@ export type 
ModelDataOptions = { inputs: string[]; outputs: string[]; data: JDS | Data; + on_progress?:TrainingProgressCallback, +} + +export type getSpreadsheetDatasetOptions={ + on_progress?:TrainingProgressCallback, + columnLabels?:string[] +} + +export type columnStat ={ + label: string, + labelValues: any[], + dataType:string, + mean?: number, + min?:number, + max?:number, + values:number +} + +export type getInputsOutputsFromDatasetOptions ={ + dataset:Data, + labels:string[], + inputs?:string[], + outputs?:string[], + forceStats?:boolean, + on_progress?:TrainingProgressCallback, } export async function getModelFromJSONM(jml?: JML): Promise<ModelX> { @@ -60,23 +85,6 @@ export const getModel = getModelFromJSONM; -/** - * Splits into training and prediction data - * @param options.inputs - list of inputs - * @param options.outputs - list of outputs - * @param options.data - data to split into training and prediction data - * @returns two objects (trainingData and predictionData) - */ -export async function splitTrainingPredictionData(options?:ModelDataOptions): Promise<{trainingData:Data, predictionData: Data}>{ - const dataset = await getDataSet(options?.data); - const {trainingData, predictionData} = dataset.reduce((result,datum)=>{ - if(options?.outputs?.filter((output)=>datum[output]===undefined || datum[output]===null - ).length) result.predictionData.push(datum); - else result.trainingData.push(datum); - return result; - },{trainingData:[],predictionData:[],}) - return {trainingData,predictionData} -} export function getModelTrainingOptions({ accuracy_target }: { accuracy_target?: number;} ={}) { return { @@ -120,4 +128,169 @@ export function getModelOptions(jml?:JML,datum?:Datum){ ...defaultModelOptions, ...jml?.model_options, } +} + +/** + * Splits into training and prediction data + * @param options.inputs - list of inputs + * @param options.outputs - list of outputs + * @param options.data - data to split into training and prediction data + * @returns two objects (trainingData and predictionData) + */ + export async function splitTrainingPredictionData(options?:ModelDataOptions): Promise<{trainingData:Data, predictionData: Data}>{ + if(typeof options?.on_progress==='function') options.on_progress({ + status: 'preprocessing', + loss: undefined, + completion_percentage: undefined, + epoch: undefined, + logs: undefined, + defaultLog: { + detail: 'generating training data' + } + }) + const dataset = await getDataSet(options?.data); + const {trainingData, predictionData} = dataset.reduce((result,datum)=>{ + if(options?.outputs?.filter((output)=> isEmpty(datum[output]) + ).length) result.predictionData.push(datum); + else result.trainingData.push(datum); + return result; + },{trainingData:[],predictionData:[],}) + return {trainingData,predictionData} +} + + +/** + * function that tests for empty values + * @param val - value to test + * @returns {boolean} + */ +export function isEmpty(val):boolean{ + return val === undefined || val === null || val===''; +} + +/** + * returns inputs and outputs from json data and labels by iterating through the data; if there are rows with missing values, it assumes those are output columns + * e.g., + * labels = ['col1','col2','col3','col4','col5'] + * dataset = [ + * {col1:1, col2:2, col3:3, col4:4, col5: 5}, + * {col1:10, col2:20, col3:30, col4:undefined, col5: undefined}, + * ] + * + * it will assume + * inputs=['col1','col2','col3'] + * outputs=['col4','col5'] + * + * if forceStats is set, it will run stats on each
column like mean, min, max + * @param options - dataset, labels, and optional inputs, outputs, forceStats, and on_progress callback + * @returns inputs, outputs, and per-column stats (when computed) + */ + +export function getInputsOutputsFromDataset({dataset, labels, inputs=[],outputs=[], forceStats=false, on_progress}:getInputsOutputsFromDatasetOptions){ + if(typeof on_progress==='function') on_progress({ + status: 'preprocessing', + loss: undefined, + completion_percentage: undefined, + epoch: undefined, + logs: undefined, + defaultLog: { + detail: 'configuring inputs and outputs' + } + }) + if(inputs?.length && outputs?.length && forceStats===false){ + return { + inputs, + outputs, + columns: undefined + } + } else{ + const columns:columnStat[] = labels.reduce((stats:columnStat[],label)=>{ + const labelValues = DataSet.columnArray(label,{ + data: dataset, + filter: val=> !isEmpty(val) + }); + const dataType = typeof labelValues[0]; + const mean = dataType==='number'?DataSetUtil.mean(labelValues):undefined; + const min = DataSetUtil.min(labelValues); + const max = DataSetUtil.max(labelValues); + stats.push({ + label, + labelValues, + dataType, + mean, + min, + max, + values: labelValues.length + }); + return stats; + },[]); + const maxColumnValue = columns.sort((a,b)=>b.values-a.values)[0].values; + const [derivedInputs,derivedOutputs]:[string[],string[]] = columns.reduce((result,columnStat)=>{ + + if(columnStat.values < maxColumnValue) result[1].push(columnStat.label); + else result[0].push(columnStat.label); + return result; + },[[],[]] as [string[],string[]]); + return { + inputs: derivedInputs, + outputs: derivedOutputs, + columns, + } + } +} + +/** + * converts spreadsheet row data (an array of rows) into a json dataset with column labels and row vectors + * @param data - spreadsheet rows, optionally with a header row of column labels + * @param options - optional columnLabels and on_progress callback + * @returns vectors, labels, dataset and labelsAsFirstRow + */ +export function getSpreadsheetDataset(data:any[][]=[], options?:getSpreadsheetDatasetOptions){ + if(typeof options?.on_progress==='function') options.on_progress({ + status: 'preprocessing', + loss: undefined, + completion_percentage: undefined, + epoch: undefined, + logs: undefined, + defaultLog: { + detail: 'generating dataset from spreadsheet data' + } + }) + const vectors = data.concat([]); + let labelsAsFirstRow = true; + let derivedLabels; + if(data?.length>0 && !options?.columnLabels &&(typeof data[0][0] === typeof data[1][0])){ + labelsAsFirstRow = false; + derivedLabels = data[0].reduce((result,item,index)=>{ + result.push(`column_${index+1}`) + return result; + },[]); + } else if(!options?.columnLabels) derivedLabels = vectors?.splice(0,1)[0] as string[]; + + const labels = options?.columnLabels || derivedLabels; + const dataset = DataSet.reverseColumnMatrix({labels,vectors}); + return { + vectors, + labels, + dataset, + labelsAsFirstRow, + } }
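
A minimal usage sketch of the spreadsheet helpers added in this patch, following the behavior exercised in src/automl.test.ts; the relative import path mirrors the test file, and the sample rows and expected-result comments are illustrative assumptions rather than library guarantees:

import { getSpreadsheetDataset, getInputsOutputsFromDataset } from './jsonm';

// Example rows as read from a Google Sheets range; the first row holds column labels.
const sheetRows = [
  ['sepal_length_cm', 'sepal_width_cm', 'petal_length_cm', 'petal_width_cm', 'plant'],
  [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'],
  [7.0, 3.2, 4.7, 1.4, 'Iris-versicolor'],
  [6.3, 3.3, 6.0, 2.5, ''], // empty label -> this row will need a prediction
];

// Header detection: the first row is treated as labels because its cells have a
// different type than the data rows; otherwise column_1..column_N are generated.
const { labels, dataset, labelsAsFirstRow } = getSpreadsheetDataset(sheetRows);

// Columns containing empty cells are assumed to be outputs; fully populated
// columns become inputs (pass inputs/outputs explicitly to override).
const { inputs, outputs } = getInputsOutputsFromDataset({ dataset, labels });

console.log({ labelsAsFirstRow, inputs, outputs });
// expected shape: inputs -> the four measurement columns, outputs -> ['plant']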
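A rough end-to-end sketch mirroring the mock test above: split the sheet-derived dataset into complete and incomplete rows, then train a prediction model on the complete ones. It assumes a tfjs-node backend wired exactly as in src/automl.test.ts; the helper name trainFromSheet and the returned shape are hypothetical, not part of the patch:

import * as tf from '@tensorflow/tfjs-node';
import { setBackend } from './tensorflow_singleton';
import {
  getSpreadsheetDataset,
  getInputsOutputsFromDataset,
  splitTrainingPredictionData,
  getModel,
} from './jsonm';

setBackend(tf);

// hypothetical wrapper for illustration only
export async function trainFromSheet(sheetRows: any[][]) {
  const { labels, dataset } = getSpreadsheetDataset(sheetRows);
  const { inputs, outputs } = getInputsOutputsFromDataset({ dataset, labels });

  // Rows with empty output cells land in predictionData; complete rows train the model.
  const { trainingData, predictionData } = await splitTrainingPredictionData({
    inputs,
    outputs,
    data: dataset,
  });

  const spreadsheetModel = await getModel({
    type: 'prediction',
    inputs,
    outputs,
    dataset: trainingData,
  });
  await spreadsheetModel.trainModel();

  // predictionData holds the rows that were missing outputs in the original sheet.
  return { spreadsheetModel, predictionData };
}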