From 4da369451688971cfdd9ba3f3b16a6e3f5f2135c Mon Sep 17 00:00:00 2001 From: Yaw Joseph Etse Date: Sun, 31 Jul 2022 13:51:24 -0400 Subject: [PATCH] feat: adding support to generate models from google sheets --- src/__test__/mock_automl_data.ts | 1074 ++++++++++++++++++++++++++++++ src/automl.test.ts | 107 +++ src/constants.ts | 2 +- src/index.ts | 3 + src/jsonm.test.ts | 1 + src/jsonm.ts | 209 +++++- 6 files changed, 1377 insertions(+), 19 deletions(-) create mode 100644 src/__test__/mock_automl_data.ts create mode 100644 src/automl.test.ts diff --git a/src/__test__/mock_automl_data.ts b/src/__test__/mock_automl_data.ts new file mode 100644 index 0000000..4ed7c8e --- /dev/null +++ b/src/__test__/mock_automl_data.ts @@ -0,0 +1,1074 @@ +export const autoMLdata = { + "outputs": 5, + "rowRange": [ + 1, + 151 + ], + "colRange": [ + 1, + 5 + ], + "data": [ + [ + "sepal_length_cm", + "sepal_width_cm", + "petal_length_cm", + "petal_width_cm", + "plant" + ], + [ + 5.1, + 3.5, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 4.9, + 3, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 4.7, + 3.2, + 1.3, + 0.2, + "Iris-setosa" + ], + [ + 4.6, + 3.1, + 1.5, + 0.2, + "Iris-setosa" + ], + [ + 5, + 3.6, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 5.4, + 3.9, + 1.7, + 0.4, + "Iris-setosa" + ], + [ + 4.6, + 3.4, + 1.4, + 0.3, + "Iris-setosa" + ], + [ + 5, + 3.4, + 1.5, + 0.2, + "Iris-setosa" + ], + [ + 4.4, + 2.9, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 4.9, + 3.1, + 1.5, + 0.1, + "Iris-setosa" + ], + [ + 5.4, + 3.7, + 1.5, + 0.2, + "Iris-setosa" + ], + [ + 4.8, + 3.4, + 1.6, + 0.2, + "Iris-setosa" + ], + [ + 4.8, + 3, + 1.4, + 0.1, + "Iris-setosa" + ], + [ + 4.3, + 3, + 1.1, + 0.1, + "Iris-setosa" + ], + [ + 5.8, + 4, + 1.2, + 0.2, + "Iris-setosa" + ], + [ + 5.7, + 4.4, + 1.5, + 0.4, + "Iris-setosa" + ], + [ + 5.4, + 3.9, + 1.3, + 0.4, + "Iris-setosa" + ], + [ + 5.1, + 3.5, + 1.4, + 0.3, + "Iris-setosa" + ], + [ + 5.7, + 3.8, + 1.7, + 0.3, + "Iris-setosa" + ], + [ + 5.1, + 3.8, + 1.5, + 0.3, + "" + ], + [ + 5.4, + 3.4, + 1.7, + 0.2, + "" + ], + [ + 5.1, + 3.7, + 1.5, + 0.4, + "" + ], + [ + 4.6, + 3.6, + 1, + 0.2, + "" + ], + [ + 5.1, + 3.3, + 1.7, + 0.5, + "" + ], + [ + 4.8, + 3.4, + 1.9, + 0.2, + "" + ], + [ + 5, + 3, + 1.6, + 0.2, + "" + ], + [ + 5, + 3.4, + 1.6, + 0.4, + "" + ], + [ + 5.2, + 3.5, + 1.5, + 0.2, + "" + ], + [ + 5.2, + 3.4, + 1.4, + 0.2, + "" + ], + [ + 4.7, + 3.2, + 1.6, + 0.2, + "" + ], + [ + 4.8, + 3.1, + 1.6, + 0.2, + "" + ], + [ + 5.4, + 3.4, + 1.5, + 0.4, + "" + ], + [ + 5.2, + 4.1, + 1.5, + 0.1, + "" + ], + [ + 5.5, + 4.2, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 4.9, + 3.1, + 1.5, + 0.1, + "Iris-setosa" + ], + [ + 5, + 3.2, + 1.2, + 0.2, + "Iris-setosa" + ], + [ + 5.5, + 3.5, + 1.3, + 0.2, + "Iris-setosa" + ], + [ + 4.9, + 3.1, + 1.5, + 0.1, + "Iris-setosa" + ], + [ + 4.4, + 3, + 1.3, + 0.2, + "Iris-setosa" + ], + [ + 5.1, + 3.4, + 1.5, + 0.2, + "Iris-setosa" + ], + [ + 5, + 3.5, + 1.3, + 0.3, + "Iris-setosa" + ], + [ + 4.5, + 2.3, + 1.3, + 0.3, + "Iris-setosa" + ], + [ + 4.4, + 3.2, + 1.3, + 0.2, + "Iris-setosa" + ], + [ + 5, + 3.5, + 1.6, + 0.6, + "Iris-setosa" + ], + [ + 5.1, + 3.8, + 1.9, + 0.4, + "Iris-setosa" + ], + [ + 4.8, + 3, + 1.4, + 0.3, + "Iris-setosa" + ], + [ + 5.1, + 3.8, + 1.6, + 0.2, + "Iris-setosa" + ], + [ + 4.6, + 3.2, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 5.3, + 3.7, + 1.5, + 0.2, + "Iris-setosa" + ], + [ + 5, + 3.3, + 1.4, + 0.2, + "Iris-setosa" + ], + [ + 7, + 3.2, + 4.7, + 1.4, + "Iris-versicolor" + ], + [ + 6.4, + 3.2, + 4.5, + 1.5, + "Iris-versicolor" + ], + [ + 6.9, + 
3.1, + 4.9, + 1.5, + "Iris-versicolor" + ], + [ + 5.5, + 2.3, + 4, + 1.3, + "Iris-versicolor" + ], + [ + 6.5, + 2.8, + 4.6, + 1.5, + "Iris-versicolor" + ], + [ + 5.7, + 2.8, + 4.5, + 1.3, + "Iris-versicolor" + ], + [ + 6.3, + 3.3, + 4.7, + 1.6, + "Iris-versicolor" + ], + [ + 4.9, + 2.4, + 3.3, + 1, + "Iris-versicolor" + ], + [ + 6.6, + 2.9, + 4.6, + 1.3, + "Iris-versicolor" + ], + [ + 5.2, + 2.7, + 3.9, + 1.4, + "Iris-versicolor" + ], + [ + 5, + 2, + 3.5, + 1, + "Iris-versicolor" + ], + [ + 5.9, + 3, + 4.2, + 1.5, + "" + ], + [ + 6, + 2.2, + 4, + 1, + "" + ], + [ + 6.1, + 2.9, + 4.7, + 1.4, + "" + ], + [ + 5.6, + 2.9, + 3.6, + 1.3, + "" + ], + [ + 6.7, + 3.1, + 4.4, + 1.4, + "" + ], + [ + 5.6, + 3, + 4.5, + 1.5, + "" + ], + [ + 5.8, + 2.7, + 4.1, + 1, + "" + ], + [ + 6.2, + 2.2, + 4.5, + 1.5, + "" + ], + [ + 5.6, + 2.5, + 3.9, + 1.1, + "" + ], + [ + 5.9, + 3.2, + 4.8, + 1.8, + "" + ], + [ + 6.1, + 2.8, + 4, + 1.3, + "" + ], + [ + 6.3, + 2.5, + 4.9, + 1.5, + "" + ], + [ + 6.1, + 2.8, + 4.7, + 1.2, + "Iris-versicolor" + ], + [ + 6.4, + 2.9, + 4.3, + 1.3, + "Iris-versicolor" + ], + [ + 6.6, + 3, + 4.4, + 1.4, + "Iris-versicolor" + ], + [ + 6.8, + 2.8, + 4.8, + 1.4, + "Iris-versicolor" + ], + [ + 6.7, + 3, + 5, + 1.7, + "Iris-versicolor" + ], + [ + 6, + 2.9, + 4.5, + 1.5, + "Iris-versicolor" + ], + [ + 5.7, + 2.6, + 3.5, + 1, + "Iris-versicolor" + ], + [ + 5.5, + 2.4, + 3.8, + 1.1, + "Iris-versicolor" + ], + [ + 5.5, + 2.4, + 3.7, + 1, + "Iris-versicolor" + ], + [ + 5.8, + 2.7, + 3.9, + 1.2, + "Iris-versicolor" + ], + [ + 6, + 2.7, + 5.1, + 1.6, + "Iris-versicolor" + ], + [ + 5.4, + 3, + 4.5, + 1.5, + "Iris-versicolor" + ], + [ + 6, + 3.4, + 4.5, + 1.6, + "Iris-versicolor" + ], + [ + 6.7, + 3.1, + 4.7, + 1.5, + "Iris-versicolor" + ], + [ + 6.3, + 2.3, + 4.4, + 1.3, + "Iris-versicolor" + ], + [ + 5.6, + 3, + 4.1, + 1.3, + "" + ], + [ + 5.5, + 2.5, + 4, + 1.3, + "" + ], + [ + 5.5, + 2.6, + 4.4, + 1.2, + "" + ], + [ + 6.1, + 3, + 4.6, + 1.4, + "" + ], + [ + 5.8, + 2.6, + 4, + 1.2, + "" + ], + [ + 5, + 2.3, + 3.3, + 1, + "" + ], + [ + 5.6, + 2.7, + 4.2, + 1.3, + "" + ], + [ + 5.7, + 3, + 4.2, + 1.2, + "" + ], + [ + 5.7, + 2.9, + 4.2, + 1.3, + "" + ], + [ + 6.2, + 2.9, + 4.3, + 1.3, + "" + ], + [ + 5.1, + 2.5, + 3, + 1.1, + "" + ], + [ + 5.7, + 2.8, + 4.1, + 1.3, + "" + ], + [ + 6.3, + 3.3, + 6, + 2.5, + "" + ], + [ + 5.8, + 2.7, + 5.1, + 1.9, + "" + ], + [ + 7.1, + 3, + 5.9, + 2.1, + "" + ], + [ + 6.3, + 2.9, + 5.6, + 1.8, + "" + ], + [ + 6.5, + 3, + 5.8, + 2.2, + "" + ], + [ + 7.6, + 3, + 6.6, + 2.1, + "" + ], + [ + 4.9, + 2.5, + 4.5, + 1.7, + "" + ], + [ + 7.3, + 2.9, + 6.3, + 1.8, + "Iris-virginica" + ], + [ + 6.7, + 2.5, + 5.8, + 1.8, + "Iris-virginica" + ], + [ + 7.2, + 3.6, + 6.1, + 2.5, + "Iris-virginica" + ], + [ + 6.5, + 3.2, + 5.1, + 2, + "Iris-virginica" + ], + [ + 6.4, + 2.7, + 5.3, + 1.9, + "Iris-virginica" + ], + [ + 6.8, + 3, + 5.5, + 2.1, + "Iris-virginica" + ], + [ + 5.7, + 2.5, + 5, + 2, + "Iris-virginica" + ], + [ + 5.8, + 2.8, + 5.1, + 2.4, + "Iris-virginica" + ], + [ + 6.4, + 3.2, + 5.3, + 2.3, + "Iris-virginica" + ], + [ + 6.5, + 3, + 5.5, + 1.8, + "Iris-virginica" + ], + [ + 7.7, + 3.8, + 6.7, + 2.2, + "Iris-virginica" + ], + [ + 7.7, + 2.6, + 6.9, + 2.3, + "Iris-virginica" + ], + [ + 6, + 2.2, + 5, + 1.5, + "Iris-virginica" + ], + [ + 6.9, + 3.2, + 5.7, + 2.3, + "Iris-virginica" + ], + [ + 5.6, + 2.8, + 4.9, + 2, + "Iris-virginica" + ], + [ + 7.7, + 2.8, + 6.7, + 2, + "Iris-virginica" + ], + [ + 6.3, + 2.7, + 4.9, + 1.8, + "Iris-virginica" + ], + [ + 6.7, + 3.3, + 5.7, + 
2.1, + "Iris-virginica" + ], + [ + 7.2, + 3.2, + 6, + 1.8, + "Iris-virginica" + ], + [ + 6.2, + 2.8, + 4.8, + 1.8, + "Iris-virginica" + ], + [ + 6.1, + 3, + 4.9, + 1.8, + "Iris-virginica" + ], + [ + 6.4, + 2.8, + 5.6, + 2.1, + "Iris-virginica" + ], + [ + 7.2, + 3, + 5.8, + 1.6, + "Iris-virginica" + ], + [ + 7.4, + 2.8, + 6.1, + 1.9, + "Iris-virginica" + ], + [ + 7.9, + 3.8, + 6.4, + 2, + "Iris-virginica" + ], + [ + 6.4, + 2.8, + 5.6, + 2.2, + "Iris-virginica" + ], + [ + 6.3, + 2.8, + 5.1, + 1.5, + "Iris-virginica" + ], + [ + 6.1, + 2.6, + 5.6, + 1.4, + "Iris-virginica" + ], + [ + 7.7, + 3, + 6.1, + 2.3, + "Iris-virginica" + ], + [ + 6.3, + 3.4, + 5.6, + 2.4, + "Iris-virginica" + ], + [ + 6.4, + 3.1, + 5.5, + 1.8, + "Iris-virginica" + ], + [ + 6, + 3, + 4.8, + 1.8, + "Iris-virginica" + ], + [ + 6.9, + 3.1, + 5.4, + 2.1, + "Iris-virginica" + ], + [ + 6.7, + 3.1, + 5.6, + 2.4, + "Iris-virginica" + ], + [ + 6.9, + 3.1, + 5.1, + 2.3, + "Iris-virginica" + ], + [ + 5.8, + 2.7, + 5.1, + 1.9, + "Iris-virginica" + ], + [ + 6.8, + 3.2, + 5.9, + 2.3, + "Iris-virginica" + ], + [ + 6.7, + 3.3, + 5.7, + 2.5, + "Iris-virginica" + ], + [ + 6.7, + 3, + 5.2, + 2.3, + "Iris-virginica" + ], + [ + 6.3, + 2.5, + 5, + 1.9, + "Iris-virginica" + ], + [ + 6.5, + 3, + 5.2, + 2, + "Iris-virginica" + ], + [ + 6.2, + 3.4, + 5.4, + 2.3, + "Iris-virginica" + ], + [ + 5.9, + 3, + 5.1, + 1.8, + "Iris-virginica" + ] + ], + "inputs": [ + 1, + 4 + ] +}; \ No newline at end of file diff --git a/src/automl.test.ts b/src/automl.test.ts new file mode 100644 index 0000000..4784a66 --- /dev/null +++ b/src/automl.test.ts @@ -0,0 +1,107 @@ +import { TrainingProgressCallback, TrainingProgressUpdate } from './constants'; +// import { getDate } from '../index'; +import { getDateField, getInputs, getModelOptions, getModelTrainingOptions, splitTrainingPredictionData,getInputsOutputsFromDataset , getSpreadsheetDataset, getModel} from './jsonm'; +import * as JSONM from './index'; +import { ModelTypes } from './model'; +import { toBeWithinRange, } from './jest.test'; +expect.extend({ toBeWithinRange }); +import {autoMLdata} from './__test__/mock_automl_data' +import { Data } from '@jsonstack/data/src/DataSet'; +import { setBackend } from './tensorflow_singleton'; +import * as tf from '@tensorflow/tfjs-node'; +setBackend(tf) + +// request->jsonm->column matrix + + +describe('AutoML Sheets Test',()=>{ + describe('getInputsOutputsFromDataset',()=>{ + const labels = ['col1','col2','col3','col4','col5'] + const dataset = [ + {col1:1, col2:2, col3:3, col4:4, col5: 5}, + {col1:10, col2:20, col3:30, col4:undefined, col5: undefined}, + ] + it('should generate inputs and outputs',()=>{ + const io=getInputsOutputsFromDataset({labels,dataset}); + expect(io.inputs).toMatchObject([ 'col1', 'col2', 'col3' ]); + expect(io.outputs).toMatchObject([ 'col4', 'col5' ]); + }) + it('should use supplied inputs and outputs',()=>{ + const io=getInputsOutputsFromDataset({labels,dataset,inputs:['in1','in2'],outputs:['out1']}); + expect(io.inputs).toMatchObject([ 'in1','in2' ]); + expect(io.outputs).toMatchObject([ 'out1' ]); + + }) + }) + describe('getSpreadsheetDataset',()=>{ + it('should generate json dataset from spreadsheet data',()=>{ + const data =[ + ['col1','col2','col3'], + [1, 2, 3, ], + [10, 20, 30, ] + ]; + const shd = getSpreadsheetDataset(data); + expect(shd.labels).toMatchObject(['col1', 'col2', 'col3']) + expect(shd.vectors).toMatchObject([ [1,2,3], [10,20,30], ]) + expect(shd.dataset).toMatchObject([ {col1: 1, col2: 2, col3: 3,}, {col1: 10, col2: 20, 
col3: 30}, ] ) + }); + it('should work if you supply custom column labels',()=>{ + const data =[ + [1, 2, 3, ], + [10, 20, 30, ] + ]; + const shd1 = getSpreadsheetDataset(data,{columnLabels:['col1', 'col2', 'col3']}); + expect(shd1.labels).toMatchObject(['col1', 'col2', 'col3']) + expect(shd1.vectors).toMatchObject([ [1,2,3], [10,20,30], ]) + }) + it('should work if you do not supply custom column labels',()=>{ + const data =[ + [1, 2, 3, ], + [10, 20, 30, ] + ]; + const shd1 = getSpreadsheetDataset(data,); + expect(shd1.labels).toMatchObject(['column_1', 'column_2', 'column_3' ]) + expect(shd1.vectors).toMatchObject([ [1,2,3], [10,20,30], ]) + expect(shd1.dataset).toMatchObject([ {column_1: 1, column_2: 2, column_3: 3,}, {column_1: 10, column_2: 20, column_3: 30}, ] ) + }); + }) + describe('mock end to end example',()=>{ + + it('should run a basic test from spreadsheet data',async ()=>{ + const on_progress = ({ + completion_percentage, + loss, + epoch, + logs, + status, + defaultLog, + }:TrainingProgressUpdate)=>{ + if(status!=='training') console.log({status,defaultLog}) + } + // const vectors = autoMLdata?.data.concat([]); + // const labels = vectors?.splice(0,1)[0] as string[]; + // const dataset = JSONM.Data.DataSet.reverseColumnMatrix({labels,vectors});\ + //@ts-ignore + const{vectors,labels,dataset}=getSpreadsheetDataset(autoMLdata?.data,{on_progress}); + //@ts-ignore + const {columns,inputs,outputs} = JSONM.getInputsOutputsFromDataset({dataset,labels, on_progress}); + const {trainingData,predictionData} = await splitTrainingPredictionData({ + inputs, + outputs, + data: dataset, + }); + // console.log({trainingData,predictionData}); + const SpreadsheetModel = await getModel({ + type:'prediction', + inputs, + outputs, + dataset:trainingData, + //@ts-ignore + on_progress, + }); + await SpreadsheetModel.trainModel(); + + + },30000) + }) +}); diff --git a/src/constants.ts b/src/constants.ts index 37595b8..eb39584 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -129,7 +129,7 @@ export type TrainingProgressUpdate = { loss: number }; status: string; - defaultLog?: boolean; + defaultLog?: boolean|any; } export function training_on_progress({ completion_percentage, loss, epoch, status, logs, defaultLog=true }:TrainingProgressUpdate):void { diff --git a/src/index.ts b/src/index.ts index 505a468..2341ee2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -10,6 +10,9 @@ export { getModel, getModelFromJSONM, getModelTrainingOptions, + splitTrainingPredictionData, + getInputsOutputsFromDataset, + getSpreadsheetDataset, } from './jsonm' export { getDataSet, diff --git a/src/jsonm.test.ts b/src/jsonm.test.ts index 685ac09..3ec201e 100644 --- a/src/jsonm.test.ts +++ b/src/jsonm.test.ts @@ -64,6 +64,7 @@ describe('JSONM',()=>{ }) describe('splitTrainingPredictionData',()=>{ it('should handle empty inputs', async()=>{ + // deepcode ignore MissingArgument/test: used in jest const {trainingData,predictionData} = await splitTrainingPredictionData(); expect(trainingData).toMatchObject([]); expect(predictionData).toMatchObject([]); diff --git a/src/jsonm.ts b/src/jsonm.ts index 86117fe..cbb317c 100644 --- a/src/jsonm.ts +++ b/src/jsonm.ts @@ -1,5 +1,5 @@ import { ModelX, ModelContext, ModelTypes, ModelConfiguration, } from './model'; -import { Data, Datum, } from '@jsonstack/data'; +import { Data, Datum, DataSet, util as DataSetUtil } from '@jsonstack/data'; import { TrainingProgressCallback, } from './constants'; import { JDS, getDataSet, } from './dataset'; @@ -38,6 +38,31 @@ export type 
ModelDataOptions = { inputs: string[]; outputs: string[]; data: JDS | Data; + on_progress?:TrainingProgressCallback, +} + +export type getSpreadsheetDatasetOptions={ + on_progress?:TrainingProgressCallback, + columnLabels?:string[] +} + +export type columnStat ={ + label: string, + labelValues: any[], + dataType:string, + mean?: number, + min?:number, + max?:number, + values:number +} + +export type getInputsOutputsFromDatasetOptions ={ + dataset:Data, + labels:string[], + inputs?:string[], + outputs?:string[], + forceStats?:boolean, + on_progress?:TrainingProgressCallback, } export async function getModelFromJSONM(jml?: JML): Promise<ModelX> { @@ -60,23 +85,6 @@ export const getModel = getModelFromJSONM; -/** - * Splits into training and prediction data - * @param options.inputs - list of inputs - * @param options.outputs - list of outputs - * @param options.data - data to split into training and prediction data - * @returns two objects (trainingData and predictionData) - */ -export async function splitTrainingPredictionData(options?:ModelDataOptions): Promise<{trainingData:Data, predictionData: Data}>{ - const dataset = await getDataSet(options?.data); - const {trainingData, predictionData} = dataset.reduce((result,datum)=>{ - if(options?.outputs?.filter((output)=>datum[output]===undefined || datum[output]===null - ).length) result.predictionData.push(datum); - else result.trainingData.push(datum); - return result; - },{trainingData:[],predictionData:[],}) - return {trainingData,predictionData} -} export function getModelTrainingOptions({ accuracy_target }: { accuracy_target?: number;} ={}) { return { @@ -120,4 +128,169 @@ export function getModelOptions(jml?:JML,datum?:Datum){ ...defaultModelOptions, ...jml?.model_options, } +} + +/** + * Splits into training and prediction data + * @param options.inputs - list of inputs + * @param options.outputs - list of outputs + * @param options.data - data to split into training and prediction data + * @returns two objects (trainingData and predictionData) + */ + export async function splitTrainingPredictionData(options?:ModelDataOptions): Promise<{trainingData:Data, predictionData: Data}>{ + if(typeof options?.on_progress==='function') options.on_progress({ + status: 'preprocessing', + loss: undefined, + completion_percentage: undefined, + epoch: undefined, + logs: undefined, + defaultLog: { + detail: 'generating training data' + } + }) + const dataset = await getDataSet(options?.data); + const {trainingData, predictionData} = dataset.reduce((result,datum)=>{ + if(options?.outputs?.filter((output)=> isEmpty(datum[output]) + ).length) result.predictionData.push(datum); + else result.trainingData.push(datum); + return result; + },{trainingData:[],predictionData:[],}) + return {trainingData,predictionData} +} + + +/** + * function that tests for empty values + * @param val - value to test + * @returns {boolean} + */ +export function isEmpty(val):boolean{ + return val === undefined || val === null || val===''; +} + +/** + * returns inputs and outputs from json data and labels by iterating through the data; if there are rows with missing values, it assumes those are output columns + * e.g., + * labels = ['col1','col2','col3','col4','col5'] + * dataset = [ + * {col1:1, col2:2, col3:3, col4:4, col5: 5}, + * {col1:10, col2:20, col3:30, col4:undefined, col5: undefined}, + * ] + * + * it will assume + * inputs=['col1','col2','col3'] + * outputs=['col4','col5'] + * + * if forceStats is set, it will run stats on each
column like mean, min, max + * @param options - dataset, labels, and optional inputs, outputs, forceStats, and on_progress callback + * @returns inputs, outputs, and per-column stats (when computed) + */ + +export function getInputsOutputsFromDataset({dataset, labels, inputs=[],outputs=[], forceStats=false, on_progress}:getInputsOutputsFromDatasetOptions){ + if(typeof on_progress==='function') on_progress({ + status: 'preprocessing', + loss: undefined, + completion_percentage: undefined, + epoch: undefined, + logs: undefined, + defaultLog: { + detail: 'configuring inputs and outputs' + } + }) + if(inputs?.length && outputs?.length && forceStats===false){ + return { + inputs, + outputs, + columns: undefined + } + } else{ + const columns:columnStat[] = labels.reduce((stats:columnStat[],label)=>{ + const labelValues = DataSet.columnArray(label,{ + data: dataset, + filter: val=> !isEmpty(val) + }); + const dataType = typeof labelValues[0]; + const mean = dataType==='number'?DataSetUtil.mean(labelValues):undefined; + const min = DataSetUtil.min(labelValues); + const max = DataSetUtil.max(labelValues); + stats.push({ + label, + labelValues, + dataType, + mean, + min, + max, + values: labelValues.length + }); + return stats; + },[]); + const maxColumnValue = columns.sort((a,b)=>b.values-a.values)[0].values; + const [derivedInputs,derivedOutputs]:[string[],string[]] = columns.reduce((result,columnStat)=>{ + + if(columnStat.values < maxColumnValue) result[1].push(columnStat.label); + else result[0].push(columnStat.label); + return result; + },[[],[]] as [string[],string[]]); + return { + inputs: derivedInputs, + outputs: derivedOutputs, + columns, + } + } +} + +/** + * converts spreadsheet row data (an array of rows) into a json dataset with column labels and row vectors + * @param data - spreadsheet rows, optionally with a header row of column labels + * @param options - optional columnLabels and on_progress callback + * @returns vectors, labels, dataset and labelsAsFirstRow + */ +export function getSpreadsheetDataset(data:any[][]=[], options?:getSpreadsheetDatasetOptions){ + if(typeof options?.on_progress==='function') options.on_progress({ + status: 'preprocessing', + loss: undefined, + completion_percentage: undefined, + epoch: undefined, + logs: undefined, + defaultLog: { + detail: 'generating dataset from spreadsheet data' + } + }) + const vectors = data.concat([]); + let labelsAsFirstRow = true; + let derivedLabels; + if(data?.length>0 && !options?.columnLabels &&(typeof data[0][0] === typeof data[1][0])){ + labelsAsFirstRow = false; + derivedLabels = data[0].reduce((result,item,index)=>{ + result.push(`column_${index+1}`) + return result; + },[]); + } else if(!options?.columnLabels) derivedLabels = vectors?.splice(0,1)[0] as string[]; + + const labels = options?.columnLabels || derivedLabels; + const dataset = DataSet.reverseColumnMatrix({labels,vectors}); + return { + vectors, + labels, + dataset, + labelsAsFirstRow, + } }
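
A minimal usage sketch of the spreadsheet helpers added in this patch, following the behavior exercised in src/automl.test.ts; the relative import path mirrors the test file, and the sample rows and expected-result comments are illustrative assumptions rather than library guarantees:

import { getSpreadsheetDataset, getInputsOutputsFromDataset } from './jsonm';

// Example rows as read from a Google Sheets range; the first row holds column labels.
const sheetRows = [
  ['sepal_length_cm', 'sepal_width_cm', 'petal_length_cm', 'petal_width_cm', 'plant'],
  [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'],
  [7.0, 3.2, 4.7, 1.4, 'Iris-versicolor'],
  [6.3, 3.3, 6.0, 2.5, ''], // empty label -> this row will need a prediction
];

// Header detection: the first row is treated as labels because its cells have a
// different type than the data rows; otherwise column_1..column_N are generated.
const { labels, dataset, labelsAsFirstRow } = getSpreadsheetDataset(sheetRows);

// Columns containing empty cells are assumed to be outputs; fully populated
// columns become inputs (pass inputs/outputs explicitly to override).
const { inputs, outputs } = getInputsOutputsFromDataset({ dataset, labels });

console.log({ labelsAsFirstRow, inputs, outputs });
// expected shape: inputs -> the four measurement columns, outputs -> ['plant']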
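A rough end-to-end sketch mirroring the mock test above: split the sheet-derived dataset into complete and incomplete rows, then train a prediction model on the complete ones. It assumes a tfjs-node backend wired exactly as in src/automl.test.ts; the helper name trainFromSheet and the returned shape are hypothetical, not part of the patch:

import * as tf from '@tensorflow/tfjs-node';
import { setBackend } from './tensorflow_singleton';
import {
  getSpreadsheetDataset,
  getInputsOutputsFromDataset,
  splitTrainingPredictionData,
  getModel,
} from './jsonm';

setBackend(tf);

// hypothetical wrapper for illustration only
export async function trainFromSheet(sheetRows: any[][]) {
  const { labels, dataset } = getSpreadsheetDataset(sheetRows);
  const { inputs, outputs } = getInputsOutputsFromDataset({ dataset, labels });

  // Rows with empty output cells land in predictionData; complete rows train the model.
  const { trainingData, predictionData } = await splitTrainingPredictionData({
    inputs,
    outputs,
    data: dataset,
  });

  const spreadsheetModel = await getModel({
    type: 'prediction',
    inputs,
    outputs,
    dataset: trainingData,
  });
  await spreadsheetModel.trainModel();

  // predictionData holds the rows that were missing outputs in the original sheet.
  return { spreadsheetModel, predictionData };
}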