From f4bc26018caf02c5b47ec32f9de936d5a8aef7a9 Mon Sep 17 00:00:00 2001 From: misavojte Date: Thu, 11 Jan 2024 14:15:08 +0100 Subject: [PATCH] :bug: Allowing segmented CSV to be classified - missing case in delimiter fn - added support for german csv delimiter --- .../class/Eye/EyeClassifier/EyeClassifier.ts | 74 +++++++++++++++---- 1 file changed, 58 insertions(+), 16 deletions(-) diff --git a/src/lib/class/Eye/EyeClassifier/EyeClassifier.ts b/src/lib/class/Eye/EyeClassifier/EyeClassifier.ts index 02dfb41..50aad42 100644 --- a/src/lib/class/Eye/EyeClassifier/EyeClassifier.ts +++ b/src/lib/class/Eye/EyeClassifier/EyeClassifier.ts @@ -2,16 +2,22 @@ import type { EyeFileType } from '$lib/type/EyeFile/EyeFileType.js' import type { EyeSettingsType } from '$lib/type/Settings/EyeSettings/EyeSettingsType.js' export class EyeClassifier { - classify (slice: string): EyeSettingsType { + classify(slice: string): EyeSettingsType { const type = this.getTypeFromSlice(slice) if (type === 'unknown') throw new Error('Unknown file type') const rowDelimiter = '\r\n' - const columnDelimiter = this.getColumnDelimiter(type) + const columnDelimiter = this.getColumnDelimiter(type, slice) const headerRowId = type === 'ogama' ? 8 : 0 - return { type, rowDelimiter, columnDelimiter, userInputSetting: '', headerRowId } + return { + type, + rowDelimiter, + columnDelimiter, + userInputSetting: '', + headerRowId, + } } - getTypeFromSlice (slice: string): EyeFileType { + getTypeFromSlice(slice: string): EyeFileType { if (this.isTobii(slice)) { if (slice.includes('Event')) return 'tobii-with-event' return 'tobii' @@ -25,7 +31,7 @@ export class EyeClassifier { return 'unknown' } - getColumnDelimiter (type: EyeFileType): string { + getColumnDelimiter(type: EyeFileType, slice: string): string { switch (type) { case 'tobii': case 'tobii-with-event': @@ -33,8 +39,10 @@ export class EyeClassifier { case 'ogama': return '\t' case 'gazepoint': - case 'csv': return ',' + case 'csv': + case 'csv-segmented': + return this.determineCsvDelimiter(slice) case 'varjo': return ';' default: @@ -42,31 +50,65 @@ export class EyeClassifier { } } - isTobii (slice: string): boolean { + isTobii(slice: string): boolean { return slice.includes('Recording timestamp') } - isGazePoint (slice: string): boolean { + isGazePoint(slice: string): boolean { return slice.includes('FPOGS') && slice.includes('FPOGD') } - isBeGaze (slice: string): boolean { - return slice.includes('Event Start Trial Time [ms]') && slice.includes('Event End Trial Time [ms]') + isBeGaze(slice: string): boolean { + return ( + slice.includes('Event Start Trial Time [ms]') && + slice.includes('Event End Trial Time [ms]') + ) } - isOgama (slice: string): boolean { + isOgama(slice: string): boolean { return slice.includes('# Contents: Similarity Measurements of scanpaths.') } - isVarjo (slice: string): boolean { + isVarjo(slice: string): boolean { return slice.includes('Time') && slice.includes('Actor Label') } - isCsv (slice: string): boolean { - return slice.includes('Time') && slice.includes('Participant') && slice.includes('Stimulus') && slice.includes('AOI') + isCsv(slice: string): boolean { + return ( + slice.includes('Time') && + slice.includes('Participant') && + slice.includes('Stimulus') && + slice.includes('AOI') + ) + } + + isCsvSegmented(slice: string): boolean { + return ( + slice.includes('From') && + slice.includes('To') && + slice.includes('Participant') && + slice.includes('Stimulus') && + slice.includes('AOI') + ) } - isCsvSegmented (slice: string): boolean { - return slice.includes('From') && slice.includes('To') && slice.includes('Participant') && slice.includes('Stimulus') && slice.includes('AOI') + /** + * To determine the delimiter used in a CSV file, we count the number of occurrences of the two most common delimiters + * (',' and ';') in the header row. The delimiter with the higher count is used. + * + * @param slice - Text content of the first slice of the file, containing the header row + * @returns delimiter used in the CSV file (either ',' or ';') + */ + determineCsvDelimiter(slice: string): string { + const internationalDelimiter = ',' + const germanDelimiter = ';' + const headerOnly = slice.split('\r\n')[0] + const internationalDelimiterCount = headerOnly.split( + internationalDelimiter + ).length + const germanDelimiterCount = headerOnly.split(germanDelimiter).length + return internationalDelimiterCount > germanDelimiterCount + ? internationalDelimiter + : germanDelimiter } }