Skip to content

Commit

Permalink
new feature: human in the loop auto labeling (#571)
Browse files Browse the repository at this point in the history
* upload analyze file to assets

* move spinner label to strings

* adjust labels

* issue fix

* typo

* adjust getLabelValues

* typo

* remove option bar when upload asset in the future runs

* feature:  "Auto Labeling"

* move AutoLabelingStatus to predictService.ts

* update ocr when auto labeling

(cherry picked from commit 42438598d5d7a325e82bbad191ed483f44094439)

* fix tslint error

* handle formRegion.text undefined error

* decode asset name when uploading asset

* add HITL document override confirm

* Merge branch 'yongbing-chen/human-in-the-loop' of https://github.com/microsoft/OCR-Form-Tools into yongbing-chen/human-in-the-loop

* disable autolabeling button when no predict model

* move runAutoLabelingOnCurrentDocument to canvas.tsx

* remove assetMetadata if no labels are found after auto-labeling

* fix tslint check

* sync regions

* auto-labeling

* Merge branch 'yongbing-chen/human-in-the-loop' into yongbing-chen/human-in-the-loop-auto-labeling

* optimize asset metadata sync

* object deep clone

* fix state update error, change confidence position

* update addRegionsToAsset argument list

* change uploadAssetPredictResult name to syncAssetPredictResult

* refactor getBadgeTaggedClass

* deep clone object

* fix state mutation error, add uploadPredictResultAsOrcResult

* fix mutation error and asset state error

* fix data.analyzeResult.errors breaking change

* updateSelectAsset

* solve tag-input-body-container overflow conflict

* set .canvas-ocr-loading z-index

* add background to confidence value area, adjust the prev, next button position

Co-authored-by: alex-krasn <64093224+alex-krasn@users.noreply.github.com>
  • Loading branch information
yongbing-chen and alex-krasn committed Oct 13, 2020
1 parent 7d1f871 commit c1f227d
Show file tree
Hide file tree
Showing 21 changed files with 507 additions and 176 deletions.
11 changes: 8 additions & 3 deletions src/common/localization/en-us.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ export const english: IAppStrings = {
backEndNotAvailable: "Checkbox feature will work in future version of Form Recognizer service, please stay tuned.",
addName: "Add a model name...",
downloadJson: "Download JSON file",
trainConfirm: {
title: "Labels not revised yet",
message: "You have label files not yet revised, do you want to train with those files?"
},
errors: {
electron: {
cantAccessFiles: "Cannot access files in '${folderUri}' for training. Please check if specified folder URI is correct."
Expand Down Expand Up @@ -209,7 +213,7 @@ export const english: IAppStrings = {
defaultURLInput: "Paste or type URL...",
editAndUploadToTrainingSet: "Edit & upload to training set",
editAndUploadToTrainingSetNotify: "by clicking on this button, this form will be added to this project, where you can edit these labels.",
editAndUploadToTrainingSetNotify2: "We are adding this file to your training set, where you could edit the labels and re-train the model.",
editAndUploadToTrainingSetNotify2: "We are adding this file to your training set, where you can edit the labels and re-train the model.",
uploadInPrgoress: "Upload in progress...",
confirmDuplicatedAssetName: {
title: "Asset name exists",
Expand Down Expand Up @@ -447,7 +451,8 @@ export const english: IAppStrings = {
subIMenuItems: {
runOcrOnCurrentDocument: "Run OCR on current document",
runOcrOnAllDocuments: "Run OCR on all documents",
runAutoLabelingCurrentDocument: "Run AutoLabeling on current document",
runAutoLabelingCurrentDocument: "Auto-label the current document",
runAutoLabelingOnNotLabelingDocuments: "Auto-label new documents",
noPredictModelOnProject: "Predict model not avaliable, please train the model first.",
}
}
Expand Down Expand Up @@ -522,7 +527,7 @@ export const english: IAppStrings = {
tips: {
quickLabeling: {
name: "Lable with hot keys",
description: "Hotkeys 1 through 0 and all letters are assigned to first 36 tags. After selecting one or multiple words, press tag's assigned hotkey.",
description: "Hotkeys 1 through 0 and all letters are assigned to first 36 tags. After selecting one or multiple words, press tag's assigned hotkey.",
},
renameTag: {
name: "Rename tag",
Expand Down
7 changes: 6 additions & 1 deletion src/common/localization/es-cl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ export const spanish: IAppStrings = {
backEndNotAvailable: "La función de casilla de verificación funcionará en la versión futura del servicio de reconocimiento de formularios, manténgase atento.",
addName: "Agregar nombre de modelo ...",
downloadJson: "Descargar archivo JSON",
trainConfirm: {
title: "Etiquetas no revisadas todavía",
message: "Tiene archivos de etiquetas que aún no han sido revisados, ¿desea entrenar con esos archivos?"
},
errors: {
electron: {
cantAccessFiles: "No se puede acceder a los archivos en '${folderUri}' para entrenamiento. Compruebe si el URI de la carpeta especificada es correcto."
Expand Down Expand Up @@ -448,7 +452,8 @@ export const spanish: IAppStrings = {
subIMenuItems: {
runOcrOnCurrentDocument: "Ejecutar OCR en el documento actual",
runOcrOnAllDocuments: "Ejecute OCR en todos los documentos",
runAutoLabelingCurrentDocument: "Ejecutar AutoLabeling en el documento actual",
runAutoLabelingCurrentDocument: "Etiquetar automáticamente el documento actual",
runAutoLabelingOnNotLabelingDocuments: "Etiquetar documentos nuevos automáticamente",
noPredictModelOnProject: "Predecir modelo no disponible, entrene el modelo primero.",
}
}
Expand Down
5 changes: 5 additions & 0 deletions src/common/strings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ export interface IAppStrings {
backEndNotAvailable: string,
addName: string,
downloadJson: string;
trainConfirm: {
title: string;
message: string;
},
errors: {
electron: {
cantAccessFiles: string;
Expand Down Expand Up @@ -443,6 +447,7 @@ export interface IAppStrings {
runOcrOnCurrentDocument: string,
runOcrOnAllDocuments: string,
runAutoLabelingCurrentDocument: string,
runAutoLabelingOnNotLabelingDocuments: string,
noPredictModelOnProject: string,
}
}
Expand Down
19 changes: 19 additions & 0 deletions src/models/applicationState.ts
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ export interface IAsset {
id: string,
type: AssetType,
state: AssetState,
labelingState?: AssetLabelingState,
name: string,
path: string,
size: ISize,
Expand All @@ -163,6 +164,7 @@ export interface IAsset {
predicted?: boolean,
ocr?: any,
isRunningOCR?: boolean,
isRunningAutoLabeling?: boolean,
cachedImage?: string,
mimeType?: string,
}
Expand Down Expand Up @@ -208,6 +210,7 @@ export interface IRegion {
boundingBox?: IBoundingBox,
value?: string,
pageNumber: number,
changed?: boolean,
}

/**
Expand All @@ -216,6 +219,7 @@ export interface IRegion {
*/
export interface ILabelData {
// NOTE(review): presumably the name of the document these labels belong to - confirm against callers
document: string,
// Optional labeling state carried with the label data; when it is absent,
// the Azure blob storage provider falls back to AssetLabelingState.ManualLabeling
labelingState?: AssetLabelingState;
// Labels recorded for the document
labels: ILabel[],
}

Expand All @@ -228,6 +232,7 @@ export interface ILabel {
key?: IFormRegion[],
value: IFormRegion[],
labelType?: string,
confidence?: number,
}

/**
Expand Down Expand Up @@ -367,6 +372,20 @@ export enum AssetState {
Visited = 1,
Tagged = 2,
}
/**
 * @name - Asset Labeling State
 * @description - Defines the labeling state for the asset
 * @member ManualLabeling - Specifies an asset whose tags were labeled manually
 * @member Training - Specifies an asset whose tagged data has been used for training a model
 * @member AutoLabeling - Specifies an asset on which auto-labeling has been run
 * @member AutoLabelingAndAdusted - Specifies an asset on which auto-labeling has been run
 *         and whose tags were then adjusted manually ("Adusted" is the identifier's actual spelling)
 */
export enum AssetLabelingState {
ManualLabeling = 1,
Training = 2,
AutoLabeling = 3,
AutoLabelingAndAdusted = 4,
}

/**
* @name - Region Type
Expand Down
9 changes: 7 additions & 2 deletions src/providers/storage/azureBlobStorage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import { BlobServiceClient, ContainerClient } from "@azure/storage-blob";
import { constants } from "../../common/constants";
import { strings } from "../../common/strings";
import { AppError, AssetState, AssetType, ErrorCode, IAsset, StorageType } from "../../models/applicationState";
import { AppError, AssetState, AssetType, ErrorCode, IAsset, StorageType, ILabelData, AssetLabelingState } from "../../models/applicationState";
import { throwUnhandledRejectionForEdge } from "../../react/components/common/errorHandler/errorHandler";
import { AssetService } from "../../services/assetService";
import { IStorageProvider } from "./storageProviderFactory";
Expand Down Expand Up @@ -214,12 +214,17 @@ export class AzureBlobStorage implements IStorageProvider {

if (files.find((str) => str === labelFileName)) {
asset.state = AssetState.Tagged;
const labelFileName = decodeURIComponent(`${asset.name}${constants.labelFileExtension}`);
const json = await this.readText(labelFileName, true);
const labelData = JSON.parse(json) as ILabelData;
if (labelData) {
asset.labelingState = labelData.labelingState || AssetLabelingState.ManualLabeling;
}
} else if (files.find((str) => str === ocrFileName)) {
asset.state = AssetState.Visited;
} else {
asset.state = AssetState.NotVisited;
}

result.push(asset);
}
}
Expand Down
10 changes: 9 additions & 1 deletion src/react/components/common/assetPreview/assetPreview.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,15 @@ export class AssetPreview extends React.Component<IAssetPreviewProps, IAssetPrev
<div className="asset-loading">
<div className="asset-loading-ocr-spinner">
<Label className="p-0" ></Label>
<Spinner size={SpinnerSize.small} label="Running OCR..." ariaLive="off" labelPosition="right"/>
<Spinner size={SpinnerSize.small} label="Running OCR..." ariaLive="off" labelPosition="right" />
</div>
</div>
}
{this.props.asset.isRunningAutoLabeling &&
<div className="asset-loading">
<div className="asset-loading-ocr-spinner">
<Label className="p-0" ></Label>
<Spinner size={SpinnerSize.small} label="Auto Labeling..." ariaLive="off" labelPosition="right" />
</div>
</div>
}
Expand Down
24 changes: 23 additions & 1 deletion src/react/components/common/tagInput/tagInput.scss
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,18 @@
&-container {
overflow-x: visible;
overflow-y: auto;
padding: 0 0 0 100px;
margin: 0 0 0 -100px;
&::before{
content: " ";
display: inline-block;
position: absolute;
width: 80px;
height: 100%;
left: -80px;
background: linear-gradient(to right, #00000000 0%,#000000 100%);
}

};
}

Expand Down Expand Up @@ -73,13 +85,23 @@
}

&-item-block {
position: relative;
display: flex;
flex-direction: row;
margin: 2px 0;

&-2 {
width: 100%;
}
.tag-item-confidence{
position: absolute;
line-height: 2em;
left: -70PX;
z-index: 900;
text-align: right;
width:50px;
text-shadow: 1px 1px 1px #333;
}
}

&-item {
Expand Down Expand Up @@ -276,5 +298,5 @@ div.circle-picker-container {
}

.loading-tag {
height: 100%;
height: 100%;
}
40 changes: 23 additions & 17 deletions src/react/components/common/tagInput/tagInputItem.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { ITag, ILabel, FieldType, FieldFormat } from "../../../../models/applica
import { strings } from "../../../../common/strings";
import TagInputItemLabel from "./tagInputItemLabel";
import { tagIndexKeys } from "./tagIndexKeys";
import _ from "lodash";

export interface ITagClickProps {
ctrlKey?: boolean;
Expand Down Expand Up @@ -79,9 +80,14 @@ export default class TagInputItem extends React.Component<ITagInputItemProps, IT
const style: any = {
background: this.props.tag.color,
};

const confidence = _.get(this.props, "labels[0].confidence", null);
return (
<div className={"tag-item-block"}>
{confidence &&
<div className="tag-item-confidence">
{confidence}
</div>
}
<div
className={"tag-color"}
style={style}
Expand Down Expand Up @@ -163,20 +169,20 @@ export default class TagInputItem extends React.Component<ITagInputItemProps, IT
<div className="tag-name-body">
{
this.state.isRenaming
?
<input
ref={this.onInputRef}
className={`tag-name-editor ${this.getContentClassName()}`}
type="text"
defaultValue={this.props.tag.name}
onKeyDown={(e) => this.onInputKeyDown(e)}
onBlur={this.onInputBlur}
autoFocus={true}
/>
:
<span title={this.props.tag.name} className={this.getContentClassName()}>
{this.props.tag.name}
</span>
?
<input
ref={this.onInputRef}
className={`tag-name-editor ${this.getContentClassName()}`}
type="text"
defaultValue={this.props.tag.name}
onKeyDown={(e) => this.onInputKeyDown(e)}
onBlur={this.onInputBlur}
autoFocus={true}
/>
:
<span title={this.props.tag.name} className={this.getContentClassName()}>
{this.props.tag.name}
</span>
}
</div>
<div className={"tag-icons-container"}>
Expand All @@ -190,7 +196,7 @@ export default class TagInputItem extends React.Component<ITagInputItemProps, IT
title={strings.tags.toolbar.contextualMenu}
ariaLabel={strings.tags.toolbar.contextualMenu}
className="tag-input-toolbar-iconbutton ml-2"
iconProps={{iconName: "ChevronDown"}}
iconProps={{ iconName: "ChevronDown" }}
onClick={this.onDropdownClick} />
</div>
</div>
Expand Down Expand Up @@ -258,7 +264,7 @@ export default class TagInputItem extends React.Component<ITagInputItemProps, IT
}

private isTypeOrFormatSpecified = () => {
const {tag} = this.props;
const { tag } = this.props;
return (tag.type && tag.type !== FieldType.String) ||
(tag.format && tag.format !== FieldFormat.NotSpecified);
}
Expand Down
6 changes: 4 additions & 2 deletions src/react/components/pages/editorPage/canvas.scss
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
background-color: $darker-1;
border: solid 1px $lighter-2;
color: rgb(0, 161, 241);
z-index: 10;

&:hover, &.active {
background-color: $darker-2;
Expand All @@ -39,14 +40,14 @@
.prev {
position: absolute;
top: 50%;
left: 0;
left: 50px;
margin-left: 10px;
}

.next {
position: absolute;
top: 50%;
right: 0;
right: 50px;
margin-right: 10px;
}

Expand Down Expand Up @@ -77,6 +78,7 @@
background-color: rgba(0, 0, 0, 0.8);
text-align: center;
display: flex;
z-index: 11;
}

.canvas-ocr-loading-spinner {
Expand Down
Loading

0 comments on commit c1f227d

Please sign in to comment.