From 8babbfa811d96d4ee955dced11e75f3b12be180a Mon Sep 17 00:00:00 2001 From: Justin van der Merwe Date: Tue, 11 Oct 2022 12:38:51 +0200 Subject: [PATCH] feat: Initial implementation of `limit` option (#30) ## Video with context https://www.loom.com/share/b94094839ca04524ab661b8837eebf6e ## Problem At the moment, copycat gives you the ability to generate some fake value corresponding to some input value, for example: ```js copycat.email('real@email.org') // => 'Era_Kunde245@gmail.com' ``` However, there is currently no way to restrict the generated values to be within a given character limit. At [snaplet](https://www.snaplet.dev/), we make use of copycat to replace real values in a database with fake values. This is where the problem comes in: the real values in the database are within some character limit. However, the fake values generated by copycat that we're replacing them with are not always within this same character limit. As a result, we aren't able to use these fake values. ## Solution Add a `limit` option to each copycat API method that generates a string, for example: ``` // generated result will be <= 20 characters copycat.email('real@example.org', { limit: 20 }) ``` ## Approach Copycat makes use of composition to generate values. For example, `copycat.email()` makes use of `copycat.firstName()` and `copycat.lastName()`. The idea is to have each component in each layer of composition be aware of the character limit. For composites like `email`, allocate a `limit` to each component. For example, if the limit is `25`, then give `5` as a limit for `firstName`. Then, for each primitive/leaf (the end of the chain of composition), have it restrict its output to be within that limit. For example, `firstName` makes use of `oneOf()`, and uses it to pick from an array of first names (provided by faker). So this PR replaces this `oneOf()` usage with a new `oneOfString()` function, which will only pick from the list of values that are within that limit. 
If none are found, it defaults to using `copycat.word()`. For more context on the approach, take a look at the video: https://www.loom.com/share/b94094839ca04524ab661b8837eebf6e --- README.md | 16 +++ src/copycat.limit.test.ts | 249 ++++++++++++++++++++++++++++++++++++++ src/email.ts | 38 ++++-- src/firstName.ts | 11 +- src/fullName.ts | 7 +- src/join.ts | 122 +++++++++++++++++++ src/lastName.ts | 10 +- src/oneOfString.ts | 57 +++++++++ src/types.ts | 2 +- 9 files changed, 485 insertions(+), 27 deletions(-) create mode 100644 src/copycat.limit.test.ts create mode 100644 src/join.ts create mode 100644 src/oneOfString.ts diff --git a/README.md b/README.md index 13b5e7d..8219d47 100644 --- a/README.md +++ b/README.md @@ -247,6 +247,10 @@ copycat.email('foo') // => 'Zakary.Block356@gmail.com' ``` +#### `options` + +- **`limit`:** Constrain generated values to be less than or equal to `limit` number of chars + ### `copycat.firstName(input)` Takes in an [input](#input) and returns a string value resembling a first name. @@ -256,6 +260,10 @@ copycat.firstName('foo') // => 'Alejandrin' ``` +#### `options` + +- **`limit`:** Constrain generated values to be less than or equal to `limit` number of chars + ### `copycat.lastName(input)` Takes in an [input](#input) and returns a string value resembling a last name. @@ -265,6 +273,10 @@ copycat.lastName('foo') // => 'Keeling' ``` +#### `options` + +- **`limit`:** Constrain generated values to be less than or equal to `limit` number of chars + ### `copycat.fullName(input)` Takes in an [input](#input) and returns a string value resembling a full name. @@ -274,6 +286,10 @@ copycat.fullName('foo') // => 'Zakary Hessel' ``` +#### `options` + +- **`limit`:** Constrain generated values to be less than or equal to `limit` number of chars + ### `copycat.phoneNumber(input)` Takes in an [input](#input) and returns a string value resembling a [phone number](https://en.wikipedia.org/wiki/MSISDN). 
diff --git a/src/copycat.limit.test.ts b/src/copycat.limit.test.ts
new file mode 100644
index 0000000..9f28e55
--- /dev/null
+++ b/src/copycat.limit.test.ts
@@ -0,0 +1,249 @@
+import { copycat } from '.'
+
+const NUM_CHECKS = 10 // inputs tried per transformation (0 through 9)
+
+export const LIMIT_TRANSFORMATION_NAMES = [ // copycat methods called with a `limit` option below
+  'email',
+  'firstName',
+  'lastName',
+  'fullName',
+] as const
+
+const generateValues = (limit: number) => { // returns { [transformation name]: generated values }
+  const results = {}
+
+  for (const name of LIMIT_TRANSFORMATION_NAMES) {
+    let i = -1 // pre-incremented in the loop condition, so the first input is 0
+    const fn = copycat[name]
+    const transformationResults: unknown[] = []
+    results[name] = transformationResults
+
+    while (++i < NUM_CHECKS) {
+      const result = fn(i, { limit })
+      expect(result.length).toBeLessThanOrEqual(limit) // length is asserted for every value, besides the snapshot
+      transformationResults.push(result)
+    }
+  }
+
+  return results
+}
+
+test('limit: medium', () => {
+  expect(generateValues(25)).toMatchInlineSnapshot(`
+    Object {
+      "email": Array [
+        "Bo_King114@gmail.net",
+        "Bo_Koch349@yahoo.net",
+        "Bo_Ward471@yahoo.com",
+        "Bo_Toy949@gmail.com",
+        "Ed_Dare472@yahoo.com",
+        "Bo_Wiza59@yahoo.info",
+        "Bo_Toy189@gmail.info",
+        "Bo_Von60@gmail.biz",
+        "Ed_Kub941@gmail.org",
+        "Ed_Cole773@yahoo.net",
+      ],
+      "firstName": Array [
+        "Cindy",
+        "Amara",
+        "Zelma",
+        "Glennie",
+        "Kaley",
+        "Betty",
+        "Laurianne",
+        "Horace",
+        "Wilson",
+        "Kamryn",
+      ],
+      "fullName": Array [
+        "Myrl Heidenreich",
+        "Ignacio Reinger",
+        "Vesta Smith",
+        "Ottis Stark",
+        "Nolan Rutherford",
+        "Ernesto Jacobs",
+        "Eleanora Boyle",
+        "Jaiden Muller",
+        "Willow Osinski",
+        "Jane Glover",
+      ],
+      "lastName": Array [
+        "Nitzsche",
+        "Ledner",
+        "Jakubowski",
+        "Boyle",
+        "Emard",
+        "Breitenberg",
+        "Yundt",
+        "Davis",
+        "Zulauf",
+        "Kuphal",
+      ],
+    }
+  `)
+})
+
+test('limit: small', () => {
+  expect(generateValues(10)).toMatchInlineSnapshot(`
+    Object {
+      "email": Array [
+        "VvK1@ko.yu",
+        "MmK3@mi.ra",
+        "SsK4@vi.vi",
+        "KsY9@mi.so",
+        "MmR4@yu.ko",
+        "YrK5@yu.vi",
+        "KkS1@ra.vi",
+        "MmR6@ko.ra",
+        "YsM9@yu.yu",
+        "MyV7@so.yu",
+      ],
+      "firstName": Array [
+        "Eryn",
+        "Osborne",
+        "Lamar",
+        "Lance",
+        "Frank",
+        "Breanna",
+        "Alden",
+        "Stewart",
+        "Rebeka",
+        "Kira",
+      ],
+      "fullName": Array [
+        "Bud Yost",
+        "Ena Batz",
+        "Ian Koch",
+        "Tom Ward",
+        "Tre Haag",
+        "Roy Rowe",
+        "Loy Conn",
+        "Ima Ward",
+        "Guy Lowe",
+        "Rae Fay",
+      ],
+      "lastName": Array [
+        "Abernathy",
+        "Kris",
+        "Wyman",
+        "Kessler",
+        "Braun",
+        "Mante",
+        "Hirthe",
+        "Abbott",
+        "Gerlach",
+        "Dibbert",
+      ],
+    }
+  `)
+})
+
+test('limit: very large', () => {
+  expect(generateValues(999)).toMatchInlineSnapshot(`
+    Object {
+      "email": Array [
+        "Liliane_Powlowski114@gmail.net",
+        "Emely_Buckridge349@yahoo.net",
+        "Jeffry_Kshlerin471@yahoo.com",
+        "Norbert_Funk949@gmail.com",
+        "Lyda_Schowalter472@yahoo.com",
+        "Kaylie_Yost59@yahoo.info",
+        "Catherine_Schmitt189@gmail.info",
+        "Elinore_Kshlerin60@gmail.biz",
+        "Jace_Boehm941@gmail.org",
+        "Howell_Bergnaum773@yahoo.net",
+      ],
+      "firstName": Array [
+        "Cindy",
+        "Amara",
+        "Zelma",
+        "Glennie",
+        "Kaley",
+        "Betty",
+        "Laurianne",
+        "Horace",
+        "Wilson",
+        "Kamryn",
+      ],
+      "fullName": Array [
+        "Liliane Heidenreich",
+        "Emely Reinger",
+        "Jeffry Smith",
+        "Norbert Stark",
+        "Lyda Rutherford",
+        "Kaylie Jacobs",
+        "Catherine Boyle",
+        "Elinore Muller",
+        "Jace Osinski",
+        "Howell Glover",
+      ],
+      "lastName": Array [
+        "Nitzsche",
+        "Ledner",
+        "Jakubowski",
+        "Boyle",
+        "Emard",
+        "Breitenberg",
+        "Yundt",
+        "Davis",
+        "Zulauf",
+        "Kuphal",
+      ],
+    }
+  `)
+})
+
+test('limit: tiny', () => {
+  expect(generateValues(5)).toMatchInlineSnapshot(`
+    Object {
+      "email": Array [
+        "1@k.y",
+        "3@m.r",
+        "4@v.v",
+        "9@m.s",
+        "4@y.k",
+        "5@y.v",
+        "1@r.v",
+        "6@k.r",
+        "9@y.y",
+        "7@s.y",
+      ],
+      "firstName": Array [
+        "Otto",
+        "Nils",
+        "Bria",
+        "Jo",
+        "Alec",
+        "Kaci",
+        "Adah",
+        "Clay",
+        "Nick",
+        "Isac",
+      ],
+      "fullName": Array [
+        "V Vi",
+        "M Mi",
+        "S So",
+        "K So",
+        "M Mi",
+        "Y Ra",
+        "K Ko",
+        "M Mi",
+        "Y So",
+        "M Yu",
+      ],
+      "lastName": Array [
+        "Dare",
+        "Rau",
+        "Howe",
+        "Kris",
+        "King",
+        "Cole",
+        "Koss",
+        "Howe",
+        "Roob",
+        "Roob",
+      ],
+    }
+  `)
+})
diff --git a/src/email.ts b/src/email.ts
index c07f536..cfd9495 100644
--- a/src/email.ts
+++ b/src/email.ts
@@ -1,20 +1,32 @@
 import faker from '@faker-js/faker'
-import { int, oneOf, join } from 'fictional'
+import { int } from 'fictional'
 
 import { firstName } from './firstName'
+import { join } from './join'
 import { lastName } from './lastName'
+import { oneOfString } from './oneOfString'
 import { Input } from './types'
 
-const maker = join('', [
-  firstName,
-  oneOf(['_', '.']),
-  lastName,
-  int.options({
-    min: 2,
-    max: 999,
-  }),
-  '@',
-  oneOf(faker.locales.en!.internet!.free_email!),
-])
+interface EmailOptions {
+  limit?: number // forwarded unchanged to join() as its `limit`
+}
 
-export const email = (input: Input): string => maker(input)
+export const email = (input: Input, options: EmailOptions = {}): string =>
+  join( // NOTE(review): the segment list below is rebuilt on every call
+    input,
+    '', // segments are concatenated without a separator
+    [
+      firstName,
+      oneOfString(['_', '.']), // separator between first and last name
+      lastName,
+      int.options({ // numeric suffix, configured with min 2 and max 999
+        min: 2,
+        max: 999,
+      }),
+      '@',
+      oneOfString(['gmail', 'yahoo', 'hotmail']), // provider name, kept apart from the suffix so each gets its own budget
+      '.',
+      oneOfString(faker.locales.en!.internet!.domain_suffix!), // domain suffix taken from faker's English locale data
+    ],
+    options
+  )
diff --git a/src/firstName.ts b/src/firstName.ts
index 43b6f1c..1cc978b 100644
--- a/src/firstName.ts
+++ b/src/firstName.ts
@@ -1,7 +1,8 @@
 import faker from '@faker-js/faker'
-import { oneOf } from 'fictional'
+import { oneOfString } from './oneOfString'
+import { word } from './primitives'
 
-import { Input } from './types'
-
-export const firstName = (input: Input): string =>
-  oneOf(input, faker.locales.en!.name!.first_name!)
+export const firstName = oneOfString(
+  faker.locales.en!.name!.first_name!,
+  word.options({ capitalize: true })
+)
diff --git a/src/fullName.ts b/src/fullName.ts
index 5c366e1..0feb0a8 100644
--- a/src/fullName.ts
+++ b/src/fullName.ts
@@ -1,8 +1,18 @@
-import { join } from 'fictional'
-
 import { Input } from './types'
 import { firstName } from './firstName'
 import { lastName } from './lastName'
+import { join } from './join'
 
-export const fullName = (input: Input): string =>
-  join(input, ' ', [firstName, lastName])
+interface FullNameOptions {
+  /** Maximum number of characters allowed in the generated name. */
+  limit?: number
+}
+
+/**
+ * Generates a first name and a last name separated by a space. When
+ * `options.limit` is set, the result is at most `limit` characters long.
+ */
+export const fullName = (
+  input: Input,
+  options: FullNameOptions = {}
+): string => join(input, ' ', [firstName, lastName], options)
diff --git a/src/join.ts b/src/join.ts
new file mode 100644
index 0000000..5da5d6a
--- /dev/null
+++ b/src/join.ts
@@ -0,0 +1,144 @@
+import { hash, Input, join as fictionalJoin, JSONSerializable } from 'fictional'
+import { Transform } from './types'
+
+interface JoinOptions {
+  /** Maximum number of characters allowed in the joined result. */
+  limit?: number
+}
+
+// Values that stay constant while a single join is resolved.
+interface SegmentBudgetMetadata {
+  limit: number
+  // Characters used by fixed (non-function) segments plus all joiners.
+  fixedLen: number
+  // Number of function segments sharing the remaining characters.
+  fnSegmentCount: number
+}
+
+// Running totals, updated after each function segment is resolved.
+interface SegmentBudgetState {
+  seenFnSegmentCount: number
+  takenFnSegmentLen: number
+}
+
+/**
+ * Joins the results of `segments` with `joiner`.
+ *
+ * Without `options.limit` this defers to fictional's `join`. With a limit,
+ * the characters left over after fixed segments and joiners are shared
+ * between the function segments, each of which is called with its share as
+ * its own `limit`. The returned string is never longer than `limit`.
+ */
+export const join = (
+  input: Input,
+  joiner: string,
+  segments: Transform[],
+  options: JoinOptions = {}
+) => {
+  const { limit } = options
+
+  if (limit == null) {
+    return fictionalJoin(input, joiner, segments)
+  }
+
+  let nextInput = hash([input, 'copycat:join'] as JSONSerializable)
+
+  const segmentBudgetMetadata = computeSegmentBudgetMetadata(
+    segments,
+    joiner,
+    limit
+  )
+
+  let segmentBudgetState: SegmentBudgetState = {
+    seenFnSegmentCount: 0,
+    takenFnSegmentLen: 0,
+  }
+
+  const resolvedSegments: string[] = []
+
+  for (const segment of segments) {
+    // Re-hash for every segment so each one receives a different input.
+    nextInput = hash(nextInput)
+
+    const [nextSegmentBudgetState, segmentResult] = resolveSegment(
+      nextInput,
+      segmentBudgetState,
+      segmentBudgetMetadata,
+      segment
+    )
+
+    segmentBudgetState = nextSegmentBudgetState
+    resolvedSegments.push(segmentResult)
+  }
+
+  // Fixed segments and joiners alone can exceed a very small limit, so the
+  // final string is capped as well.
+  return resolvedSegments.join(joiner).slice(0, Math.max(0, limit))
+}
+
+// Resolves one segment: fixed values are converted to strings, function
+// segments are called with their share of the budget and cut to that share.
+const resolveSegment = (
+  input: Input,
+  state: SegmentBudgetState,
+  metadata: SegmentBudgetMetadata,
+  segment: Transform
+): [SegmentBudgetState, string] => {
+  if (typeof segment !== 'function') {
+    return [state, (segment as string).toString()]
+  }
+
+  const budget = computeSegmentBudget(state, metadata)
+
+  const segmentResult = segment(input, { limit: budget })
+    .toString()
+    .slice(0, budget)
+
+  const nextState: SegmentBudgetState = {
+    seenFnSegmentCount: state.seenFnSegmentCount + 1,
+    takenFnSegmentLen: state.takenFnSegmentLen + segmentResult.length,
+  }
+
+  return [nextState, segmentResult]
+}
+
+// Counts the function segments and measures everything of fixed length.
+const computeSegmentBudgetMetadata = (
+  segments: Transform[],
+  joiner: string,
+  limit: number
+): SegmentBudgetMetadata => {
+  let fixedSegmentLen = 0
+  let fnSegmentCount = 0
+
+  for (const segment of segments) {
+    if (typeof segment === 'function') {
+      fnSegmentCount++
+    } else {
+      fixedSegmentLen += (segment as string).toString().length
+    }
+  }
+
+  // N segments are separated by N - 1 joiners (none for an empty list).
+  const joinsLen = Math.max(0, segments.length - 1) * joiner.length
+  const fixedLen = fixedSegmentLen + joinsLen
+
+  return {
+    limit,
+    fixedLen,
+    fnSegmentCount,
+  }
+}
+
+// Splits the characters still available evenly between the function
+// segments that have not been resolved yet.
+const computeSegmentBudget = (
+  state: SegmentBudgetState,
+  metadata: SegmentBudgetMetadata
+) => {
+  const { fnSegmentCount, fixedLen, limit } = metadata
+  const { seenFnSegmentCount, takenFnSegmentLen } = state
+  const availableLen = limit - fixedLen - takenFnSegmentLen
+  const remainingSegmentCount = fnSegmentCount - seenFnSegmentCount
+  return Math.max(0, Math.floor(availableLen / remainingSegmentCount))
+}
diff --git a/src/lastName.ts b/src/lastName.ts
index d40341f..159c265 100644
--- a/src/lastName.ts
+++ b/src/lastName.ts
@@ -1,6 +1,8 @@
-import { Input } from './types'
 import faker from '@faker-js/faker'
-import { oneOf } from 'fictional'
+import { oneOfString } from './oneOfString'
+import { word } from './primitives'
 
-export const lastName = (input: Input): string =>
-  oneOf(input, faker.locales.en!.name!.last_name!)
+export const lastName = oneOfString(
+  faker.locales.en!.name!.last_name!,
+  word.options({ capitalize: true })
+)
diff --git a/src/oneOfString.ts b/src/oneOfString.ts
new file mode 100644
index 0000000..b302669
--- /dev/null
+++ b/src/oneOfString.ts
@@ -0,0 +1,72 @@
+import { Input, JSONSerializable, oneOf } from 'fictional'
+import { word } from './primitives'
+
+export interface OneOfOptions {
+  /** Maximum number of characters allowed in the returned string. */
+  limit?: number
+}
+
+// Generates a replacement value when no choice fits within `limit`.
+const defaultFallback = word.options({ capitalize: false })
+
+/**
+ * Creates a function that picks one of `rawChoices` for a given input.
+ *
+ * When `options.limit` is set, only choices of at most `limit` characters are
+ * eligible. If no choice fits, the result of `fallback` is used instead,
+ * truncated to `limit` characters.
+ */
+export const oneOfString = (
+  rawChoices: string[],
+  fallback = defaultFallback
+) => {
+  // Sort a copy once up front so every limited call can binary-search it.
+  const sortedChoices = rawChoices.slice().sort(compareByLength)
+
+  const oneOfStringFn = (input: Input, options: OneOfOptions = {}) => {
+    const { limit } = options
+
+    if (limit == null) {
+      return oneOf(input, rawChoices)
+    }
+
+    const choices = constrainChoices(sortedChoices, limit)
+
+    if (choices.length === 0) {
+      return fallback([input, 'copycat:oneOfString'] as JSONSerializable).slice(
+        0,
+        limit
+      )
+    }
+
+    return oneOf(input, choices)
+  }
+
+  return oneOfStringFn
+}
+
+/**
+ * Returns the leading run of `sortedChoices` (sorted by ascending length)
+ * whose entries are at most `limit` characters long.
+ */
+const constrainChoices = (sortedChoices: string[], limit: number) => {
+  // Binary search for the first index whose choice is longer than `limit`;
+  // everything before that index fits.
+  let lo = 0
+  let hi = sortedChoices.length
+
+  while (lo < hi) {
+    const mid = Math.floor((lo + hi) / 2)
+
+    if (sortedChoices[mid].length <= limit) {
+      lo = mid + 1
+    } else {
+      hi = mid
+    }
+  }
+
+  return sortedChoices.slice(0, lo)
+}
+
+// Orders strings from shortest to longest.
+const compareByLength = (a: string, b: string): number => a.length - b.length
diff --git a/src/types.ts b/src/types.ts
index b296052..49d5c1d 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -1 +1 @@
-export type { Input } from 'fictional'
+export type { Input, Maker as Transform } from 'fictional'