Skip to content

Commit

Permalink
feat: do data cleanup practice on language data
Browse files Browse the repository at this point in the history
  • Loading branch information
jonahgoldwastaken committed Oct 20, 2020
1 parent 694ec08 commit dd4c974
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 2 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
node_modules/
dist/
.cache/
.env
.env
*.csv
*.json
70 changes: 70 additions & 0 deletions src/clean-up-practice.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import data from './practice-data.json'
import { compose } from 'ramda'

/**
 * Whitelist of accepted answers, all written in lower case.
 * `filterValueOnValidValues` lower-cases a candidate before looking it up
 * here, so matching is effectively case-insensitive.
 */
const validValues = [
  'nederlands', 'engels', 'duits', 'spaans', 'frans', 'tessels',
  'berbers', 'marokkaans-arabisch', 'koreaans', 'cantonees', 'ghanees',
  'fries', 'kroatisch', 'amerikaans', 'zweeds', 'pools', 'tamil',
  'papiaments', 'indonesisch', 'gebarentaal', 'indisch', 'arabisch',
  'chinees', 'html', 'scss', 'javascript', 'jquery', 'php', 'alles',
]

/**
 * Abbreviations (upper-case keys) mapped to the full language name that
 * `expandShortenedLanguages` substitutes for them.
 */
const shortenedValues = { NL: 'Nederlands', EN: 'Engels', ENG: 'Engels' }

/**
 * Predicate deciding whether a single answer token should be kept.
 *
 * A token is kept when it is non-empty, is not a numeric string, and its
 * lower-cased form appears in `validValues`.
 *
 * @param lang - One token taken from a split answer string.
 * @returns `true` when the token is an accepted value, `false` otherwise.
 *   Always a real boolean (an empty string yields `false`, not `0`).
 */
const filterValueOnValidValues = (lang: string): boolean =>
  // Compare explicitly so the empty-string case produces `false` instead of
  // leaking the number 0 out of the `&&` chain.
  lang.length > 0 &&
  Number.isNaN(Number(lang)) &&
  validValues.includes(lang.toLowerCase())

/**
 * Upper-cases the first character of every language in every answer.
 *
 * The remaining characters are left untouched. Empty strings are passed
 * through unchanged instead of throwing.
 *
 * @param answers - One inner array of language strings per respondent.
 * @returns A new nested array of the same shape with each entry capitalised.
 */
const capitaliseLanguages = (answers: string[][]) =>
  answers.map(item =>
    // charAt(0) yields '' for an empty string, whereas lang[0] is undefined
    // and would make `.toUpperCase()` throw.
    item.map(lang => lang.charAt(0).toUpperCase() + lang.slice(1))
  )

/**
 * Drops every token that `filterValueOnValidValues` rejects, answer by answer.
 *
 * @param answers - One inner array of tokens per respondent.
 * @returns A new nested array; inner arrays keep only the accepted tokens
 *   (and may therefore end up empty).
 */
const filterLanguagesOnInvalidValues = (answers: string[][]) =>
  answers.map(languages =>
    languages.filter(language => filterValueOnValidValues(language))
  )

/**
 * Replaces known abbreviations with the full language name from
 * `shortenedValues`; every other token is returned unchanged.
 *
 * The lookup is case-sensitive and restricted to the table's own keys, so
 * tokens that happen to match inherited object members (e.g. `constructor`,
 * `toString`) are left as plain strings.
 *
 * @param answers - One inner array of tokens per respondent.
 * @returns A new nested array of the same shape with abbreviations expanded.
 */
const expandShortenedLanguages = (answers: string[][]) => {
  // Typed view of the table so it can be indexed by an arbitrary string
  // without falling back to `any`.
  const expansions: Record<string, string> = shortenedValues
  return answers.map(item =>
    item.map(lang =>
      Object.prototype.hasOwnProperty.call(expansions, lang)
        ? expansions[lang] || lang
        : lang
    )
  )
}

/**
 * Breaks each raw answer string into tokens.
 *
 * A token boundary is a single semicolon, comma, full stop or whitespace
 * character; consecutive separators therefore produce empty-string tokens.
 *
 * @param answers - Raw answer strings, one per respondent.
 * @returns One array of tokens per input string.
 */
const splitLanguageString = (answers: string[]) => {
  const separator = /;|,|\.|\s/
  return answers.map(answer => answer.split(separator))
}

/**
 * Extracts the `gesprokenTalen` field from every record of the data set.
 *
 * @param d - Records shaped like the imported practice data.
 * @returns The `gesprokenTalen` value of each record, in input order.
 */
const pickSpokenLanguages = (d: typeof data) =>
  d.map(({ gesprokenTalen }) => gesprokenTalen)

// Full clean-up pipeline for the `gesprokenTalen` answers.
// Ramda's `compose` applies its arguments right-to-left, so the steps below
// run from the bottom of the list to the top:
//   1. pickSpokenLanguages            - take the answer field of each record
//   2. splitLanguageString            - split each answer into tokens
//   3. expandShortenedLanguages       - replace abbreviations by full names
//   4. filterLanguagesOnInvalidValues - keep only whitelisted tokens
//   5. capitaliseLanguages            - upper-case the first letter
const cleanUpSpokenLanguages = compose(
capitaliseLanguages,
filterLanguagesOnInvalidValues,
expandShortenedLanguages,
splitLanguageString,
pickSpokenLanguages
)

// Run the pipeline on the imported practice data and print the result.
console.log(cleanUpSpokenLanguages(data))
3 changes: 2 additions & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@

/* Advanced Options */
"skipLibCheck": true /* Skip type checking of declaration files. */,
"forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */
"forceConsistentCasingInFileNames": true, /* Disallow inconsistently-cased references to the same file. */
"resolveJsonModule": true
},
"include": ["src/**/*"],
"exclude": ["dist/**/*", "node_modules"]
Expand Down

0 comments on commit dd4c974

Please sign in to comment.