Skip to content

Commit

Permalink
Merge pull request #568 from neuroscout/enh/extract_dict
Browse files Browse the repository at this point in the history
Extract additional language features
  • Loading branch information
adelavega committed May 15, 2019
2 parents 62c9058 + 7fd8ca2 commit 6394bbc
Show file tree
Hide file tree
Showing 7 changed files with 112 additions and 37 deletions.
15 changes: 9 additions & 6 deletions neuroscout/config/datasets/ds001338.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@
],
"extractors": [
["PredefinedDictionaryExtractor", {"variables":
{"affect":["V.Mean.Sum", "A.Mean.Sum", "D.Mean.Sum"],
"aoa": ["AoA_Kup"],
"concreteness": ["Conc.M"]},
"missing": "n/a"} ],
["LengthExtractor", {}]
],
{"massiveauditorylexicaldecision":
["StressPattern", "NumSylls", "NumPhones", "Duration",
"OrthUP", "PhonND", "OrthND", "POS", "AllPOS", "FreqSUBTLEX",
"FreqCOCA", "FreqCOCAspok", "FreqGoogle", "PhonUP", "StressCat",
"PhonLev", "NumMorphs", "OrthLev"],
"calgarysemanticdecision": ["Concrete_rating"]
},
"missing": "n/a"} ]
],
"transformations": [
],
"ingest_args": {
Expand Down
16 changes: 8 additions & 8 deletions neuroscout/config/datasets/ds001545.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@
],
"extractors": [
["PredefinedDictionaryExtractor", {"variables":
{"subtlexusfrequency":["FREQcount", "CDcount", "FREQlow", "Cdlow", "SUBTLWF","Lg10WF", "SUBTLCD", "Lg10CD", "Dom_PoS_SUBTLEX", "Freq_dom_PoS_SUBTLEX", "Percentage_dom_PoS", "All_PoS_SUBTLEX", "All_freqs_SUBTLEX", "Zipf-value"]},
"missing": "n/a"} ],
["PredefinedDictionaryExtractor", {"variables":
{"affect":["V.Mean.Sum", "A.Mean.Sum", "D.Mean.Sum"],
"aoa": ["AoA_Kup"],
"concreteness": ["Conc.M"]},
"missing": "n/a"} ],
["LengthExtractor", {}]
{"massiveauditorylexicaldecision":
["StressPattern", "NumSylls", "NumPhones", "Duration",
"OrthUP", "PhonND", "OrthND", "POS", "AllPOS", "FreqSUBTLEX",
"FreqCOCA", "FreqCOCAspok", "FreqGoogle", "PhonUP", "StressCat",
"PhonLev", "NumMorphs", "OrthLev"],
"calgarysemanticdecision": ["Concrete_rating"]
},
"missing": "n/a"} ]
],
"transformations": [
],
Expand Down
13 changes: 8 additions & 5 deletions neuroscout/config/datasets/forrest.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@
],
"extractors": [
["PredefinedDictionaryExtractor", {"variables":
{"affect":["V.Mean.Sum", "A.Mean.Sum", "D.Mean.Sum"],
"aoa": ["AoA_Kup"],
"concreteness": ["Conc.M"]},
"missing": "n/a"} ],
["LengthExtractor", {}]
{"massiveauditorylexicaldecision":
["StressPattern", "NumSylls", "NumPhones", "Duration",
"OrthUP", "PhonND", "OrthND", "POS", "AllPOS", "FreqSUBTLEX",
"FreqCOCA", "FreqCOCAspok", "FreqGoogle", "PhonUP", "StressCat",
"PhonLev", "NumMorphs", "OrthLev"],
"calgarysemanticdecision": ["Concrete_rating"]
},
"missing": "n/a"} ]
],
"transformations": [
],
Expand Down
15 changes: 9 additions & 6 deletions neuroscout/config/datasets/hbn.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,15 @@
],
"extractors": [
["PredefinedDictionaryExtractor", {"variables":
{"affect":["V.Mean.Sum", "A.Mean.Sum", "D.Mean.Sum"],
"aoa": ["AoA_Kup"],
"concreteness": ["Conc.M"]},
"missing": "n/a"} ],
["LengthExtractor", {}]
],
{"massiveauditorylexicaldecision":
["StressPattern", "NumSylls", "NumPhones", "Duration",
"OrthUP", "PhonND", "OrthND", "POS", "AllPOS", "FreqSUBTLEX",
"FreqCOCA", "FreqCOCAspok", "FreqGoogle", "PhonUP", "StressCat",
"PhonLev", "NumMorphs", "OrthLev"],
"calgarysemanticdecision": ["Concrete_rating"]
},
"missing": "n/a"} ]
],
"transformations":[
],
"filters": {
Expand Down
15 changes: 9 additions & 6 deletions neuroscout/config/datasets/life.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@
],
"extractors": [
["PredefinedDictionaryExtractor", {"variables":
{"affect":["V.Mean.Sum", "A.Mean.Sum", "D.Mean.Sum"],
"aoa": ["AoA_Kup"],
"concreteness": ["Conc.M"]},
"missing": "n/a"} ],
["LengthExtractor", {}]
],
{"massiveauditorylexicaldecision":
["StressPattern", "NumSylls", "NumPhones", "Duration",
"OrthUP", "PhonND", "OrthND", "POS", "AllPOS", "FreqSUBTLEX",
"FreqCOCA", "FreqCOCAspok", "FreqGoogle", "PhonUP", "StressCat",
"PhonLev", "NumMorphs", "OrthLev"],
"calgarysemanticdecision": ["Concrete_rating"]
},
"missing": "n/a"} ]
],
"transformations":[
]
}
Expand Down
15 changes: 9 additions & 6 deletions neuroscout/config/datasets/raiders.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@
],
"extractors": [
["PredefinedDictionaryExtractor", {"variables":
{"affect":["V.Mean.Sum", "A.Mean.Sum", "D.Mean.Sum"],
"aoa": ["AoA_Kup"],
"concreteness": ["Conc.M"]},
"missing": "n/a"} ],
["LengthExtractor", {}]
],
{"massiveauditorylexicaldecision":
["StressPattern", "NumSylls", "NumPhones", "Duration",
"OrthUP", "PhonND", "OrthND", "POS", "AllPOS", "FreqSUBTLEX",
"FreqCOCA", "FreqCOCAspok", "FreqGoogle", "PhonUP", "StressCat",
"PhonLev", "NumMorphs", "OrthLev"],
"calgarysemanticdecision": ["Concrete_rating"]
},
"missing": "n/a"} ]
],
"transformations":[
]
}
Expand Down
60 changes: 60 additions & 0 deletions neuroscout/config/feature_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,66 @@
},
"aoa.AoA_Kup": {
"description": "Age of acquisition of spoken words."
},
"massiveauditorylexicaldecision_IsWord": {
"description": "Whether the item is a word or pseudo-word."
},
"massiveauditorylexicaldecision_StressPattern": {
"description": "The stress pattern of the word."
},
"massiveauditorylexicaldecision_NumSylls": {
"description": "The number of syllables in the item."
},
"massiveauditorylexicaldecision_NumPhones": {
"description": "The number of phones in the item."
},
"massiveauditorylexicaldecision_Duration": {
"description": "The duration of the item in milliseconds."
},
"massiveauditorylexicaldecision_OrthUP": {
"description": "The letter index of the orthographic uniqueness point of the item."
},
"massiveauditorylexicaldecision_PhonND": {
"description": "The number of phonological neighbors (one phone edit away)."
},
"massiveauditorylexicaldecision_OrthND": {
"description": "The number of orthographic neighbors (one glyph edit away)."
},
"massiveauditorylexicaldecision_POS": {
"description": "The frequency-dominant part-of-speech of the orthographic form."
},
"massiveauditorylexicaldecision_AllPOS": {
"description": "All parts-of-speech of the orthographic word form."
},
"massiveauditorylexicaldecision_FreqSUBTLEX": {
"description": "The frequency of the orthographic word form (SUBTLEX-US corpus)."
},
"massiveauditorylexicaldecision_FreqCOCA": {
"description": "Word frequency in the COCA corpus."
},
"massiveauditorylexicaldecision_FreqCOCAspok": {
"description": "Word frequency in the spoken language subset of the COCA corpus."
},
"massiveauditorylexicaldecision_FreqGoogle": {
"description": "Word frequency in the Google Unigram corpus."
},
"massiveauditorylexicaldecision_PhonUP": {
"description": "The phone index of the phonological uniqueness point of the item."
},
"massiveauditorylexicaldecision_StressCat": {
"description": "The stress category of word items."
},
"massiveauditorylexicaldecision_PhonLev": {
"description": "Mean phone-level Levenshtein distance from all entries in CMU-A."
},
"massiveauditorylexicaldecision_NumMorphs": {
"description": "The number of morphemes."
},
"massiveauditorylexicaldecision_OrthLev": {
"description": "Mean orthographic Levenshtein distance from all entries in CMU-A."
},
"calgarysemanticdecision_Concrete_rating": {
"description": "Concreteness rating of the word (Calgary Semantic Decision Project)."
}
}
}
Expand Down

0 comments on commit 6394bbc

Please sign in to comment.