Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 39 additions & 40 deletions algorithms/tamil/stem_Unicode.sbl
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ integers (

/*
define q_suffixes '{vs_aa}{vs_oo}{vs_ee}'
define q_prefixes '{e}'
define q_prefixes '{e}'
define word_starter '{ka}{ca}{tha}{va}{na}{pa}{ma}{ya}{nga}{nya}'
define suttezhuthu '{a}{i}{u}'
define vallinam '{ka}{ca}{tta}{tha}{pa}{rra}'
Expand Down Expand Up @@ -236,7 +236,7 @@ define remove_command_suffixes as (
unset found_a_match
backwards (
[ among('{pa}{vs_i}' '{va}{vs_i}') ] delete
(set found_a_match)
(set found_a_match)
)
)

Expand All @@ -250,40 +250,40 @@ define remove_um as (
)

define remove_common_word_endings as (
// These are not suffixes actually but are
// These are not suffixes actually but are
// some words that are attached to other words
// but can be removed for stemming
unset found_a_match
has_min_length
backwards (
test ( [ '{vs_u}{tta}{nnna}{pulli}' or
'{vs_i}{la}{pulli}{la}{vs_ai}' or
'{vs_i}{tta}{ma}{pulli}' or
'{vs_i}{tta}{ma}{pulli}' or
'{vs_i}{nnna}{pulli}{rra}{vs_i}' or
'{vs_aa}{ka}{vs_i}' or
'{vs_aa}{ka}{vs_i}' or
'{vs_aa}{ka}{vs_i}{ya}' or
'{vs_e}{nnna}{pulli}{rra}{vs_u}' or
'{vs_e}{nnna}{pulli}{rra}{vs_u}' or
'{vs_u}{lla}{pulli}{lla}' or
'{vs_u}{tta}{vs_ai}{ya}' or
'{vs_u}{tta}{vs_ai}{ya}' or
'{vs_u}{tta}{vs_ai}' or
'{vs_e}{nnna}{vs_u}{ma}{pulli}' or
('{la}{pulli}{la}' test (not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}'))) or
'{vs_e}{nnna}' or
'{vs_e}{nnna}' or
'{vs_aa}{ka}{vs_i}' ] <- '{pulli}'
(set found_a_match)
)
or
test ( [ among('{pa}{tta}{vs_u}'
'{pa}{tta}{pulli}{tta}'
test ( [ among('{pa}{tta}{vs_u}'
'{pa}{tta}{pulli}{tta}'
'{pa}{tta}{pulli}{tta}{vs_u}'
'{pa}{tta}{pulli}{tta}{ta}{vs_u}'
'{pa}{tta}{pulli}{tta}{nna}'
'{ka}{vs_u}{ra}{vs_i}{ya}'
'{pa}{tta}{pulli}{tta}{ta}{vs_u}'
'{pa}{tta}{pulli}{tta}{nna}'
'{ka}{vs_u}{ra}{vs_i}{ya}'
'{pa}{rra}{pulli}{rra}{vs_i}'
'{va}{vs_i}{tta}{vs_u}'
'{va}{vs_i}{tta}{pulli}{tta}{vs_u}'
'{pa}{tta}{vs_i}{ta}{vs_aa}{nnna}'
'{pa}{tta}{vs_i}'
'{pa}{tta}{vs_i}'
'{ta}{vs_aa}{nnna}'
'{vs_e}{la}{pulli}{la}{vs_aa}{ma}{pulli}')
] delete
Expand All @@ -305,35 +305,34 @@ define remove_vetrumai_urupukal as (
'{vs_ai}' (test not among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}'))) or
( '{vs_ai}' (test (among('{ka}' '{ca}' '{tta}' '{tha}' '{pa}' '{rra}') '{pulli}')))
] <- '{pulli}'

)
or
test ( [
'{vs_o}{tta}{vs_u}' or
test ( [
'{vs_o}{tta}{vs_u}' or
'{vs_oo}{tta}{vs_u}' or
'{vs_i}{la}{pulli}' or
'{vs_i}{la}{pulli}' or
'{vs_i}{rra}{pulli}' or
('{vs_i}{nnna}{pulli}' (test not '{ma}')) or
'{vs_i}{nnna}{pulli}{rra}{vs_u}' or
'{vs_i}{ra}{vs_u}{na}{pulli}{ta}{vs_u}' or
'{va}{vs_i}{tta}' or
'{va}{vs_i}{tta}' or
($length >= 7 '{vs_i}{tta}{ma}{pulli}') or
'{vs_aa}{la}{pulli}' or
'{vs_aa}{la}{pulli}' or
'{vs_u}{tta}{vs_ai}' or
'{vs_aa}{ma}{la}{pulli}' or
('{la}{pulli}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}'))) or
'{vs_u}{lla}{pulli}'
'{vs_u}{lla}{pulli}'
] <- '{pulli}'
)
or
test ( [
'{ka}{nna}{pulli}' or
test ( [
'{ka}{nna}{pulli}' or
'{ma}{vs_u}{nnna}{pulli}' or
'{ma}{vs_ee}{la}{pulli}' or
'{ma}{vs_ee}{la}{pulli}' or
'{ma}{vs_ee}{rra}{pulli}' or
'{ka}{vs_ii}{llla}{pulli}' or
'{ka}{vs_ii}{llla}{pulli}' or
'{pa}{vs_i}{nnna}{pulli}' or
('{ta}{vs_u}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')))
('{ta}{vs_u}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}')))
] delete
)
or
Expand Down Expand Up @@ -364,30 +363,30 @@ define remove_tense_suffix as (
)
or
test ( [
'{ma}{vs_aa}{ra}{pulli}' or
'{ma}{vs_aa}{ra}{pulli}' or
'{ma}{vs_i}{nnna}{pulli}' or
'{nnna}{nnna}{pulli}' or
'{nnna}{nnna}{pulli}' or
'{nnna}{vs_aa}{nnna}{pulli}' or
'{nnna}{vs_aa}{lla}{pulli}' or
'{nnna}{vs_aa}{lla}{pulli}' or
'{nnna}{vs_aa}{ra}{pulli}' or
('{va}{nnna}{pulli}' test (not among('{a}' '{aa}' '{i}' '{ii}' '{u}' '{uu}' '{e}' '{ee}' '{ai}' '{o}' '{oo}' '{au}')) ) or
'{nnna}{lla}{pulli}' or
'{nnna}{lla}{pulli}' or
'{va}{lla}{pulli}' or
'{nnna}{ra}{pulli}' or
'{nnna}{ra}{pulli}' or
'{va}{ra}{pulli}' or
'{nnna}' or '{pa}' or '{ka}' or '{ta}' or '{ya}' or
'{pa}{nnna}{pulli}' or
'{pa}{nnna}{pulli}' or
'{pa}{lla}{pulli}' or
'{pa}{ra}{pulli}' or
('{ta}{vs_u}' (test not among('{vs_aa}' '{vs_i}' '{vs_ii}' '{vs_e}' '{vs_ee}' '{vs_u}' '{vs_uu}' '{vs_ai}'))) or
'{vs_i}{rra}{pulli}{rra}{vs_u}' or
'{pa}{ma}{pulli}' or
'{pa}{ma}{pulli}' or
'{nnna}{ma}{pulli}' or
'{ta}{vs_u}{ma}{pulli}' or
'{ta}{vs_u}{ma}{pulli}' or
'{rra}{vs_u}{ma}{pulli}' or
'{ka}{vs_u}{ma}{pulli}' or
'{ka}{vs_u}{ma}{pulli}' or
'{nnna}{vs_e}{nnna}{pulli}' or
'{nnna}{vs_ai}' or
'{nnna}{vs_ai}' or
'{va}{vs_ai}'
] delete
(set found_a_match)
Expand All @@ -397,11 +396,11 @@ define remove_tense_suffix as (
('{vs_aa}{nnna}{pulli}' test (not '{ca}')) or
'{vs_aa}{lla}{pulli}' or
'{vs_aa}{ra}{pulli}' or
'{vs_ee}{nnna}{pulli}' or
'{vs_ee}{nnna}{pulli}' or
'{vs_aa}' or
'{vs_aa}{ma}{pulli}' or
'{vs_e}{ma}{pulli}' or
'{vs_ee}{ma}{pulli}' or
'{vs_ee}{ma}{pulli}' or
'{vs_oo}{ma}{pulli}' or
'{ka}{vs_u}{ma}{pulli}' or
'{ta}{vs_u}{ma}{pulli}' or
Expand All @@ -411,7 +410,7 @@ define remove_tense_suffix as (
'{nnna}{vs_e}{nnna}{pulli}' or
'{nnna}{vs_i}{ra}{pulli}' or
'{vs_ii}{ra}{pulli}' or
'{vs_ii}{ya}{ra}{pulli}'
'{vs_ii}{ya}{ra}{pulli}'
] <- '{pulli}'
(set found_a_match)
)
Expand All @@ -426,7 +425,7 @@ define remove_tense_suffix as (
'{ka}{vs_i}{nnna}{pulli}{rra}'
'{ka}{vs_i}{nnna}{pulli}{rra}{pulli}'
'{ka}{vs_i}{rra}'
'{ka}{vs_i}{rra}{pulli}'
'{ka}{vs_i}{rra}{pulli}'
)] delete
(set found_a_match)
)
Expand Down