Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 697 lines (669 sloc) 30.3 KB
#!/usr/bin/perl -w
#
# Copyright (C) 2000-2012 Nadav Har'El, Dan Kenigsberg
#
use Carp;
use FileHandle;
# When true, outword() appends a description of each form to the word it
# prints. Toggled by the "-d" command-line switch.
my $detailed_output=0;
# Text that outword() puts in front of that description in detailed mode (the
# noun branch stores the gender here, the adjective branch an empty string).
my $detail_prefix;
# These hashes convert between the ordinary and the final form of a letter:
# %fin maps ordinary => final, %nif maps final => ordinary.
my %fin = ('ë'=>'ê', 'î'=>'í', 'ð'=>'ï', 'ô'=>'ó', 'ö'=>'õ');
my %nif = ('ê'=>'ë', 'í'=>'î', 'ï'=>'ð', 'ó'=>'ô', 'õ'=>'ö');
sub outword {
    # Print one finished word, followed by a newline, on standard output.
    #
    # Arguments (positional):
    #   1. the word to print. It may still contain the internal markers
    #      a, e, i, y, Y, w and h, which are expanded here into ordinary
    #      letters according to the ktiv male rules.
    #   2. (optional) a description of the form. It is appended to the word,
    #      after $detail_prefix, only when $detailed_output is on.
    #
    # The substitutions are order-sensitive and can affect a great many
    # words, so diff the complete output before and after changing any of
    # them.
    my ($word, $details) = @_;

    # A leading "*" marks a form that does not exist and must not be printed;
    # callers use it instead of wrapping groups of calls in huge if()s.
    return if $word =~ m/^\*/;

    for ($word) {
        # Final letters (otiot sofiot) that are no longer last in the word
        # revert to their ordinary form. The a-z in the lookahead covers our
        # own marker letters; the optional apostrophe is kept where it was
        # (ñðãååéõ', ñðãååéö'éí).
        s/([êíïóõ])('?)(?=[à-úa-z])/$nif{$1}$2/g;

        # Vowel markers 'a' and 'e': right after a yud they turn that yud
        # into a consonant (y); anywhere else they are simply dropped.
        # Example: your(feminine) öé is öééê (consonant yud, doubled), while
        # your(masculine) öé is öéê (chirik male, not doubled).
        s/é[ea]/y/g;
        s/[ea]//g;

        # Vowel marker 'i' (chirik chaser): "iy" becomes a double yud, which
        # is what takes ùðé to ùðééä (ùðié -> ùðiéaä -> ùðiyä -> ùðééä); every
        # other 'i' is dropped. Its remaining use is to keep words like ñðàié
        # from being taken for the ordinary patach-aleph-yud ending of úðàé.
        s/iy/éé/g;
        s/i//g;

        # 'Y' is like 'y', except that at the very end of the word (or right
        # before the smichut hyphen) it is a single yud. It serves words like
        # çåìé, whose base form has a chirik male while all the inflections
        # have a consonant yud. No such marker is needed for vav, because a
        # vav at the end of a word is never doubled.
        s/Y($|(?=-))/é/g;
        s/Y/y/g;

        # Consonant yud. A chirik before "yå" is written without a yud of
        # its own, so éyå collapses to éå first. Then a 'y' is doubled when
        # the character before it is not å, é or y and it is followed either
        # by the end of the word or by a character other than å, é, y, ä.
        # Whatever 'y' is left becomes a single yud.
        s/éyå/éå/g;
        s/(?<=[^åéy])y(?=[^åéyä]|$)/éé/g;
        s/y/é/g;

        # Consonant vav. A kubuts-vav followed by the consonant 'w' is
        # written as a single vav (this is how ùå÷ is inflected; see
        # wolig.dat). Then a 'w' is doubled when the character before it is
        # not å or w and the character after it is not å, w or "-"; a 'w' at
        # either end of the word therefore stays single.
        s/åw/å/g;
        s/(?<=[^åw])w(?=[^åw-])/åå/g;
        s/w/å/g;

        # Consonant he: always a plain ä. The marker only exists so that the
        # yud rules above can tell it from an em-kria ä; compare àøéä (lion)
        # with àøééä (her lion).
        s/h/ä/g;
    }

    if ($detailed_output && defined($details)) {
        # The description already says when the form is a smichut, so the
        # trailing hyphen is redundant in detailed mode.
        $word =~ s/-$//;
        $word = $word . " " . $detail_prefix . $details;
    }
    print "$word\n";
}
sub inword {
    # Pre-process a word as it was read from wolig.dat (presumably written
    # in ktiv male): wherever the position of a vav or yud proves that it is
    # a consonant, replace it by the marker "w" or "y" that the rest of the
    # program works with. Returns the converted word.
    #
    # A prediction belongs here only if
    #   1. it is needed for the correct inflection of some word. E.g. seeing
    #      that the åå at the end of îæååä is a consonant avoids the false
    #      inflection îæååå and gives the correct îæåå; and
    #   2. it holds for every word actually listed in wolig.dat. A rule such
    #      as "every åå is a consonant" is out because of words like öéååé.
    #      The rule need not be linguistically right, though: a final å is
    #      treated as a consonant, which is true for öå and î÷åå÷å but not
    #      for àçå. That is harmless, since all the inflections of àçå have
    #      a consonant vav and a final vav is never doubled anyway.
    # The second requirement could be relaxed if the input had a way to
    # force a vowel vav (say an explicit "u"), so that the rare wrong guess
    # could be overridden in the data file.
    my ($word) = @_;

    # A word cannot start with a shuruk or kubuts, so an initial vav is a
    # consonant.
    substr($word, 0, 1, "w") if substr($word, 0, 1) eq "å";

    # Words like çååéä, äìååéä, èøéååéä: no base noun or adjective is known
    # in which this double vav is a vav plus shuruk, so it becomes one "w".
    substr($word, -4, 2, "w") if substr($word, -4, 4) eq "ååéä";

    # Endings, tried in this order; only the first one that matches is
    # rewritten. Each rule is
    #   [ ending, its length, number of leading letters replaced, marker ].
    #  - a final å is a consonant (see above for the harmless exceptions);
    #  - a final ååä was written in ktiv male and means a consonant vav,
    #    which outword doubles again when needed. This matters for the ñâåì_ä
    #    with åú cases: î÷ååä has the plural î÷ååú and the his-possessive
    #    î÷åå, not î÷åååú and î÷ååå. The adjective ðàååä needs it as well,
    #    for its feminine plural;
    #  - a final éä has a consonant yud.
    #    TODO: maybe convert ééä (ktiv male, e.g. ñåôâðééä) into iyä, as
    #    discussed in outword; everything also works without it.
    my @ending_rules = (
        [ "å",   1, 1, "w" ],
        [ "ååä", 3, 2, "w" ],
        [ "éä",  2, 1, "y" ],
    );
    for my $rule (@ending_rules) {
        my ($ending, $span, $cut, $marker) = @$rule;
        next unless substr($word, -$span, $span) eq $ending;
        substr($word, -$span, $cut, $marker);
        last;
    }

    return $word;
}
#############################################################################
#############################################################################
# Script-level state shared with the main loop that follows:
#   $fh        - handle of the data file being read
#   $word      - the word on the current data line
#   $optstring - the raw, comma-separated option field of that line
#   %opts      - the parsed options of that line
my ($fh,$word,$optstring,%opts);
my $infile;
# Usage: wolig.pl [-d] [datafile]
# A leading "-d" toggles the detailed output.
if(@ARGV && $ARGV[0] eq "-d"){
    $detailed_output=!$detailed_output;
    shift @ARGV;
}
# Use the default data file whenever no file name is left on the command
# line; this includes the case where "-d" was the only argument.
$infile = @ARGV ? $ARGV[0] : "wolig.dat";
$fh = FileHandle->new($infile, "r")
    or croak "Couldn't open data file $infile for reading: $!";
while(<$fh>){
# Lines starting with "#*" are comments that are copied to the output as they
# are (newline included, since this happens before the chomp).
print if /^#\*/; # print these comments.
chomp;
s/#.*$//o; # comments start with '#'.
next if /^[ ]*$/o; # ignore blank lines.
# What remains is "<word> <options>", separated by whitespace; any further
# fields on the line are discarded by the list assignment.
($word,$optstring)=split;
die "Type of word '".$word."' was not specified." if !defined($optstring);
# Rebuild %opts from scratch for this line. Options are separated by commas;
# each is either "name" (stored with the value 1) or "name=value".
# NOTE(review): split drops trailing empty fields, so "name=" (an empty
# value) is also stored as 1 rather than as "". The adjective branch compares
# one option value against "" - confirm whether an empty value is ever meant
# to reach it.
undef %opts;
my $val;
foreach $opt (split /,/o, $optstring){
($opt, $val) = (split /=/o, $opt);
$val = 1 unless defined $val;
$opts{$opt}=$val;
}
if($opts{"ò"}){
############################# noun ######################################
# Shortcuts
# Each of these two umbrella options just switches on the finer-grained
# options it stands for.
if($opts{"àéï_ðèéåú"}){
$opts{"éçéã"}=1; $opts{"àéï_ðèéåú_éçéã"}=1;
}
if($opts{"àéï_ëéðåééí"}){
$opts{"àéï_ëéðåéé_éçéã"}=1; $opts{"àéï_ëéðåéé_øáéí"}=1;
}
# note that the noun may have several plural forms (see, for example,
# àåú). When one of the plural forms isn't explicitly specified, wolig
# tries to guess, based on simplistic heuristics that work for the majority
# of the nouns (84% of them, at one time I counted).
# No plural at all: requested by option, or the word itself ends in éåú.
my $plural_none = $opts{"éçéã"} || substr($word,-3,3) eq "éåú";
# The complete plural form was supplied by the user (øáéí=...).
my $plural_bizarre = exists($opts{"øáéí"});
# True when nothing at all was said about the plural, so it must be guessed.
my $plural_implicit = !($opts{"åú"} || $opts{"éí"} || $opts{"éåú"}
|| $opts{"àåú"} || $opts{"ééí"} || $plural_none
|| $plural_bizarre);
# Guess: a word ending in åú gets the éåú plural.
my $plural_iot = $opts{"éåú"} ||
($plural_implicit && (substr($word,-2,2) eq "åú"));
# The àåú plural is never guessed, only requested.
my $plural_xot = $opts{"àåú"};
# Guess: any other word ending in ä or ú gets the åú plural.
my $plural_ot = $opts{"åú"} ||
($plural_implicit && !$plural_iot && (substr($word,-1,1) eq "ä" || substr($word,-1,1) eq "ú" ));
# Guess: everything else gets the éí plural.
my $plural_im = $opts{"éí"} || ($plural_implicit && !$plural_ot && !$plural_iot);
# The ééí plural is never guessed, only requested.
my $plural_iim = $opts{"ééí"};
# The gender is only reported in the detailed output; it has no influence on
# the inflection itself. Explicit gender options win; without them the gender
# is guessed from the options and from the last letter of the word.
if($detailed_output){
    my $last_letter = substr($word,-1,1);
    my $marked_masculine = $opts{"æëø"};
    my $marked_feminine = $opts{"ð÷áä"};
    my $gender;
    if($marked_masculine && $marked_feminine){
        $gender="æ,ð";              # explicitly both genders
    } elsif($marked_masculine){
        $gender="æ";
    } elsif($marked_feminine){
        $gender="ð";
    } elsif($opts{"ñâåì_ä"}){
        $gender="æ";
    } elsif($last_letter eq "ä" && !$opts{"àáã_å"}){
        $gender="ð";
    } elsif($last_letter eq "ú" && !$opts{"éí"}){
        $gender="ð";
    } else {
        $gender="æ";
    }
    # outword() puts this in front of the description of every form.
    $detail_prefix=$gender.",";
}
# preprocess the word the user has given, converting certain ktiv male
# constructs into markers (w, y) that we can better work with later (see
# comments in inword() about what it does).
$word=inword($word);
# related singular noun forms
# Note that an explicitly given nifrad is printed as supplied; it does not go
# through inword().
if(exists $opts{"ðôøã"}){
outword $opts{"ðôøã"}, "ò,éçéã"; # explicit override of the nifrad
} elsif(!$opts{"àéï_éçéã"}){
outword $word, "ò,éçéã"; # the singular noun itself
}
if($opts{"àáã_é"}){
# in words like òéôøåï and äéøéåï the first yud (coming from chirik
# or tsere in ktiv male) is lost in all but the base word
# (only the first yud of the word is removed: the substitution has no /g).
$word =~ s/é//o;
}
# $smichut is the stem to which the singular endings are attached.
my $smichut=$word;
if($opts{"àéï_éçéã"} || $opts{"àéï_ðèéåú_éçéã"}){
# We mark the singular words with "*", telling outword to drop them.
# This makes the code look cleaner than a huge if statement around all
# the singular code. Maybe in the future we should move the singular
# inflection code to a separate function, if() only around that, and
# stop all that "*" nonsense.
$smichut="*".$smichut;
}
#my $smichut_orig=$smichut;
# Singular construct state (smichut) and singular possessives (kinu'im).
# $smichut is the stem the endings are attached to; when it starts with "*"
# outword() silently drops every form built from it.
if($opts{"îéåçã_àç"}){
    # Special case: àç, àá, çí, ôä take an extra yud in the smichut and in
    # the possessives. That yud is always a chirik male - it never carries a
    # vowel of its own - so it is written as a plain é and not as a marker.
    # The first person singular possessive does not get a second yud.
    # With an "im" plural these forms coincide with the plural inflections;
    # with an "ot" plural they do not.
    if(substr($smichut,-1,1) eq "ä"){
        # Remove the ä (basically only one word fits this case: ôä) and add
        # the extra third-person masculine possessive, just like the ñâåì_ä
        # case does (the ñâåì_ä flag itself is not checked here).
        $smichut=substr($smichut,0,-1);
        outword $smichut."éäå", "ò,éçéã,ùì/äåà";
    }
    outword $smichut."é-",  "ò,éçéã,ñîéëåú";    # smichut
    outword $smichut."é",   "ò,éçéã,ùì/àðé";    # possessives (kinu'im)
    outword $smichut."éðå", "ò,éçéã,ùì/àðçðå";
    outword $smichut."éê",  "ò,éçéã,ùì/àúä";
    outword $smichut."éê",  "ò,éçéã,ùì/àú";
    outword $smichut."éëí", "ò,éçéã,ùì/àúí";
    outword $smichut."éëï", "ò,éçéã,ùì/àúï";
    outword $smichut."éå",  "ò,éçéã,ùì/äåà";
    outword $smichut."éä",  "ò,éçéã,ùì/äéà";
    outword $smichut."éäï", "ò,éçéã,ùì/äï";
    outword $smichut."éäí", "ò,éçéã,ùì/äí";
} else {
    # In the construct state a final ä becomes ú, unless the ñâåì_ä option
    # was given.
    if(!$opts{"ñâåì_ä"} && substr($smichut,-1,1) eq "ä"){
        substr($smichut,-1,1)="ú";
    }
    if(exists($opts{"ðñîê"})){
        # explicit override of the smichut form
        outword $opts{"ðñîê"}."-", "ò,éçéã,ñîéëåú";
    } else {
        outword $smichut."-", "ò,éçéã,ñîéëåú"; # smichut
    }
    if($opts{"îéåçã_ùï"}){
        # academia's ktiv male rules indicate that the inflections of ùï
        # (at least the plural is explicitly mentioned...) should get an
        # extra yud - to make it easy to distinguish from the number ùðééí.
        # The yud is inserted right before the last letter, in the stem and
        # in the word alike.
        substr($smichut,0,-1)=substr($smichut,0,-1).'é';
        substr($word,0,-1)=substr($word,0,-1).'é';
    }
    if(substr($word,-2,2) eq "àé" && length($word)>2){
        # in words ending with patach and then the imot kria aleph yud,
        # such as úðàé and âáàé, all the inflections (beside the base word
        # and the smichut) are as if the yud wasn't there.
        # Note that words ending with àé but not patach, like àé and ñðàé,
        # should not get this treatment, so there should be an option to turn
        # it off.
        substr($word,-1,1)="";
        substr($smichut,-1,1)="";
    }
    if($opts{"ñâåì_ä"}){
        # the ä is dropped from the singular inflections, except one
        # alternate inflection like îåøäå (the long form of îåøå).
        # (there's another feminine inflection, îåøä with kamats on the he,
        # but without niqqud it is spelled the same as îåøä with mapik, so
        # we don't need to print it again).
        if(substr($smichut,-1,1) eq "ä"){
            $smichut=substr($smichut,0,-1);
        }
        outword $smichut."ehå", "ò,éçéã,ùì/äåà";
        # TODO: maybe add the "eha" inflection? But it won't generate
        # anything different from the "ah" one below...
    }
    unless ($opts{"àéï_ëéðåéé_éçéã"}){
        # The vowel markers 'a' and 'e' in front of an ending only matter
        # when the stem ends with a yud (e.g. àé): outword() then turns that
        # yud into a consonant and doubles it where required; after any
        # other letter it just drops the marker.
        # The feminine "your" and the two "their" endings were empty strings
        # here, which made these three forms print the bare stem. They carry
        # a vowel marker like their neighbours ("eðå", "ah"): 'e' so that
        # the feminine "your" of öé comes out as öééê while the masculine
        # stays öéê.
        outword $smichut."é",   "ò,éçéã,ùì/àðé";    # possessives (kinu'im)
        outword $smichut."eðå", "ò,éçéã,ùì/àðçðå";
        outword $smichut."ê",   "ò,éçéã,ùì/àúä";
        outword $smichut."eê",  "ò,éçéã,ùì/àú";
        outword $smichut."ëí",  "ò,éçéã,ùì/àúí";
        outword $smichut."ëï",  "ò,éçéã,ùì/àúï";
        outword $smichut."å",   "ò,éçéã,ùì/äåà";
        outword $smichut."ah",  "ò,éçéã,ùì/äéà";
        outword $smichut."aï",  "ò,éçéã,ùì/äï";
        outword $smichut."aí",  "ò,éçéã,ùì/äí";
    }
}
# related plural noun forms
# note: don't combine the $plural_.. ifs, nor use elsif, because some
# nouns have more than one plural forms.
# The "éí" plural with its smichut and possessives.
if($plural_im){
    my $xword=$word;
    if(substr($xword,-1,1) eq "ä" && !$opts{"ùîåø_ú"}){
        # remove final "he" (not "tav", unlike the "ot" pluralization below)
        # before adding the "im" pluralization, unless the ùîåø_ú option was
        # given.
        $xword=substr($xword,0,-1);
    }
    my $xword_orig=$xword;
    if($opts{"àáã_å"}){
        # when the àáã_å flag was given, we remove the first "em kri'a" from
        # the word in most of the inflections. (see a discussion of this
        # option in wolig.dat).
        $xword =~ s/å//o;
    }
    outword $xword."éí", "ò,øáéí";
    # $smichut is the (possibly shortened) stem; $smichut_orig keeps the
    # vav and is used for the smichut and for the àúí/àúï/äï/äí possessives.
    $smichut=$xword;
    my $smichut_orig=$xword_orig;
    unless ($opts{"àéï_ðèéåú_øáéí"}){
        outword $smichut_orig."é-", "ò,øáéí,ñîéëåú"; # smichut
    }
    # (We write patach followed by a consonant yud as "y", and later this
    # will give us the chance to automatically double it as necessary by the
    # Academia's ktiv male rules)
    unless ($opts{"àéï_ëéðåéé_øáéí"}||$opts{"àéï_ðèéåú_øáéí"}){
        outword $smichut."y",        "ò,øáéí,ùì/àðé";   # possessives (kinu'im)
        outword $smichut."éðå",      "ò,øáéí,ùì/àðçðå";
        outword $smichut."éê",       "ò,øáéí,ùì/àúä";
        # The feminine "your" ending was an empty string here, so this form
        # printed the bare stem. It is the consonant yud "y" (doubled by
        # outword where the rules require it) followed by the final kaf.
        outword $smichut."yê",       "ò,øáéí,ùì/àú";
        outword $smichut_orig."éëí", "ò,øáéí,ùì/àúí";
        outword $smichut_orig."éëï", "ò,øáéí,ùì/àúï";
        outword $smichut."éå",       "ò,øáéí,ùì/äåà";
        outword $smichut."éä",       "ò,øáéí,ùì/äéà";
        outword $smichut_orig."éäï", "ò,øáéí,ùì/äï";
        outword $smichut_orig."éäí", "ò,øáéí,ùì/äí";
    }
}
# The "ééí" (dual) plural.
if($plural_iim || $opts{"æåâé"}){
    # The difference between æåâé and ééí is that æåâé adds only the "ééí"
    # plural, while ééí adds the plural and its inflections. For example,
    # for ùðúééí, éåîééí, ùòúééí, ùáåòééí, ð÷åãúééí, one would never say
    # ùðúéé (my two years); On the other hand for other words ééí and all
    # the inflections it implies makes sense, e.g., consider öéôåøðééí,
    # ùôúééí, ÷øðééí.
    my $xword=$word;
    if(substr($xword,-1,1) eq "ä"){
        # Change final he into tav before adding the "iim" pluralization.
        $xword=substr($xword,0,-1)."ú";
    }
    my $xword_orig=$xword;
    # The plural ending was an empty string here, so the dual printed as the
    # bare stem. It is the consonant yud "y" plus final mem; outword()
    # doubles the yud, giving the ééí ending this option is named after.
    outword $xword."yí", "ò,øáéí";
    $smichut=$xword;
    my $smichut_orig=$xword_orig;
    # Smichut and possessives are generated only for ééí, never for æåâé.
    unless ($opts{"àéï_ðèéåú_øáéí"} || !$plural_iim){
        outword $smichut_orig."é-", "ò,øáéí,ñîéëåú"; # smichut
    }
    unless ($opts{"àéï_ëéðåéé_øáéí"}||$opts{"àéï_ðèéåú_øáéí"} || !$plural_iim){
        outword $smichut."y",        "ò,øáéí,ùì/àðé";   # possessives (kinu'im)
        outword $smichut."éðå",      "ò,øáéí,ùì/àðçðå";
        outword $smichut."éê",       "ò,øáéí,ùì/àúä";
        # Was an empty string (printing the bare stem): consonant yud "y"
        # followed by the final kaf.
        outword $smichut."yê",       "ò,øáéí,ùì/àú";
        outword $smichut_orig."éëí", "ò,øáéí,ùì/àúí";
        outword $smichut_orig."éëï", "ò,øáéí,ùì/àúï";
        outword $smichut."éå",       "ò,øáéí,ùì/äåà";
        outword $smichut."éä",       "ò,øáéí,ùì/äéà";
        outword $smichut_orig."éäï", "ò,øáéí,ùì/äï";
        outword $smichut_orig."éäí", "ò,øáéí,ùì/äí";
    }
}
# The "åú" plural with its smichut and possessives.
if($plural_ot){
    my $xword=$word;
    if(substr($xword,-1,1) eq "ä" || substr($xword,-1,1) eq "ú"){
        # remove final "he" or "tav" before adding the "ot" pluralization,
        # unless the ùîåø_ú option was given.
        if(!$opts{"ùîåø_ú"}){
            $xword=substr($xword,0,-1);
        }
    }
    if($opts{"àáã_å"}){
        # In segoliim with cholam chaser that inflect like feminines
        # (i.e., the plural_ot case), the cholam is lost *only* in the base
        # plural, not in other plural inflection. This is comparable to the
        # inflections of the word îìëä, where the patach is lost only in the
        # base plural.
        # See for example âåøï, ãåôï.
        my $tmp = $xword;
        $tmp =~ s/å//o;
        outword $tmp."åú", "ò,øáéí";
    } else {
        outword $xword."åú", "ò,øáéí";
    }
    # All the other forms are built on the full plural, vav included.
    $smichut=$xword."åú";
    unless ($opts{"àéï_ðèéåú_øáéí"}){
        outword $smichut."-", "ò,øáéí,ñîéëåú"; # smichut
    }
    unless ($opts{"àéï_ëéðåéé_øáéí"}||$opts{"àéï_ðèéåú_øáéí"}){
        outword $smichut."y",   "ò,øáéí,ùì/àðé";   # possessives (kinu'im)
        outword $smichut."éðå", "ò,øáéí,ùì/àðçðå";
        outword $smichut."éê",  "ò,øáéí,ùì/àúä";
        # The feminine "your" ending was an empty string here, so this form
        # printed the bare plural. It is the consonant yud "y" (doubled by
        # outword where the rules require it) followed by the final kaf.
        outword $smichut."yê",  "ò,øáéí,ùì/àú";
        outword $smichut."éëí", "ò,øáéí,ùì/àúí";
        outword $smichut."éëï", "ò,øáéí,ùì/àúï";
        outword $smichut."éå",  "ò,øáéí,ùì/äåà";
        outword $smichut."éä",  "ò,øáéí,ùì/äéà";
        outword $smichut."éäï", "ò,øáéí,ùì/äï";
        outword $smichut."éäí", "ò,øáéí,ùì/äí";
    }
}
# The "éåú" plural with its smichut and possessives.
if($plural_iot){
    my $xword=$word;
    if(substr($xword,-1,1) eq "ä" || substr($xword,-1,1) eq "ú"){
        # remove final "he" or "tav" before adding the "iot" pluralization,
        # unless the ùîåø_ú option was given.
        if(!$opts{"ùîåø_ú"}){
            $xword=substr($xword,0,-1);
        }
    }
    outword $xword."éåú", "ò,øáéí";
    # All the other forms are built on the full plural.
    $smichut=$xword."éåú";
    unless ($opts{"àéï_ðèéåú_øáéí"}){
        outword $smichut."-", "ò,øáéí,ñîéëåú"; # smichut
    }
    unless ($opts{"àéï_ëéðåéé_øáéí"}||$opts{"àéï_ðèéåú_øáéí"}){
        outword $smichut."y",   "ò,øáéí,ùì/àðé";   # possessives (kinu'im)
        outword $smichut."éðå", "ò,øáéí,ùì/àðçðå";
        outword $smichut."éê",  "ò,øáéí,ùì/àúä";
        # The feminine "your" ending was an empty string here, so this form
        # printed the bare plural. It is the consonant yud "y" (doubled by
        # outword where the rules require it) followed by the final kaf.
        outword $smichut."yê",  "ò,øáéí,ùì/àú";
        outword $smichut."éëí", "ò,øáéí,ùì/àúí";
        outword $smichut."éëï", "ò,øáéí,ùì/àúï";
        outword $smichut."éå",  "ò,øáéí,ùì/äåà";
        outword $smichut."éä",  "ò,øáéí,ùì/äéà";
        outword $smichut."éäï", "ò,øáéí,ùì/äï";
        outword $smichut."éäí", "ò,øáéí,ùì/äí";
    }
}
# The "àåú" plural with its smichut and possessives.
if($plural_xot){
    my $xword=$word;
    if(substr($xword,-1,1) eq "ä" || substr($xword,-1,1) eq "ú"){
        # remove final "he" or "tav" before adding the "xot" pluralization,
        # unless the ùîåø_ú option was given.
        if(!$opts{"ùîåø_ú"}){
            $xword=substr($xword,0,-1);
        }
    }
    outword $xword."àåú", "ò,øáéí";
    # All the other forms are built on the full plural.
    $smichut=$xword."àåú";
    unless ($opts{"àéï_ðèéåú_øáéí"}){
        outword $smichut."-", "ò,øáéí,ñîéëåú"; # smichut
    }
    unless ($opts{"àéï_ëéðåéé_øáéí"}||$opts{"àéï_ðèéåú_øáéí"}){
        outword $smichut."y",   "ò,øáéí,ùì/àðé";   # possessives (kinu'im)
        outword $smichut."éðå", "ò,øáéí,ùì/àðçðå";
        outword $smichut."éê",  "ò,øáéí,ùì/àúä";
        # The feminine "your" ending was an empty string here, so this form
        # printed the bare plural. It is the consonant yud "y" (doubled by
        # outword where the rules require it) followed by the final kaf.
        outword $smichut."yê",  "ò,øáéí,ùì/àú";
        outword $smichut."éëí", "ò,øáéí,ùì/àúí";
        outword $smichut."éëï", "ò,øáéí,ùì/àúï";
        outword $smichut."éå",  "ò,øáéí,ùì/äåà";
        outword $smichut."éä",  "ò,øáéí,ùì/äéà";
        outword $smichut."éäï", "ò,øáéí,ùì/äï";
        outword $smichut."éäí", "ò,øáéí,ùì/äí";
    }
}
# A plural supplied in full by the user (øáéí=...).
if($plural_bizarre){
    # User specified plural for bizarre cases; For example, the plural of
    # öì is öììéí, the plural of áú is áðåú.
    # We take the fully formed plural from the user, and may need to take
    # off the ending to guess the smichut and possessives (letting the user
    # override the smichut forms too).
    my $plural=$opts{"øáéí"};
    # The ðôøãéí option, when given, replaces the printed plural only; the
    # inflections below are still derived from øáéí.
    outword((exists($opts{"ðôøãéí"}) ? $opts{"ðôøãéí"} : $plural), "ò,øáéí");
    # Overriding the plural nismach with the ðñîëéí option: David Yalin,
    # in his book ã÷ãå÷ äìùåï äòáøéú ("Grammar of the Hebrew Language", 1942)
    # explains on page 207 how some of the kinuyim are known as "kinuyey
    # hanifrad" and some as "kinuyey hanismach", because when the nismach
    # and nifrad differ, they follow different ones. This is important for
    # words like úéù, and in fact the àáã_å option does basically the same
    # thing.
    my $smichut_orig;
    unless ($opts{"àéï_ðèéåú_øáéí"}){
        if(substr($plural,-2,2) eq "åú"){
            $smichut_orig= exists($opts{"ðñîëéí"}) ? $opts{"ðñîëéí"} : $plural;
            # as David Yalin explains (ibid.): "it should be noted that in
            # nouns whose plural marker is -åú, all the plural possessives
            # follow the smichut form".
            $smichut=$smichut_orig;
            outword $smichut_orig."-", "ò,øáéí,ñîéëåú"; # smichut
        } elsif(substr($plural,-2,2) eq "éí" || substr($plural,-2,2) eq "éï"){
            $smichut=substr($plural,0,-2);
            # the removal of the final yod from ðñîëéí is a bit silly...
            # maybe we should have had a î÷åø_ðñîëéí option and ask it
            # without yod.
            $smichut_orig= exists($opts{"ðñîëéí"}) ?
                substr($opts{"ðñîëéí"},0,-1) : $smichut;
            outword $smichut_orig."é-", "ò,øáéí,ñîéëåú"; # smichut
        } else {
            # An unrecognized plural form, so we don't know how to build the
            # construct forms from it. Just skip them, and the possessives
            # as well.
            $opts{"àéï_ëéðåéé_øáéí"}=1;
        }
    }
    unless ($opts{"àéï_ëéðåéé_øáéí"}||$opts{"àéï_ðèéåú_øáéí"}){
        outword $smichut."y",        "ò,øáéí,ùì/àðé";   # possessives (kinu'im)
        outword $smichut."éðå",      "ò,øáéí,ùì/àðçðå";
        outword $smichut."éê",       "ò,øáéí,ùì/àúä";
        # The feminine "your" ending was an empty string here, so this form
        # printed the bare stem. It is the consonant yud "y" (doubled by
        # outword where the rules require it) followed by the final kaf.
        outword $smichut."yê",       "ò,øáéí,ùì/àú";
        outword $smichut_orig."éëí", "ò,øáéí,ùì/àúí";
        outword $smichut_orig."éëï", "ò,øáéí,ùì/àúï";
        outword $smichut."éå",       "ò,øáéí,ùì/äåà";
        outword $smichut."éä",       "ò,øáéí,ùì/äéà";
        outword $smichut_orig."éäï", "ò,øáéí,ùì/äï";
        outword $smichut_orig."éäí", "ò,øáéí,ùì/äí";
    }
}
} elsif($opts{"ú"}){
############################# adjective ##################################
# Adjectives carry no gender prefix in the detailed output: every form states
# its own gender in its description.
$detail_prefix="";
# preprocess the word the user has given, converting certain ktiv male
# constructs into markers (w, y) that we can better work with later (see
# comments in inword() about what it does).
$word=inword($word);
# A preprocessing rule special for adjectives: a final yud will always be
# a chirik male, not some sort of consonant yud or another vowel. We record
# that by putting the chirik marker "i" in front of the yud; together with
# the iy post-transformation in outword, this makes ùðé - ùðééä work
# correctly. However, when the word ends with åé (and not ååé) we assume
# this is shuruk followed by a consonant yud (for example, îöåé). In
# words that do end in ååé and the åå is not a consonant we must use a
# w explicitly, (e.g. øååé should be written explicitly as øwåé).
if($word =~ m/([^aeiå]|åå)é$/o){
    # The replacement was an empty string here, which deleted the final yud
    # instead of marking it. The code below tests for a final "ié" / "àié"
    # and strips an optional "i" before the yud, so "ié" is what it expects.
    substr($word,-1,1) = "ié";
}
my $xword=$word;
if(substr($xword,-1,1) eq "ä"){
    # remove final "he" before adding the pluralization,
    # unless the ùîåø_ä option was given.
    if(!$opts{"ùîåø_ä"}){
        $xword=substr($xword,0,-1);
    }
}
if($opts{"òí"}){
    # For nationality adjectives (always ending in yud!), there is a separate
    # plural for the people of that nationality (rather than other objects
    # from that country), with only í added. There's also a country name,
    # and sometimes a female-person form too (ð÷áä_ä). We generate these
    # here, instead of separately in extrawords, so that the country list can
    # be organized nicely at one place.
    if(exists($opts{"àøõ"})){
        # Explicit country name; an empty one means "print no country".
        outword $opts{"àøõ"}, "ò,ôøèé,ð" if($opts{"àøõ"} ne ""); # country name
    } elsif(substr($word,-3,3) eq "àié"){
        outword substr($word,0,-3)."ä", "ò,ôøèé,ð"; # country name
    } else {
        # Strip the adjective's final yud (with its chirik marker) and give
        # the letter that is now last its final form.
        my $country = $word;
        $country =~ s/i?é$//;
        $country =~ s/([ëîðôö])$/$fin{$1}/;
        outword $country, "ò,ôøèé,ð"; # country name
    }
    outword $word."í", "ò,øáéí,æ"; # plural (people of that nationality)
    $opts{"ð÷áä_ú"}=1; # for enabling ú plural. adding ä plural is optional.
}
if(!exists($opts{"éçéã"})){
    outword $word, "ú,éçéã,æ"; # masculine, singular
    outword $word."-", "ú,éçéã,æ,ñîéëåú"; # smichut (same as nifrad)
} else {
    # explicit override of the masculine singular
    outword $opts{"éçéã"}, "ú,éçéã,æ"; # masculine, singular
    outword $opts{"éçéã"}."-", "ú,éçéã,æ,ñîéëåú"; # smichut (same as nifrad)
}
if($opts{"í"}){
    # special case for adjectives like øùàé. Unlike the noun case where we
    # turn this option automatically for words ending with àé, here such a
    # default would not be useful because a lot of nouns ending with ä or à
    # correspond to adjectives ending with àé that this rule doesn't fit.
    outword $xword."í", "ú,øáéí,æ"; # masculine, plural
    outword $xword."-", "ú,øáéí,æ,ñîéëåú"; # smichut
} else {
    outword $xword."éí", "ú,øáéí,æ"; # masculine, plural
    outword $xword."é-", "ú,øáéí,æ,ñîéëåú"; # smichut
}
# feminine, singular:
# When no feminine option was given, the form is guessed from the ending.
my $nekeva_implicit = !($opts{"ð÷áä_ú"} || $opts{"ð÷áä_ä"} ||
                        $opts{"ð÷áä_éú"} || $opts{"éçéãä"});
# by checking for final ié, we're basically checking for final é except
# in final åé (see the preprocessing rule above, where the i is added).
# (The comparison was against an empty string here, so the ú feminine was
# never chosen implicitly.)
my $nekeva_t = $opts{"ð÷áä_ú"} ||
    ($nekeva_implicit && substr($xword,-2,2) eq "ié");
my $nekeva_h = $opts{"ð÷áä_ä"} ||
    ($nekeva_implicit && !$nekeva_t);
my $nekeva_it = $opts{"ð÷áä_éú"};
if(exists($opts{"éçéãä"})){
    # explicit override of the feminine singular; in its smichut a final ä
    # becomes ú unless ùîåø_ä was given.
    my $yechida=$opts{"éçéãä"};
    outword $yechida, "ú,éçéã,ð";
    $yechida =~ s/ä$/ú/ if(!$opts{"ùîåø_ä"});
    outword $yechida."-", "ú,éçéã,ð,ñîéëåú";
}
if($nekeva_t){
    if(substr($word,-1,1) eq "ä" && !$opts{"ùîåø_ä"}){
        # This is a rare case, where an adjective ending with ä gets a ú
        # feminine form, and an extra yud needs to be added. For example
        # îåôìä, îåôìéú.
        outword $xword."éú", "ú,éçéã,ð";
        outword $xword."éú-", "ú,éçéã,ð,ñîéëåú"; # smichut (same as nifrad)
    } else {
        # note: we don't bother adding the vowel "e" before the ú because
        # that would only make a difference before a yud - and interestingly
        # when there *is* a yud, the vowel is dropped anyway!
        outword $xword."ú", "ú,éçéã,ð";
        outword $xword."ú-", "ú,éçéã,ð,ñîéëåú"; # smichut (same as nifrad)
    }
}
if($nekeva_h){
    # The ending was an empty string here, so the feminine printed as the
    # bare stem. It is the vowel marker "a" plus ä, like the "aú-" of the
    # smichut: this is the step that takes ùðié to ùðiéaä, which outword then
    # turns into ùðééä.
    outword $xword."aä", "ú,éçéã,ð";
    outword $xword."aú-", "ú,éçéã,ð,ñîéëåú"; # smichut
}
if($nekeva_it){
    outword $xword."éú", "ú,éçéã,ð";
    outword $xword."éú-", "ú,éçéã,ð,ñîéëåú"; # smichut
}
# Feminine, plural:
# It stays the same, regardless of the singular form. The only exception
# is the éú feminine, where the plural becomes éåú. Note that there is
# no "else" in the if below - because we need to support the case that
# one word has both types of plural (e.g., see àäáì).
if($nekeva_h || $nekeva_t || $opts{"éçéãä"}){
    outword $xword."åú", "ú,øáéí,ð"; # feminine, plural
    outword $xword."åú-", "ú,øáéí,ð,ñîéëåú"; # smichut (same as nifrad)
}
if($nekeva_it){
    outword $xword."éåú", "ú,øáéí,ð"; # feminine, plural
    outword $xword."éåú-", "ú,øáéí,ð,ñîéëåú"; # smichut (same as nifrad)
}
} else {
die "word '".$word."' was not specified as noun, adjective or verb.";
}
outword "-------"
}