wolig.pl

#!/usr/bin/perl -w
#
# Copyright (C) 2000-2002 Nadav Har'El, Dan Kenigsberg
#
use Carp;
use FileHandle;

# True when every printed word should be followed by a description of the
# inflection; toggled by the -d command-line flag and read by outword() and
# by the noun code of the main loop.
my $detailed_output=0;
# Text that outword() puts in front of the per-inflection details. The main
# loop sets it to "<gender>," for nouns (only in detailed mode) and to "" for
# adjectives.
my $detail_prefix;

# outword WORD [, DETAILS]
#
# Print one inflected word, followed by a newline, on standard output, after
# converting wolig's internal markers (a, e, i, y, Y, w, h) into plain Hebrew
# letters. A WORD that starts with "*" is silently dropped. When
# $detailed_output is set and DETAILS is given, a trailing "-" is removed from
# the word and " ".$detail_prefix.DETAILS is appended to the printed line.
sub outword {
  my $word = shift;
  my $details = shift;

  # "*" sign used to signify non-existent word that should not be output.
  # It will allow us to more-easily drop words without huge if()s.
  return if $word =~ m/^\*/;

  # change otiot-sofiot in the middle of the word
  # (the silly a-z was added for our special "y" and "w" marks).
  # Each final-form letter is replaced only when another letter (or one of
  # the ASCII markers) follows it.
  $word =~ s/ê(?=[à-úa-z])/ë/go;
  $word =~ s/ï(?=[à-úa-z])/ð/go;
  $word =~ s/í(?=[à-úa-z])/î/go;
  $word =~ s/õ(?=[à-úa-z])/ö/go;
  $word =~ s/ó(?=[à-úa-z])/ô/go;

  # change special consonant marks into the proper Hebrew letters, using
  # proper ktiv male rules.

  # Note that the order of these conversion is important. Since they have
  # the potential of changing so many words, it is highly recommended to
  # diff the output files before and after the change, to see that no
  # unexpected words got changed.

  # The vowel markers 'a' and 'e' do nothing except to a yud (chirik male) -
  # which turns it into a consonant yud; For example your(feminine) öé is
  # öééê (tsere in the yud, so it's a consonant and doubled) and
  # your(masculine) öé is öéê (yud is chirik male, and not doubled)
  $word =~ s/é[ea]/y/go;
  # Any 'a' or 'e' that did not follow a yud is simply deleted.
  $word =~ s/[ea]//go;

  # The vowel 'i' is a chirik chaser - it should be followed by a yud if
  # necessary. We do nothing with it currently - it's only useful for words
  # like ñðàié where we want to make sure that wolig.pl does not think this
  # is the normal patach-aleph-yud (with no niqqud under the aleph) case as
  # in úðàé.
  # The first rule here is useful for transformation from ùðé to ùðééä, via
  # ùðé adj-inword> ùðié feminine> ùðiéaä outword> ùðiyä outword> ùðééä
  $word =~ s/iy/éé/go;  # useful in stuff like ùðié - ùðééä
  $word =~ s/i//go;

  # Y is the same as y, except it is not translated to a double-yud (but rather
  # to a single yud) when it is the last letter of the word. It's used in words
  # like çåìé in which the original form of the word has a chirik male, but in
  # all the inflections the yud from the chirik becomes a fully-fledged
  # consonant. We do not need a similar trick for vav (w), because the
  # Academia's rules do not do anything to a vav at the end of the word,
  # contrary to what happens to a yud.
  # I'm not sure this trick is "kosher" (based on the language), but it does
  # work...
  # "End of the word" here also covers a Y right before the "-" that marks a
  # smichut form.
  $word =~ s/Y($|(?=-))/é/go;  # Y's at the end of the word
  $word =~ s/Y/y/go;       # the rest of the Y's are converted to y's

  # The first conversion below implements the akademia's rule that a chirik
  # before a yå should not be written with a é. So we convert éyå into éå.
  # IDEA: to be more certain that the first é functions as a chirik, it would
  # have been better to use the i character: in addition to the éä -> yä rule
  # we have in the beginning of processing a word, we should do ééä -> iyä.
  # Then here the rule would convert iyå, not éyå. [but everything is working
  # well even without this idea]
  $word =~ s/éyå/éå/go;
  # A y is doubled only when it has a preceding character that is not å, é or
  # y, and is followed either by the end of the string or by a character that
  # is not å, é, y or ä. (A y at the very start of the word is therefore never
  # doubled.)
  $word =~ s/(?<=[^åéy])y(?=[^åéyä]|$)/éé/go;
  $word =~ s/y/é/go;                      # otherwise, just one yud.

  # The first conversion below of åw to å has an interesting story. In the
  # original Hebrew, the consonant å sounded like the English w or Arabic
  # waw. An "u" sound (a kubuts, which we mark by å) followed by this w
  # sound sounded like a long "u", which was later written with a shuruk,
  # i.e., one vav. This conversion is very useful for understanding how the
  # word ùå÷ is inflected (see explanation in wolig.dat).
  $word =~ s/åw/å/go;
  # A w is doubled only in the middle of the word: it needs a character on
  # each side, neither of them å or w, and the following one not "-" either.
  $word =~ s/(?<=[^åw])w(?=[^åw-])/åå/go;  # if vav needs to be doubled, do it
  $word =~ s/w/å/go;                       # otherwise, just one vav.


  # A consonant ä (h) is always output as a ä. The only reason we are
  # interested in which ä is consonant is to allow the rules earlier to double
  # yud next to a consonant ä (i.e., h), but not next to a em-kria ä.
  # For example, compare àøéä (lion) and àøééä (her lion).
  $word =~ s/h/ä/go;

  if($detailed_output && defined($details)){
    $word =~ s/-$//;  # smichut is already known by the details...
    $word .= " ".$detail_prefix.$details;
  }
  print $word."\n";
}

sub inword {
  # inword WORD
  #
  # Pre-process a word as it was read from wolig.dat (presumably written in
  # ktiv male) and return the marked-up copy. Wherever the beginning or the
  # ending of the word tells us that a å or a é must be a consonant and not a
  # vowel, the Hebrew letter is replaced by the marker we use for that
  # consonant: "w" for å and "y" for é. outword later decides whether such a
  # marker is written as a single or as a doubled letter.
  #
  # A guess deserves a place in this function only if it has two traits:
  # 1. It is useful for inflecting some word correctly. For example, knowing
  #    that the åå at the end of îæååä is a consonant lets us generate the
  #    correct îæåå instead of the false inflection îæååå.
  # 2. It is right in 100% of the cases. A rule such as "every åå in the input
  #    is a consonant (w)" does not qualify, because of words like öéååé.
  #    The rule only has to "appear" right for the actual words of wolig.dat,
  #    though; it need not be linguistically right. For example, the rule
  #    below that a å ending a word is a consonant holds for most nouns
  #    (öå, î÷åå÷å) but not for àçå. Still, all of àçå's inflections have a
  #    consonant vav, and mislabeling the base word is harmless because the
  #    Academia's rules never double a vav at the end of a word anyway.
  #
  # The second trait could be relaxed if the input had an alternative way of
  # spelling a construct. If, say, "u" could stand for a vowel vav in the
  # input, a rare wrong guess would not matter much: the user could override
  # it by writing "u" explicitly instead of the vav in the input file.
  my ($word) = @_;

  # A word cannot start with a shuruk or kubuts, so a leading vav is a
  # consonant.
  substr($word, 0, 1, "w") if substr($word, 0, 1) eq "å";

  # Words like çååéä, äìååéä, èøéååéä: no base noun (or adjective) comes to
  # mind in which this double vav is a vav plus shuruk rather than a
  # consonant, so the pair collapses into one "w".
  substr($word, -4, 2, "w") if substr($word, -4, 4) eq "ååéä";

  # At most one of the three endings below is rewritten.
  my $last_letter = substr($word, -1, 1);
  if($last_letter eq "å"){
    # A final vav is taken to be a consonant (the header comment explains why
    # the few exceptions that exist do not bother us).
    substr($word, -1, 1, "w");
  } elsif(substr($word, -3, 3) eq "ååä"){
    # The user wrote ååä in ktiv male and meant a consonant vav. The åå turns
    # into a single "w", which outword doubles again when ktiv male asks for
    # it. This matters for the ñâåì_ä with åú cases: î÷ååä has the plural
    # î÷ååú and the his-possessive î÷åå, whereas without this step we would
    # get the incorrect possessive î÷ååå and plural î÷åååú. The adjective
    # ðàååä needs it too, for its feminine plural.
    substr($word, -3, 2, "w");
  } elsif(substr($word, -2, 2) eq "éä"){
    # The yud before a final ä becomes the consonant marker.
    # TODO: maybe convert ééä (in ktiv male, e.g., ñåôâðééä) into iyä; see the
    # discussion in outword. Everything also works without this change.
    substr($word, -2, 1, "y");
  }
  return $word;
}

#############################################################################

# Lexicals shared by the main loop that follows: the data file handle, the
# word and option string of the current line, and its parsed options.
my ($fh,$word,$optstring,%opts);

# Command line:  wolig.pl [-d] [datafile]
#   -d        toggle detailed output (see $detailed_output)
#   datafile  data file to read; defaults to "wolig.dat"
my $infile;
if(@ARGV && $ARGV[0] eq "-d"){
	$detailed_output=!$detailed_output;
	shift @ARGV;
}
# Pick the default only after -d has been consumed, so that "wolig.pl -d"
# reads wolig.dat too, instead of leaving $infile undefined.
$infile = @ARGV ? $ARGV[0] : "wolig.dat";

# Open the data file for reading; croak reports the failure and exits.
$fh = FileHandle->new($infile, "r")
  or croak "Couldn't open data file $infile for reading";
# Main loop: read the data file line by line and print, through outword(),
# every inflection of the word on that line.
#
# Each data line is "WORD OPTIONS", where OPTIONS is a comma-separated list of
# "name" or "name=value" items. An option without a value gets the value 1.
# The option ò selects the noun code and the option ú the adjective code; a
# line with neither is a fatal error. After each word a "-------" separator
# line is printed.
while(<$fh>){
  # Lines starting with "#*" are copied to the output as they are; being
  # comments, they are then emptied and skipped by the statements below.
  print if /^#\*/;        # print these comments.
  chomp;
  s/#.*$//o;              # comments start with '#'.
  next if /^[ 	]*$/o;	  # ignore blank lines.
  # Whitespace-split the line; fields after the second one are ignored.
  ($word,$optstring)=split;
  die "Type of word '".$word."' was not specified." if !defined($optstring);
  undef %opts;
  my $val;
  # NOTE(review): $opt is not declared with "my", so it is a package variable.
  # NOTE(review): split drops trailing empty fields, so an option written as
  # "name=" ends up with the value 1, not with "" - confirm that this is what
  # the code testing an option against "" further down expects.
  foreach $opt (split /,/o, $optstring){
    ($opt, $val) = (split /=/o, $opt);
    $val = 1 unless defined $val;
    $opts{$opt}=$val;
  }
  if($opts{"ò"}){
    ############################# noun ######################################
    # note that the noun may have several plural forms (see, for example,
    # àåú). When one of the plural forms isn't explicitly specified, wolig
    # tries to guess, based on simplistic heuristics that work for the majority
    # of the nouns (84% of them, at one time I counted).
    my $plural_none = $opts{"éçéã"} || substr($word,-3,3) eq "éåú";
    my $plural_bizarre = exists($opts{"øáéí"});
    my $plural_implicit = !($opts{"åú"} || $opts{"éí"} || $opts{"éåú"}
			   || $opts{"àåú"} || $opts{"ééí"} || $plural_none
			   || $plural_bizarre);
    my $plural_iot = $opts{"éåú"} ||
      ($plural_implicit && (substr($word,-2,2) eq "åú"));
    my $plural_xot = $opts{"àåú"};
    my $plural_ot = $opts{"åú"} ||
      ($plural_implicit && !$plural_iot && (substr($word,-1,1) eq "ä" || substr($word,-1,1) eq "ú" ));
    my $plural_im = $opts{"éí"} || ($plural_implicit && !$plural_ot && !$plural_iot);
    my $plural_iim = $opts{"ééí"};

    # Find gender for detailed output. This has nothing to do with word
    # inflection, it's just an added value of wolig.pl...
    if($detailed_output){
      my $gender;
      if($opts{"æëø"}){
        if($opts{"ð÷áä"}){
   	  $gender="æ,ð";
	} else {
	  $gender="æ";
	}
      } elsif($opts{"ð÷áä"}){
        $gender="ð"
      } elsif($opts{"ñâåì_ä"}){
        $gender="æ";
      } elsif((substr($word,-1,1) eq "ä") && !$opts{"àáã_å"}){
        $gender="ð";
      } elsif(substr($word,-1,1) eq "ú" && !$opts{"éí"}){
        $gender="ð";
      } else {
        $gender="æ";
      }
      $detail_prefix="$gender,";
    }

    # preprocess the word the user has given, converting certain ktiv male
    # constructs into markers (w, y) that we can better work with later (see
    # comments in inword() about what it does).
    $word=inword($word);

    # related singular noun forms
    if(exists $opts{"ðôøã"}){
      outword $opts{"ðôøã"}, "ò,éçéã";  # explicit override of the nifrad
    } elsif(!$opts{"àéï_éçéã"}){
      outword $word, "ò,éçéã"; # the singular noun itself
    }
    if($opts{"àáã_é"}){
      # in words like òéôøåï and äéøéåï the first yud (coming from chirik
      # or tsere in ktiv male) is lost in all but the base word
      $word =~ s/é//o;
    }
    my $smichut=$word;
    if($opts{"àéï_éçéã"} || $opts{"àéï_ðèéåú_éçéã"}){
      # We mark the singular words with "*", telling outword to drop them.
      # This makes the code look cleaner than a huge if statement around all
      # the singular code. Maybe in the future we should move the singular
      # inflection code to a separate function, if() only around that, and
      # stop all that "*" nonsense.
      $smichut="*".$smichut;
    }
    #my $smichut_orig=$smichut;
    if($opts{"îéåçã_àç"}){
      # special case:
      # àç, àá, çí, ôä include an extra yod in the smichut. Note that in the
      # first person singular possessive, we should drop that extra yod.
      # For a "im" plural, it turns out to be the same inflections as the
      # plural - but this is not the case with a "ot" plural.
      # Interestingly, the yud in these inflections is always a chirik
      # male - it is never consonantal (never has a vowel on it).
      if(substr($smichut,-1,1) eq "ä"){
        # Remove the ä. Basically, only one word fits this case: ôä
	$smichut=substr($smichut,0,-1);
	# And add the extra third-person masculine possessive (just like the
	# ñâåì_ä case, but we don't bother to check for the ñâåì_ä flag here).
	outword $smichut."éäå", "ò,éçéã,ùì/äåà";
      }
      outword $smichut."é-",  "ò,éçéã,ñîéëåú"; # smichut
      outword $smichut."é",   "ò,éçéã,ùì/àðé"; # possessives (kinu'im)
      outword $smichut."éðå", "ò,éçéã,ùì/àðçðå";
      outword $smichut."éê",  "ò,éçéã,ùì/àúä";
      outword $smichut."éê",  "ò,éçéã,ùì/àú";
      outword $smichut."éëí", "ò,éçéã,ùì/àúí";
      outword $smichut."éëï", "ò,éçéã,ùì/àúï";
      outword $smichut."éå",  "ò,éçéã,ùì/äåà";
      outword $smichut."éä",  "ò,éçéã,ùì/äéà";
      outword $smichut."éäï", "ò,éçéã,ùì/äï";
      outword $smichut."éäí", "ò,éçéã,ùì/äí";
    } else {
      # NOTE(review): the inner test of ñâåì_ä repeats the outer one and so is
      # always true here.
      if(!$opts{"ñâåì_ä"}){ # replace final ä by ú, unless ñâåì_ä option
        if(substr($smichut,-1,1) eq "ä" && !$opts{"ñâåì_ä"}){
          substr($smichut,-1,1)="ú";
        }
      }
      if(exists($opts{"ðñîê"})){
        outword $opts{"ðñîê"}."-", "ò,éçéã,ñîéëåú";
      } else {
        outword $smichut."-", "ò,éçéã,ñîéëåú"; # smichut
      }
      if($opts{"îéåçã_ùï"}){
      	# academia's ktiv male rules indicate that the inflections of ùï
	# (at least the plural is explicitly mentioned...) should get an
	# extra yud - to make it easy to distinguish from the number ùðééí.
	# (The yud is inserted right before the last letter.)
	substr($smichut,0,-1)=substr($smichut,0,-1).'é';
	substr($word,0,-1)=substr($word,0,-1).'é';
      }
      if(substr($word,-2,2) eq "àé" && length($word)>2){
      	# in words ending with patach and then the imot kria aleph yud,
	# such as úðàé and âáàé, all the inflections (beside the base word
	# and the smichut) are as if the yud wasn't there.
	# Note that words ending with àé but not patach, like àé and ñðàé,
	# should not get this treatment, so there should be an option to turn
	# it off.
	substr($word,-1,1)="";
	substr($smichut,-1,1)="";
      }
      # Note that the extra vowel markers, 'a' and 'e' are added for mele'im
      # ending with yud (e.g., àé) - this vowel attaches to the yud and makes
      # the yud a consonant. This phenomenon is handled in outword.
      # NOTE(review): $no_ah is referenced only by the commented-out line
      # below.
      my $no_ah=0;
      if($opts{"ñâåì_ä"}){
      	# the ä is dropped from the singular inflections, except one alternate
	# inflection like îåøäå (the long form of îåøå):
	# (there's another feminine inflection, îåøä with kamats on the he,
	# but this is spelled the same (as îåøä with mapik) without niqqud so
	# we don't need to print it again).
	if(substr($smichut,-1,1) eq "ä"){
	  $smichut=substr($smichut,0,-1);
	}
        outword $smichut."ehå", "ò,éçéã,ùì/äåà";
	# TODO: maybe add the "eha" inflection? But it won't generate anything
	# different from the ah below...
        #outword $smichut."eha" unless $no_ah;
      }
      outword $smichut."é",   "ò,éçéã,ùì/àðé"; # possessives (kinu'im)
      outword $smichut."eðå", "ò,éçéã,ùì/àðçðå";
      outword $smichut."ê",   "ò,éçéã,ùì/àúä";
      outword $smichut."eê",  "ò,éçéã,ùì/àú";
      outword $smichut."ëí",  "ò,éçéã,ùì/àúí";
      outword $smichut."ëï",  "ò,éçéã,ùì/àúï";
      outword $smichut."å",   "ò,éçéã,ùì/äåà";
      outword $smichut."ah",  "ò,éçéã,ùì/äéà";
      outword $smichut."aï",  "ò,éçéã,ùì/äï";
      outword $smichut."aí",  "ò,éçéã,ùì/äí";
    }
    # related plural noun forms
    # note: don't combine the $plural_.. ifs, nor use elsif, because some
    # nouns have more than one plural forms.
    if($plural_im){
      my $xword=$word;
      if(substr($xword,-1,1) eq "ä" && !$opts{"ùîåø_ú"}){
	# remove final "he" (not "tav", unlike the "ot" pluralization below)
	# before adding the "im" pluralization, unless the ùîåø_ú option was
	# given.
	$xword=substr($xword,0,-1);
      }
      my $xword_orig=$xword;
      if($opts{"àáã_å"}){
	# when the àáã_å flag was given, we remove the first "em kri'a" from
	# the word in most of the inflections. (see a discussion of this
	# option in wolig.dat).
	$xword =~ s/å//o;
      }
      outword $xword."éí", "ò,øáéí";
      # $smichut carries the (possibly shortened) stem, $smichut_orig the stem
      # with the first å still in place.
      $smichut=$xword;
      my $smichut_orig=$xword_orig;
      outword $smichut_orig."é-", "ò,øáéí,ñîéëåú"; # smichut
      # (We write patach followed by a consonant yud as "y", and later this will
      # give us the chance to automatically double it as necessary by the
      # Academia's ktiv male rules)
      outword $smichut."y",        "ò,øáéí,ùì/àðé"; # possessives (kinu'im)
      outword $smichut."éðå",      "ò,øáéí,ùì/àðçðå";
      outword $smichut."éê",       "ò,øáéí,ùì/àúä";
      outword $smichut."yê",       "ò,øáéí,ùì/àú";
      outword $smichut_orig."éëí", "ò,øáéí,ùì/àúí";
      outword $smichut_orig."éëï", "ò,øáéí,ùì/àúï";
      outword $smichut."éå",       "ò,øáéí,ùì/äåà";
      outword $smichut."éä",       "ò,øáéí,ùì/äéà";
      outword $smichut_orig."éäï", "ò,øáéí,ùì/äï";
      outword $smichut_orig."éäí", "ò,øáéí,ùì/äí";
    }
    if($plural_iim){
      # I currently decided that in Hebrew, unlike Arabic, only specific
      # nouns can get the iim (zugi) pluralization, and most nouns can't,
      # e.g., çúåìééí isn't correct (for "two cats") despite a story called
      # îòùä áçúåìééí. This is why this is an option, and not the default.
      my $xword=$word;
      if(substr($xword,-1,1) eq "ä"){
	# Change final he into tav before adding the "iim" pluralization.
	$xword=substr($xword,0,-1)."ú";
      }
      my $xword_orig=$xword;
      outword $xword."yí", "ò,øáéí";
      $smichut=$xword;
      my $smichut_orig=$xword_orig;
      outword $smichut_orig."é-", "ò,øáéí,ñîéëåú"; # smichut
      outword $smichut."y",        "ò,øáéí,ùì/àðé"; # possessives (kinu'im)
      outword $smichut."éðå",      "ò,øáéí,ùì/àðçðå";
      outword $smichut."éê",       "ò,øáéí,ùì/àúä";
      outword $smichut."yê",       "ò,øáéí,ùì/àú";
      outword $smichut_orig."éëí", "ò,øáéí,ùì/àúí";
      outword $smichut_orig."éëï", "ò,øáéí,ùì/àúï";
      outword $smichut."éå",       "ò,øáéí,ùì/äåà";
      outword $smichut."éä",       "ò,øáéí,ùì/äéà";
      outword $smichut_orig."éäï", "ò,øáéí,ùì/äï";
      outword $smichut_orig."éäí", "ò,øáéí,ùì/äí";
    }
    if($plural_ot){
      my $xword=$word;
      if(substr($xword,-1,1) eq "ä" || substr($xword,-1,1) eq "ú"){
	# remove final "he" or "tav" before adding the "ot" pluralization,
	# unless the ùîåø_ú option was given.
	if(!$opts{"ùîåø_ú"}){
	  $xword=substr($xword,0,-1);
	}
      }
      if($opts{"àáã_å"}){
      	# In segoliim with cholam chaser chat that inflect like feminines
	# (i.e., the plural_ot case), the cholam is lost *only* in the base
	# plural, not in other plural inflection. This is comparable to the
	# inflections of the word îìëä, where the patach is lost only in the
	# base plural.
	# See for example âåøï, ãåôï.
	my $tmp = $xword;
	$tmp =~ s/å//o;
      	outword $tmp."åú",    "ò,øáéí";
      } else {
        outword $xword."åú",  "ò,øáéí";
      }
      
      $smichut=$xword."åú";
      outword $smichut."-",   "ò,øáéí,ñîéëåú"; # smichut
      outword $smichut."y",   "ò,øáéí,ùì/àðé"; # possessives (kinu'im)
      outword $smichut."éðå", "ò,øáéí,ùì/àðçðå";
      outword $smichut."éê",  "ò,øáéí,ùì/àúä";
      outword $smichut."yê",  "ò,øáéí,ùì/àú";
      outword $smichut."éëí", "ò,øáéí,ùì/àúí";
      outword $smichut."éëï", "ò,øáéí,ùì/àúï";
      outword $smichut."éå",  "ò,øáéí,ùì/äåà";
      outword $smichut."éä",  "ò,øáéí,ùì/äéà";
      outword $smichut."éäï", "ò,øáéí,ùì/äï";
      outword $smichut."éäí", "ò,øáéí,ùì/äí";
    }
    if($plural_iot){
      my $xword=$word;
      if(substr($xword,-1,1) eq "ä" || substr($xword,-1,1) eq "ú"){
	# remove final "he" or "tav" before adding the "iot" pluralization,
	# unless the ùîåø_ú option was given.
	if(!$opts{"ùîåø_ú"}){
	  $xword=substr($xword,0,-1);
	}
      }
      outword $xword."éåú",   "ò,øáéí";
      $smichut=$xword."éåú";
      outword $smichut."-",   "ò,øáéí,ñîéëåú"; # smichut
      outword $smichut."y",   "ò,øáéí,ùì/àðé"; # possessives (kinu'im)
      outword $smichut."éðå", "ò,øáéí,ùì/àðçðå";
      outword $smichut."éê",  "ò,øáéí,ùì/àúä";
      outword $smichut."yê",  "ò,øáéí,ùì/àú";
      outword $smichut."éëí", "ò,øáéí,ùì/àúí";
      outword $smichut."éëï", "ò,øáéí,ùì/àúï";
      outword $smichut."éå",  "ò,øáéí,ùì/äåà";
      outword $smichut."éä",  "ò,øáéí,ùì/äéà";
      outword $smichut."éäï", "ò,øáéí,ùì/äï";
      outword $smichut."éäí", "ò,øáéí,ùì/äí";
    }
    if($plural_xot){
      my $xword=$word;
      if(substr($xword,-1,1) eq "ä" || substr($xword,-1,1) eq "ú"){
	# remove final "he" or "tav" before adding the "xot" pluralization,
	# unless the ùîåø_ú option was given.
	if(!$opts{"ùîåø_ú"}){
	  $xword=substr($xword,0,-1);
	}
      }
      outword $xword."àåú",   "ò,øáéí";
      $smichut=$xword."àåú";
      outword $smichut."-",   "ò,øáéí,ñîéëåú"; # smichut
      outword $smichut."y",   "ò,øáéí,ùì/àðé"; # possessives (kinu'im)
      outword $smichut."éðå", "ò,øáéí,ùì/àðçðå";
      outword $smichut."éê",  "ò,øáéí,ùì/àúä";
      outword $smichut."yê",  "ò,øáéí,ùì/àú";
      outword $smichut."éëí", "ò,øáéí,ùì/àúí";
      outword $smichut."éëï", "ò,øáéí,ùì/àúï";
      outword $smichut."éå",  "ò,øáéí,ùì/äåà";
      outword $smichut."éä",  "ò,øáéí,ùì/äéà";
      outword $smichut."éäï", "ò,øáéí,ùì/äï";
      outword $smichut."éäí", "ò,øáéí,ùì/äí";
    }
    if($plural_bizarre){
      # User specified plural for bizarre cases; For example, the plural of
      # öì is öììéí, the plural of áú is áðåú.
      # We take the fully formed plural from the user, and may need to take
      # off the ending to guess the smichut and possessives (letting the user
      # override the smichut forms too).
      my $plural=$opts{"øáéí"};
      outword $plural, "ò,øáéí";
      # Overriding the plural nishmach with the ðñîëéí option: David Yalin,
      # In his book ã÷ãå÷ äìùåï äòáøéú (1942) explains in page 207 how some
      # of the kinuyim are known as "kinuyey hanifrad" and some "kinuyey
      # hanishmach" because when the nismach and nifrad differ, they follow
      # different ones. This is important for words like úéù, and in fact
      # the àáã_å option does basically the same thing.
      my $smichut_orig;
      if(substr($plural,-2,2) eq "åú"){
	$smichut_orig= exists($opts{"ðñîëéí"}) ? $opts{"ðñîëéí"} : $plural;
	# as David Yalin explains (ibid.): "öøéê ìäòéø ëé áùîåú ùñéîï äøéáåé
	# ùìäí äåà -åú ðåèéí ëì ëéðåéé äøáéí àçøé öåøú äñîéëåú".
        $smichut=$smichut_orig;
        outword $smichut_orig."-", "ò,øáéí,ñîéëåú"; # smichut
      } elsif(substr($plural,-2,2) eq "éí" || substr($plural,-2,2) eq "éï"){
        $smichut=substr($plural,0,-2);
	# the removal of the final yod from ðñîëéí is a bit silly... maybe
	# we should have had a î÷åø_ðñîëéí option and ask it without yod.
	$smichut_orig= exists($opts{"ðñîëéí"}) ?
		substr($opts{"ðñîëéí"},0,-1) : $smichut;
        outword $smichut_orig."é-", "ò,øáéí,ñîéëåú"; # smichut
      } else {
        die "Plural given for $word is of unrecognized form: $plural.";
      }
      outword $smichut."y",        "ò,øáéí,ùì/àðé"; # possessives (kinu'im)
      outword $smichut."éðå",      "ò,øáéí,ùì/àðçðå";
      outword $smichut."éê",       "ò,øáéí,ùì/àúä";
      outword $smichut."yê",       "ò,øáéí,ùì/àú";
      outword $smichut_orig."éëí", "ò,øáéí,ùì/àúí";
      outword $smichut_orig."éëï", "ò,øáéí,ùì/àúï";
      outword $smichut."éå",       "ò,øáéí,ùì/äåà";
      outword $smichut."éä",       "ò,øáéí,ùì/äéà";
      outword $smichut_orig."éäï", "ò,øáéí,ùì/äï";
      outword $smichut_orig."éäí", "ò,øáéí,ùì/äí";
    }
  } elsif($opts{"ú"}){
    ############################# adjective ##################################
    $detail_prefix="";
    # preprocess the word the user has given, converting certain ktiv male
    # constructs into markers (w, y) that we can better work with later (see
    # comments in inword() about what it does).
    $word=inword($word);
    # A preprocessing rule special for adjectives: a final yud will always be
    # a chirik male, not some sort of consonant yud or another vowel. Together
    # with the iy post-transformation in outword, this makes ùðé - ùðééä work
    # correctly. However, when the word ends with åé (and not ååé) we assume
    # this is shuruk followed by a consonant yud (for example, îöåé). In
    # words that do end in ååé and the åå is not a consonant we must use a
    # w explicitly, (e.g. øååé should be written explicitly as øwåé).
    if($word =~ m/([^aeiå]|åå)é$/o){
      substr($word,-1,1) = "ié";
    }

    my $xword=$word;
    if(substr($xword,-1,1) eq "ä"){
      # remove final "he" before adding the pluralization,
      # unless the ùîåø_ä option was given.
      if(!$opts{"ùîåø_ä"}){
	$xword=substr($xword,0,-1);
      }
    }

    if($opts{"òí"}){
      # For nationality adjectives (always adding in yud!), there is a separate
      # plural for the people of that nationality (rather than other objects
      # from that country), with only í added. There's also a country name,
      # and sometimes a female-person form too (ð÷áä_ä). We output these here,
      # instead of separately in extrawords, so that the country list can be
      # organized nicely at one place.
      if(exists($opts{"àøõ"})){
        outword $opts{"àøõ"}, "ò,ôøèé,ð" if($opts{"àøõ"} ne "") # country name
      } elsif(substr($word,-3,3) eq "àié"){
        outword substr($word,0,-3)."ä", "ò,ôøèé,ð";  # country name
      } else {
        # Guess the country name: drop the adjective's final (i)é and turn the
        # letter that is now last into its final form.
        # NOTE(review): $country is not declared with "my", so it is a package
        # variable.
        $country = $word;
        $country =~ s/i?é$//; $country =~ s/î$/í/; $country =~ s/ð$/ï/;
	$country =~ s/ë$/ê/; $country =~ s/ô$/ó/; $country =~ s/ö$/õ/;
        outword $country, "ò,ôøèé,ð"; # country name
      }
      outword $word."í", "ò,øáéí,æ"; # plural (people of that nationality)
      $opts{"ð÷áä_ú"}=1; # for enabling ú plural. adding ä plural is optional.
    }

    if(!exists($opts{"éçéã"})){
      outword $word,     "ú,éçéã,æ"; # masculine, singular
      outword $word."-", "ú,éçéã,æ,ñîéëåú"; # smichut (same as nifrad)
    } else {
      outword $opts{"éçéã"},     "ú,éçéã,æ"; # masculine, singular
      outword $opts{"éçéã"}."-", "ú,éçéã,æ,ñîéëåú"; # smichut (same as nifrad)
    }
    if($opts{"í"}){
      # special case for adjectives like øùàé. Unlike the noun case where we
      # turn this option automatically for words ending with àé, here such a
      # default would not be useful because a lot of nouns ending with ä or à
      # correspond to adjectives ending with àé that this rule doesn't fit.
      outword $xword."í",  "ú,øáéí,æ"; # masculine, plural
      outword $xword."-",  "ú,øáéí,æ,ñîéëåú"; # smichut
    } else {
      outword $xword."éí", "ú,øáéí,æ"; # masculine, plural
      outword $xword."é-", "ú,øáéí,æ,ñîéëåú"; # smichut
    }
    # feminine, singular:
    if($opts{"ð÷áä_éú"}){
      # This is an ad-hoc treatment of the nekeva_it option, which cannot be
      # combined with others because we will only have one plural form...
      $xword=$xword."é";
      $opts{"ð÷áä_ú"}=1;
    }
    # When neither feminine option was given, choose the ú form for a stem
    # ending with é and the ä form otherwise.
    my $nekeva_implicit = !($opts{"ð÷áä_ú"} || $opts{"ð÷áä_ä"});
    my $nekeva_t = $opts{"ð÷áä_ú"} ||
    		   ($nekeva_implicit && substr($xword,-1,1) eq "é");
    my $nekeva_h = $opts{"ð÷áä_ä"} ||
    		   ($nekeva_implicit && !$nekeva_t);
    if($nekeva_t){
      if(substr($word,-1,1) eq "ä" && !$opts{"ùîåø_ä"}){
        # This is a rare case, where an adjective ending with ä gets a ú
	# feminine form, and an extra yud needs to be added. For example
	# îåôìä, îåôìéú.
        outword $xword."éú",  "ú,éçéã,ð";
        outword $xword."éú-", "ú,éçéã,ð,ñîéëåú"; # smichut (same as nifrad)
      } else {
        # note: we don't bother adding the vowel "e" before the ú because that
        # would only make a difference before a yud - and interestingly when
        # there *is* a yud, the vowel is dropped anyway!
        outword $xword."ú",   "ú,éçéã,ð";
        outword $xword."ú-",  "ú,éçéã,ð,ñîéëåú"; # smichut (same as nifrad)
      }
    }
    if($nekeva_h){
      outword $xword."aä",  "ú,éçéã,ð";
      outword $xword."aú-", "ú,éçéã,ð,ñîéëåú"; # smichut
    }
    outword $xword."åú",  "ú,øáéí,ð"; # feminine, plural
    outword $xword."åú-", "ú,øáéí,ð,ñîéëåú"; # smichut (same as nifrad)
  } else {
    die "word '".$word."' was not specified as noun, adjective or verb.";
  }
  # Separator line between the inflections of consecutive words.
  outword "-------"
}