Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 1383 lines (1283 sloc) 43.6 KB
#!/usr/bin/perl -w
#
# Copyright (C) 2000-2010 Nadav Har'El, Dan Kenigsberg
#
use Carp;
my ($infile,$c);
my $detailed_output=0;
my %fin = ('ë'=>'ê', 'î'=>'í', 'ð'=>'ï', 'ô'=>'ó', 'ö'=>'õ');
if ($#ARGV>=0 && $ARGV[0] eq "-d")
{
$detailed_output=!$detailed_output;
shift @ARGV;
}
if($#ARGV < 0){
$infile="woo.dat";
} else {
$infile=$ARGV[0];
}
open(INFILE, "<$infile")
or croak "Couldn't open data file $infile for reading";
open (SHEMP, ">shemp.dat");
print SHEMP "# list of automatically generated shmot-peula\n";
while(<INFILE>){
print if /^#\*/; # print these comments,
print SHEMP $_ if /^#\*/; # and also to shemp.dat.
chomp;
next if /^( | )*$/; # ignore empty lines.
next if /^ *#/; # comments start with '#'.
#$c++; print STDERR "#" if !($c % 20);
s/ *\#.*$//; #and appear at end of lines.
($word,$optstring)=split;
undef %opts;
my $val;
foreach $opt (split /,/o, $optstring){
($opt, $val) = (split /=/o, $opt);
$val = 1 unless defined $val;
$val =~ tr/êíïóõ/ëîðôö/;
$opts{$opt}=$val;
}
if($opts{"ô"}){
$w = new Word;
$word =~ tr/êíïóõ/ëîðôö/;
$word =~ s/â'/J/;
$word =~ s/æ'/Z/;
$word =~ s/ö'/C/;
$word =~ s/ä$/h/o if $opts{"ùîåø_îôé÷"};
$word =~ tr/éå/yw/ if $opts{"ùîåø_òå"};
$w->root($word);
my @binyanim = ();
my %transitive = ();
$opts{"÷ì_àôòì"}=1 if $opts{"÷ì_àôòì+"};
$opts{"÷ì_àôòåì"}=1 if $opts{"÷ì_àôòåì+"};
$opts{"äô"}=1 if ($opts{"äô+"});
$opts{"ôé"}=1 if ($opts{"ôé+"});
push @binyanim, $Word::qal if ($opts{"÷ì_àôòì"}||$opts{"÷ì_àôòåì"});
push @binyanim, $Word::niqtal if ($opts{"ðô"});
push @binyanim, $Word::hiqtil if ($opts{"äô"});
push @binyanim, $Word::huqtal if ($opts{"äå"});
push @binyanim, $Word::qitel if ($opts{"ôé"});
push @binyanim, $Word::qutal if ($opts{"ôå"});
push @binyanim, $Word::hitqatel if ($opts{"äú"});
$transitive{$Word::qal}=1 if ($opts{"÷ì_àôòì+"}||$opts{"÷ì_àôòåì+"});
$transitive{$Word::hiqtil}=1 if ($opts{"äô+"});
$transitive{$Word::qitel}=1 if ($opts{"ôé+"});
$w->{opts}= \%opts; #TODO pass only relevant options.
foreach $b (@binyanim) {
$w->binyan($b);
# When the options ðñúø is given, $word is not the root to conjugate, but
# rather the 3rd person masculine singular form of the verb. We seldom
# use this input method, and usually generate this base form automatically
# (in the parameter-less abar_nistar function).
if ($opts{"ðñúø"}) {$w->abar_nistar($word);}
else { $w->abar_nistar;}
# in past, hem==hen and in niqqudless script so is at==ata. But the
# objectization is different, so we generate both at and ata. And for
# the sake of completeness of the morphological analysis, also hen is
# added.
foreach $g ($Word::hu,$Word::ani,$Word::ata,$Word::at,$Word::hi,
$Word::anu,$Word::atem,$Word::aten,$Word::hem,$Word::hen)
{
$w->{object} = undef; #clear objectization
$s = $w->past_conj($g);
$w->outword($s);
# support for the mostly-archaic nitpa`el form.
if ($w->{binyan} eq $Word::hitqatel && ${$w->{opts}}{"âí_ðú"}) {
$s =~ s//ð/o;
$w->outword($s);
}
if (defined($s) && $transitive{$w->{binyan}}) {
next if $g eq $Word::aten; # $aten's transitivisation is as $atem's
$w->{second_bj_form} = 0;
foreach $bj ($Word::ani,$Word::ata,$Word::at,$Word::hu,$Word::hu,$Word::hi,
$Word::anu,$Word::atem,$Word::aten,$Word::hem,$Word::hen) {
$w->{second_bj_form} = !$w->{second_bj_form} if $bj eq $Word::hu;
my $n = $w->objectize($s, $bj);
$w->outword($n) if $n;
}
}
}
$w->{guf} = undef; # some cleanup.
$w->{object} = undef;
my $s = $w->infinitive_conj;
if (defined($s)) {
&output_infinitive($s, $transitive{$w->{binyan}});
if ($w->{binyan} eq $Word::niqtal && ${$w->{opts}}{'âí_ìéäðåú'}) {
my $lehanot = $s;
$lehanot =~ s/äéä/éä/o;
$w->{object} = undef;
&output_infinitive($lehanot, $transitive{$w->{binyan}});
}
}
# in imperative only at,ata,atem,aten (second person)
foreach $g ($Word::ata,$Word::at,$Word::atem,$Word::aten)
{
$w->{object} = undef; #clear objectization
$s = $w->imperative_conj($g);
$w->outword($s);
if (defined($s) && $transitive{$w->{binyan}}) {
next if $g eq $Word::aten; # TODO do $aten have objectization??
foreach $bj ($Word::ani,$Word::ata,$Word::at,$Word::hu,$Word::hi,
$Word::anu,$Word::atem,$Word::aten,$Word::hem,$Word::hen) {
my $n = $w->objectize($s, $bj);
$w->outword($n) if $n;
}
}
}
foreach $g ($Word::ani,$Word::ata,$Word::at,$Word::hu,$Word::hi,
$Word::anu,$Word::atem,$Word::aten,$Word::hem,$Word::hen)
{
$w->{object} = undef; #clear objectization
$s = $w->future_conj($g);
$w->outword($s);
if (defined($s) && $transitive{$w->{binyan}}) {
next if $g eq $Word::aten || $g eq $Word::hen;
$w->{second_bj_form} = 0;
foreach $bj ($Word::ani,$Word::ata,$Word::at,$Word::hu,$Word::hi,$Word::hu,
$Word::hi,$Word::anu,$Word::atem,$Word::aten,
$Word::hem,$Word::hen) {
# a trick to flip second_bj_form for the second time of bj=hu/hi
$w->{second_bj_form} = !$w->{second_bj_form} if $bj eq $Word::hu;
my $n = $w->objectize($s, $bj);
$w->outword($n) if $n;
}
}
}
$w->{second_bj_form}=1; # only this is accepted in the present tense.
#and no reason to repeat it for every objectization.
# the gufs of the present tense are very much different than in other
# tenses. Nevertheless, we use at, ata, atem, aten as representatives of
# yaxid, yxida, rabbim, rabbot.
foreach $g ($Word::ata,$Word::at,$Word::atem,$Word::aten)
{
$w->{object} = undef; #clear objectization
$s = $w->present_conj($g);
$w->outword($s);
if (defined($s) && $transitive{$w->{binyan}}) {
foreach $bj ($Word::ani,$Word::ata,$Word::at,$Word::hu,$Word::hi,
$Word::anu,$Word::atem,$Word::aten,$Word::hem,$Word::hen) {
my $n = $w->objectize($s, $bj);
$w->outword($n) if $n;
}
}
$w->{object} = undef; #clear objectization
if ($s) {
if ($g eq $Word::at) {
# output the nismach form, even if identical to the nifrad:
$s =~ s/ä$/ú/o; $s =~ s/$/-/o; $w->outword($s);
# create the other form of the present female if both are requested
if (${$w->{opts}}{"áéðåðéú_úä"} ||
$w->_nakey_lh && $w->{binyan} eq $Word::huqtal) {
${$w->{opts}}{"áéðåðéú_àøëàéú"} = 1;
$s = $w->present_conj($g);
$w->outword($s);
$s =~ s/ä$/ú/o; $s =~ s/$/-/o; $w->outword($s);
${$w->{opts}}{"áéðåðéú_àøëàéú"} = 0;
}
} elsif ($g eq $Word::atem) {
$s =~ s/í$/-/; $w->outword($s);
} else {
$s =~ s/$/-/; $w->outword($s);
}
}
}
$s = $w->shempeula_conj;
if ($s) {
$s =~ s/C/ö'/o;
$s =~ s/J/â'/o;
$s =~ s/Z/æ'/o;
$s =~ s/([ëîðôö])$/$fin{$1}/;
$s =~ s/h/ä/o;
$s =~ s/[éI]yå/éå/go;
$s =~ s/(?<=[^åéy])y(?=[^åéyä]|$)/éé/go;
$s =~ s/y/é/go; # otherwise, just one yud.
$s =~ s/åw/å/go;
$s =~ s/(?<=[^åw])w(?=[^åw-])/åå/go; # if vav needs to be doubled, do it
$s =~ s/([ëîðôö])$/$fin{$1}/;
print SHEMP $s." ò";
# for male shemps ending with åú, we must pass a hint to wolig.pl
print SHEMP ",éí" if ($w->{binyan} eq $Word::qitel && $s =~ m/åú$/o);
print SHEMP "\n"
}
print "-----\n";
}
# Create the pa`ul form, when applicable.
if (${$w->{opts}}{"ôòåì"} || ${$w->{opts}}{"÷ì_àôòì"}
|| ${$w->{opts}}{"÷ì_àôòåì"}) {
foreach $g ($Word::ata,$Word::at,$Word::atem,$Word::aten)
{
$s = $w->paul_conj($g);
$w->outword($s);
if ($s) {
if ($g eq $Word::at) {$s =~ s/ä$/ú-/; $w->outword($s);}
elsif ($g eq $Word::atem) {$s =~ s/í$/-/; $w->outword($s);}
else {$w->outword($s.'-')}
}
}
print "-----\n" if $s;
}
}
}
# since in a (very) few cases I want to print two types of infinitive, I moved
# it all into a subroutine.
sub output_infinitive() {
my ($s, $is_trans) = @_;
# in most cases, we want to accept all bklm in the initial. but since the
# code is less-than-perfect, it relies on 'ì' so we substitute the lamed
# with L only temporarily. TODO: correct this stupidity.
my $tmps = $s;
$tmps =~ s//L/ if !$opts{'î÷åø_àáã_ôð'};
$w->outword($tmps);
# infinitives that lost their p"n, should regain it in their bkm form.
# Here, with this B prefix, we allow only the bet form.
# TODO: correct this silly add/remove/regain drill
if ($opts{'î÷åø_àáã_ôð'}) {
$tmps =~ s/^ìé/B$w->{q}/;
$w->outword($tmps);
}
# the infinitive form of all verbs has subjectization in all pronouns.
# however, transitive verbs have also objectization, which is exactly the
# same for most pronouns. therefore, for transitive verbs we print
# subjectization only for $ani.
#
# TODO: resolve the following linguistic question: Is there a difference
# in the pronunciation and spelling of áãåçôí (when they push, bdoxpam)
# and ìãçåôí (to push them, lidxpam)? The first is an subjectization of
# ìãçåó, and the second is a objectization. I do *not* know if the above
# differentiation is valid or correct, and failed to find references to
# support my gut feeling. Thus, on the mean while, I produce a waw-less
# form, as done by rav-millim.
if ($is_trans) {
foreach $bj ($Word::ani,$Word::ata,$Word::at,$Word::hu,$Word::hi,
$Word::anu,$Word::atem,$Word::aten,$Word::hem,$Word::hen) {
my $n = $w->objectize($s, $bj);
$w->outword($n) if $n;
}
my $n = $w->objectize($s, $Word::ani, SUBJECTIZE);
$w->outword($n) if $n;
} else { # output only subjectizations for intransitive verbs.
foreach $bj ($Word::ani,$Word::ata,$Word::hu,$Word::hi,
$Word::anu,$Word::atem,$Word::aten,$Word::hem,$Word::hen) {
my $n = $w->objectize($s, $bj, SUBJECTIZE);
$w->outword($n) if $n;
}
}
}
{
package Word;
our (@all_binyan,@all_guf,%mishqal_abar,%coran_abar);
# When SUBJECTIZE is passed to the objectize function, it creates the kinnuy
# xabur that signifies the subject of a sentence, rather than the object.
# In some (few) cases it makes a difference.
use constant SUBJECTIZE => 1;
sub INIT {
@all_binyan =
($qal, $niqtal, $qitel, $qutal, $hitqatel, $hiqtil, $huqtal) =
('a','b','c','d','e','f','g');
@all_guf = ($ani, $ata, $at, $hu, $hi, $anu, $atem, $aten, $hem, $hen) =
('A','B','C','D','E','F','G','H','I','J');
%mishqal_abar = ($qal => 'qtl', $niqtal => 'ðqtl',
$qitel => 'qétl', $qutal=>'qåtl', $hitqatel=>'äúqtl',
$hiqtil=>'äqtél', $huqtal=>'äåqtl');
%coran_abar = ($ani=>'úé', $ata=>'ú' , $at=>'ú', $hu=>'', $hi=>'ä',
$anu=>'ðå', $atem=>'úî', $aten=>'úð', $hem=>'å', $hen=>'å');
%future_initial = ($ani=>'à', $ata=>'ú', $at=>'ú', $hu=>'é', $hi=>'ú',
$anu=>'ð', $atem=>'ú', $aten=>'ú', $hem=>'é', $hen=>'ú');
%subject_suf = ($ani=>'é', $ata=>'ê', $at=>'ê', $hu=>'äå', $hi=>'ä',
$anu=>'ðå', $atem=>'ëí', $aten=>'ëï', $hem=>'í', $hen=>'ï');
%object_suf = ($ani=>'ðé', $ata=>'ê', $at=>'ê', $hu=>'äå', $hi=>'ä',
$anu=>'ðå', $atem=>'ëí', $aten=>'ëï', $hem=>'í', $hen=>'ï');
($past, $present, $future, $imperative, $infinitive, $adjective) =
(1, 2, 3, 4, 5, 6);
%gname = ($ani => 'àðé', $ata => 'àúä', $at => 'àú', $hu => 'äåà',
$hi => 'äéà', $anu => 'àðå', $atem => 'àúí', $aten => 'àúï',
$hem => 'äí', $hen => 'äï');
%pname = ($ata => 'éçéã,æ', $at => 'éçéã,ð',
$aten => 'øáéí,ð', $atem => 'øáéí,æ');
%tname = ($past=>'òáø', $present=>'äååä', $future=>'òúéã',
$imperative=>'öéååé', $infinitive=>'î÷åø', undef=>'-');
}
sub new {
my ($c, $r) = @_;
my $w = {};
root($w, $r) if (defined $r);
return bless $w;
}
sub root {
my ($w, $r) = @_;
if ($r =~ m/(.*)-(.*)-(.*)/o) {
$w->{root} = $1.$2.$3;
$w->{q} = $1;
$w->{t} = $2;
$w->{l} = $3;
} else {
$w->{root} = $r;
$w->{q} = substr($r,0,1);
$w->{t} = substr($r,1,length($r)-2);
$w->{l} = substr($r,-1,1);
}
}
sub binyan {
my ($w, $b) = @_;
$w->{binyan} = $b;
$w->{mishqal} = $mishqal_abar{$b};
}
sub _subst_root {
my ($w, $s) = @_;
$s =~ s/q/$w->{q}/g;
$s =~ s/t/$w->{t}/g;
$s =~ s/l/$w->{l}/g;
return $s;
}
sub _bdoq_sikul {
my $w = shift;
return if ($w->{q} !~ m/[ãæèñöCZJùú]/o);
$w->{mishqal} =~ s/^äúq/äqú/ if ($w->{q} =~ m/[ñù]/o);
$w->{mishqal} =~ s/^äúq/äqè/ if ($w->{q} =~ m/[öC]/o);
$w->{mishqal} =~ s/^äúq/äqã/ if ($w->{q} =~ m/[æZJ]/o);
$w->{mishqal} =~ s/^äúq/äéq/ if ($w->{q} =~ m/[úãè]/o &&
!${$w->{opts}}{"ùîåø_ôã"});
}
# create the abar_nistar form, unless it is provided as the param.
sub abar_nistar {
my ($w, $n) = @_;
if (defined $n) {$w->{nistar}=$n; return;}
# The academia rules says: no yod in qitel quadruple
$w->{mishqal} =~ s/é//
if ($w->{binyan} eq $qitel) && (length($w->{t}) > 1) &&
# but keep a double yod if specifically asked to.
!(${$w->{opts}}{"ùîåø_ôé"} && $w->{q} eq 'é');
# nakey p"n
if (($w->{q} eq 'ð' || $w->{q} eq 'é' && $w->{t} eq 'ö') &&
!${$w->{opts}}{"ùîåø_ôð"} &&
$w->{t} !~ m/[øòäàç]/o) { # non guttural ayin-poal!
$w->{mishqal} =~ s/q//o if $w->{binyan} =~ m/[$hiqtil$huqtal]/o &&
$w->{t} !~ m/^[éå]$/o;
# REM: this special niqtal behavior is based on my personal feeling only
$w->{mishqal} =~ s/q/é/o if $w->{binyan} eq $niqtal;
}
# nakey p"y
if ($w->{q} eq 'é' && !${$w->{opts}}{"ùîåø_ôé"} ) {
$w->{mishqal} =~ s/q/å/ if $w->{binyan} =~ m/[$niqtal$hiqtil]/o;
$w->{mishqal} =~ s/q// if $w->{binyan} eq $huqtal;
}
# consonantal p"y - double only in hitqatel??
# -> no, also for quadruple roots (taken care of above)
if ($w->{q} eq 'é') {
$w->{mishqal} =~ s/q/qq/ if $w->{binyan} eq $hitqatel;
}
# nakey ayin waw
if ($w->{t} =~ m/^[éå]$/o) { # if it is nake, drop the waw/yod
$w->{mishqal} =~ s/t//
if $w->{binyan} =~ m/[$huqtal$qal$hiqtil]/o;
$w->{mishqal} =~ s/(?<=[ðä])i// if $w->{binyan} =~ m/[$hiqtil$niqtal]/;
$w->{mishqal} =~ s/([éå])?tl/åll/o
if $w->{binyan} =~ m/[$qitel$qutal$hitqatel]/o;
$w->{mishqal} =~ s//ðé/o
if $w->{binyan} eq $niqtal && ${$w->{opts}}{"ðô_ðéæåï"};
}
# kpulim
if ($w->{t} eq $w->{l} && !${$w->{opts}}{"ùîåø_òò"}) {
$w->{mishqal} =~ s/t// if $w->{binyan} eq $qal;
$w->{mishqal} =~ s/t/å/ if $w->{binyan} eq $niqtal;
$w->{mishqal} =~ s/// if $w->{binyan} eq $hiqtil;
$w->{mishqal} =~ s/t// if $w->{binyan} eq $huqtal;
$w->{mishqal} =~ s/é/å/ if $w->{binyan} eq $qitel;
$w->{mishqal} =~ s/qt/qåt/ if $w->{binyan} eq $hitqatel;
}
$w->_bdoq_sikul if ($w->{binyan} eq $hitqatel);
#nakey l"h
if ($w->_nakey_lh) {
$w->{mishqal} =~ s/é// if $w->{binyan} eq $hiqtil;
}
$w->{nistar} = $w->_subst_root($w->{mishqal});
}
sub _nakey_lh {
my ($w) = @_;
return $w->{l} eq 'ä'
}
sub _past_cond8 {
my ($w) = @_;
my $hataya = $w->{nistar};
# certain doubled roots have the regular conjugation in few gufim in the
# past tense.
$hataya = $w->{q}.$w->{t}.$w->{l} if ${$w->{opts}}{'áéðåðé_ùåîø'} &&
$w->{binyan} eq $qal && $w->{t} eq $w->{l} &&
$w->{guf} =~ m/[$hu$hi$hem$hen]/o;
if ($w->{nistar} =~ m/^.[éå]?.$/o || #one vowel
$w->{binyan} eq $hiqtil) { #last vowel i
#case 9 - only add coran.
} else {
#case 10 - remove last vowel (NNN)
#TODO check subcase (b)
}
return $hataya;
}
sub past_conj {
my ($w, $guf) = @_;
$w->{guf} = $guf;
my $coran = $coran_abar{$guf};
$w->{coran} = $coran;
my $hataya = $w->{nistar};
return undef if ${$w->{opts}}{'àéï_òáø'};
$w->{tense} = $past;
#condition #2
if ($w->_nakey_lh) {
#condition #14 - does the coran begin with consonant
if ($coran =~ m/^[úð]/o) { # begins with consonant
#condition #15
if ($w->{binyan} =~ m/[$qal$hiqtil]/o) {
#case #16 - replace last vowel with i
#TODO: check subcase (a)
$hataya =~ s/ä$/é/;
} else {
#case #17 - replace last vowel with ey
$hataya =~ s/ä$/é/;
}
} elsif ($coran eq 'å') {
#case #18 - remove last vowel
$hataya =~ s/ä$//;
} elsif ($coran eq 'ä') {
$hataya =~ s/éä$//; # this yod is consonantal. only for ðäééúä
#case #19 - replace last vowel with t
#TODO: check subcase (b)(d)
$hataya =~ s/ä$/ú/;
}
} elsif ($w->{l} eq 'à') {
#codition #7
if ($coran !~ m/^[úð]/o) { # begins with vowel
$hataya = $w->_past_cond8;
} else {
#condition #11
if ($hataya eq $qal) {
#case #12 - only add coran
#TODO check subcase (a)
} else {
#case #13 - replace last vowel with e (NNN)
#TODO check subcase (a)
# TODO: is this enough? are there any other cases???
$hataya =~ s/(ä.+)é(.)$/$1$2/;
}
}
} else {
#condition #3
if ($coran =~ m/^[úð]/o) { # begins with consonant
#cond #4
if ($hataya =~ m/ð.å./o && !${$w->{opts}}{'ðñåá_îåãøðé'}) {
#case #5 - replace last vowel with u and add o.
#TODO check subcase (a)
$hataya =~ s/$/å/o;
} else {
#case #6
# do not remove consonantal yod!
unless ($w->{binyan} eq $hitqatel && $w->{t} =~ m/[éå]$/o) {
# usually two letters in (ä..)é(.) are enough,
# but for äååøéã and äéôéì I allow more and less.
$hataya =~ s/(ä.+)é(.)$/$1$2/;
}
# for freaking doubled root
$hataya .= 'å' if $w->{binyan} eq $qal && $w->{t} eq $w->{l} &&
!${$w->{opts}}{'ùîåø_òò'} &&
!${$w->{opts}}{'çãúé_îåãøðé'};
#TODO check subcases (a) (c)
### check (c):
$hataya =~ s/^ä(.)(.)$/ä$1é$2å/o if ${$w->{opts}}{'äñéáåúé_éùï'};
}
} else {$hataya = $w->_past_cond8;}
}
# if the last consonant of the basis is equal to the first of the coran, one
# of the should usually go.
if (substr($hataya,-1,1) eq substr($coran,0,1) &&
!${$w->{opts}}{"ùîåø_ì"}) {
$hataya =~ s/.$//o;
}
# extremely singular exception ðúï
$hataya =~ s/(ðé?ú)ð$/$1/o if $coran =~ m//o;
$hataya .= $coran;
$w->{abar} = $hataya; # remove this ugly duplicity
return $hataya;
}
sub _cond_debug {
# print "debug: ", shift, "\n";
}
sub infinitive_conj {
my ($w) = @_;
my $n = $w->{nistar};
$w->{tense} = $infinitive;
if (${$w->{'opts'}}{'î÷åø'}) {
$w->{infinitive} = ${$w->{'opts'}}{'î÷åø'};
return $w->{infinitive};
}
return undef if ${$w->{opts}}{'àéï_î÷åø'};
#cond #2 - does abar_nistar have exactly 2 syllables?
#_cond_debug(2);
if ($w->{binyan} ne $hitqatel && $n !~ m/^.[éå]?.$/o) {
#cond #3 - does $n begin with non-root nun?
#_cond_debug(3);
if ($w->{binyan} eq $niqtal) {
#case #4
#check (a,b) - NNN No niqqud - no care. check (c) below.
#_cond_debug(4);
# double consonant waw
$n =~ s/^ðå([^å])/ðåå$1/ if $w->{q} =~ m/[éå]/o ;
# $n = 'ìâùú' if $n eq 'ðéâù'; # singular exception
if (${$w->{'opts'}}{'ðô_ðéæåï'}) {
$n =~ s/^ðé?/ìäé/o;
} elsif ($w->{q} ne 'ð') {
$n =~ s//ìäé/o;
} else {
$n =~ s/^ð[ðé]/ìäéð/o if $w->{q} eq 'ð';
}
} else {
#cond #5 - is the first vowel e/i?
#_cond_debug(5);
if ($w->{binyan} =~ m/[$qitel$hitqatel$hiqtil]/o) {
#case #6
#_cond_debug(6);
# double consonant waw
$n =~ s/^åé/åå/ if $w->{q} eq 'å';
# remove i vowel, but not double yod
$n =~ s/^(.)[éi]/$1/ if $w->{q} ne 'é' && $w->{t} ne 'é';
$n = 'ì'.$n;
} else {
#cond #7 - is the first vowel a?
#_cond_debug(7);
if ($w->{binyan} eq $qal) {
#cond #8 - does $n appear in list (I)
#_cond_debug(8);
if (${$w->{'opts'}}{'î÷åø'}) {
#case #9
#_cond_debug(9);
# I keep List I in the data file.
$n = ${$w->{'opts'}}{'î÷åø'};
} else {
#cond #10 - does $n begin with aleph?
#_cond_debug(10);
if ($n =~ m//o) {
#case #11 - TODO
#TODO: check (c,d)
#_cond_debug(11);
$n =~ s/(.)$/å$1/;
$n = 'ì'.$n;
} else {
#cond #12 - does it begin with ayin?
#_cond_debug(12);
if ($n =~ m//o) {
#case #13
#TODO: check (c,d)
$n =~ s/å?(.)$/å$1/;
$n = 'ì'.$n;
} else {
#cond #14 - does it begin with xet?
#_cond_debug(14);
if ($n =~ m//o) {
#case #15
#TODO: check (c,d)
$n =~ s/å?(.)$/å$1/;
$n = 'ì'.$n;
} else {
#case #16
#TODO: check (c,d)
$n =~ s/å?(.)$/å$1/; # the å? is against triple ååå
$n =~ s/^ð([^øòäàç])/é$1/o if ${$w->{'opts'}}{'î÷åø_àáã_ôð'};
$n = 'ì'.$n;
}
}
}
}
} else {
#cond #17 - are the 2 vowels u and a?
if ($w->{binyan} =~ m/[$qutal$huqtal]/o) {
#case #18
$n = undef;
} else {
#case #20
#TODO: check (c,b)
$n = 'ì'.$n;
}
}
}
}
} else {
#cond #19 - has the base 3 vowels?
if ($w->{binyan} eq $hitqatel) {
#case #20
#TODO: check (c)
$n = 'ì'.$n;
} else {
#case #21 - if we're here - it's one-syllable base
#TODO: check (d)
my $internal;
$internal = 'å';
$internal = 'é' if $w->{t} eq 'é';
$n = 'ì'.substr($n,0,1).$internal.substr($n,-1,1);
}
}
if (defined($n)) {
$n =~ s/å?ä$/åú/o; #check (c)
if ($w->{binyan} eq $qal) { # check (e)
# $n =~ s/^ìð([^øòäàç])/ì$1/o unless ${$w->{'opts'}}{'ùîåø_ôð'};
}
}
$w->{infinitive} = $n;
return $n;
}
sub _imperative_cond7 {
my ($w, $m) = @_;
# in the really rare case of doubled root, in $qal-efal, drop the xolam.
$m =~ s/å(?=.$)// if ${$w->{opts}}{'÷ì_àôòì'} && $w->{binyan} eq $qal &&
!${$w->{opts}}{'ùîåø_òò'} && $w->{t} eq $w->{l};
# cond #7 - is the guf at or atem?
#_cond_debug(7);
if ($w->{guf} =~ m/[$at$atem$aten]/o) {
# case #8 - if gone through cond #4, remove final he TODO
if ($w->{l} eq 'ä')
{
$m =~ s/ä$/é/ if $w->{guf} eq $aten;
$m =~ s/ä$// if $w->{guf} ne $aten;
}
$m .= 'é' if $w->{guf} eq $at;
$m .= 'å' if $w->{guf} eq $atem;
$m =~ s/ååå/åå/o; # remove triple waw!
# remove hiqtil's yod for 2pf
$m =~ s/é(.)$/$1/o if $w->{guf} eq $aten && $w->{binyan} eq $hiqtil;
$m =~ s/(?<=^.)å(?=ç$)//o if $w->{guf} eq $aten; # for ðåç
$m =~ s/ð?$/ðä/ if $w->{guf} eq $aten;
} else {
# case #9 - if came through cond #6, convert final xiriq to ceire
$m =~ s/(.)é(.)$/$1$2/ if $w->{binyan} eq $hiqtil;
}
return $m;
}
sub _imperative_action18 {
my ($w, $m) = @_;
if ($m ne 'òåö') { # exclude singular exception
#remove final o, but not double waw
$m =~ s/([^å])å(.)$/$1$2/o if $w->{guf} ne $aten;
}
$m .= 'é' if $w->{guf} eq $at;
$m .= 'å' if $w->{guf} eq $atem;
# $m =~ s/ååå/åå/o; # remove triple waw!
if ($w->{guf} eq $aten) {
$m =~ s/ð?$/ðä/;
return $m;
}
# cond #19 - is the second final consonant guttural?
#_cond_debug(19);
if (0) {
# action #20
} else { # action #21
# perform only if cond #13 is true (copied here)
}
return $m;
}
sub imperative_conj { # imperative
my ($w, $guf) = @_;
$w->{guf} = $guf;
$w->{tense} = $imperative;
$w->infinitive_conj unless $w->{infinitive}; # requires maqor
# $w->past_conj unless $w->{abar}; # and the past form, ???.
my $m = $w->{infinitive};
return undef unless $m; # in case there is no maqor form.
# only second persons have imperative form
return undef unless $w->{guf} =~ m/[$ata$at$atem$aten]$/o;
return undef if ${$w->{opts}}{'àéï_öéååé'};
# I like to shorten the he in the infinitives ìéäðåú, but the imperative
# should not suffer, so the he is returned here.
$m =~ s/^ìéä/ìäéä/ if $w->{q} eq 'ä' && $w->{binyan} eq $niqtal;
# action #2 - remove initial lamed
$m =~ s///o;
# consonant yod/waw should not be doubled in the beginning of word.
$m =~ s/^éé/é/o if $w->{binyan} eq $qitel;
$m =~ s/^åå/å/o if $w->{binyan} eq $qitel;
# cond #3 - does m end with åú and the abar with ä?
#_cond_debug(3);
if ($m =~ m/åú$/o && $w->{nistar} =~ m/ä$/o) {
# action #4
$m =~ s/ååú$/ååä/; # keep consonantal waw
$m =~ s/åú$/ä/; # seems redundant - if $w->{guf} eq $ata;
$m = $w->_imperative_cond7($m);
} else {
# case #5 - are $m and the abar one-syllabled?
#_cond_debug(5);
if ($w->{nistar}=~m/^.[éå]?.$/o && $m =~ m/^.[éå]?.$/o) {
# jump to cond #7
$m = $w->_imperative_cond7($m);
} else {
# cond #6 - is the final vowel a xiriq male?
# in other words, is it hifgil?
#_cond_debug(6);
if ($w->{binyan} eq $hiqtil) {
# jump to cond #7
$m = $w->_imperative_cond7($m);
} else {
#cond #10 - if not in list1, does $m end with ú and milgeli?
#_cond_debug(10);
#I replace List I with a tag in the data file:
$m = ${$w->{opts}}{"öéååé"} if ${$w->{opts}}{"öéååé"};
if (${$w->{opts}}{"î÷åø_îìòéìé"}) {
$m =~ s/ú$//o if ${$w->{opts}}{"î÷åø_îìòéìé"};
# for feminine or plural, jump to action #18
$m = $w->_imperative_action18($m);
} else {
# cond #12 - is the first consonant has schwa/xataf?
#_cond_debug(12);
if ($w->{binyan} eq $qal && $m!~m/^.[åé].$/o) { #TODO: is this a good rule?
# cond #13 - is $m in list2? Or does it end with guttural
# consonant?
#_cond_debug(13);
if (defined(${$w->{opts}}{"öéååé"})) {
$m = ${$w->{opts}}{"öéååé"} if ${$w->{opts}}{"öéååé"};
} elsif ($m =~ m/[àçähò]$/o || $m =~ m/[àçähò]å?.$/o) {
# action #14 TODO: check double star **
$m =~ s/å(.)$/$1/;
} else {
}
} else {
# go to action #15
}
# action #15 - return if $ata is required
# check (b) - initial nun may stay or drop.
# anyhow, an initial yod replacement must drop.
if ($w->{binyan} eq $qal) { # where else there can be a nun shwa'it
my $tmp_q = '';
$tmp_q = $w->{q} if ${$w->{opts}}{"öéååé_ùîåø_ôð"}
||${$w->{opts}}{"ùîåø_ôé"};
$m =~ s/^é(.å?.)$/${tmp_q}$1/;
# double consonantal yod with xiriq, in the rare cases it appears.
$m =~ s/^é([^é])/éé$1/o if ($w->{guf} eq $at ||$w->{guf} eq $atem)
&& ${$w->{opts}}{"ùîåø_ôé"};
}
####### end of check (b)
if ($w->{guf} ne $ata) {
# cond #16 - is the mishqal hi..o. (hisob)
#_cond_debug(16);
if ($m =~ m/^äé.å.$/o) {
# action #17
$m .= 'é' if $w->{guf} eq $at;
$m .= 'å' if $w->{guf} eq $atem;
$m =~ s/ð?$/ðä/ if $w->{guf} eq $aten;
} else {
# action #18
$m = $w->_imperative_action18($m);
}
}
}
}
}
}
return $m;
}
sub _future_cond10 {
my ($w, $m) = @_;
$m =~ s///o;
# check (b) - should the initial nun drop?
$m =~ s/^ð([^àäçòøéå])/é$1/o if !${$w->{opts}}{"ùîåø_ôð"};
# cond #10 - is it $ani?
#_cond_debug('10');
if ($w->{guf} eq $ani) {
# case #11
$m =~ s//å/o if ${$w->{opts}}{"÷ì_àôòì"} &&
$w->{binyan} eq $qal && # for àåäá
!${$w->{opts}}{"òúéãé_ààîõ"};
$m =~ s///o if $w->{binyan} eq $niqtal # àùîø ìðôùé )åìà àéùîø(
# úùìåí-ãâù âåøí ìöéøä åìéåã âí àçøé àìó
&& $w->{q} !~ m/[øòäàçåé]/
# àéæåï åìà àæåï
&& !${$w->{opts}}{"ðô_ðéæåï"}
# àôåì åìà àéôåì
|| ($w->{binyan} eq $qal && $w->{q} ne 'é')
# àöå÷ åìà àéöå÷
|| ($w->{binyan} eq $qal && $w->{root}=~m/^éö/o);
} else {
# case #12
}
my $fi = $future_initial{$w->{guf}};
$m = $fi.$m unless $fi eq 'é' and $m =~ m/^éé/o;
return $m;
}
sub future_conj { #chart 5 (V)
my ($w, $guf) = @_;
$w->{guf} = $guf;
# $w->{tense} = $future; # chart4 overrides it for passives
my $m = $w->_future_conj_chart4;
return undef if ${$w->{opts}}{'àéï_òúéã'};
# no addition for some persons
return $m if $w->{guf} =~ m/[$ani$ata$hi$hu$anu]/o;
# case #2 - does $m end with segol?
if ($w->_nakey_lh) {
#case #3
#_cond_debug('V3');
$m =~ s/ä$//;
$m = $m.'é' if $w->{guf} eq $aten || $w->{guf} eq $hen;
} else {
#action #4 - NNN
# cond #5 - is the final vowel i/u TODO (*)
#_cond_debug('V5');
if ($m =~ m/[éå].$/o && !${$w->{opts}}{"÷ì_àôòåì"}
|| $m =~ m/^[úé].å.$/o) { # TODO: check rule
#print "aaa $m\n";
#case #7
#_cond_debug('V7');
} else {
#cond #6 - is the final vowel o, and also in the past?
#_cond_debug('V6');
if ($m =~ /å.$/ && $w->{nistar} =~ /å.$/) {
#case #7
#_cond_debug('V7');
} else {
# cond #8 - is the second final consonant guttural?
#_cond_debug('V8');
if (0) {
#case #9 TODO check (**)
#_cond_debug('V9');
} else {
#case #10
#_cond_debug('V10');
}
$m =~ s/å(.)$/$1/o if $w->{guf} !~ m/^($hen|$aten)$/o; # is it good??
}
}
}
# add guf suffix: for 2pm and 3pm
$m .= 'å' if $w->{guf} eq $atem || $w->{guf} eq $hem;
$m =~ s/ååå/åå/o; # remove triple waw!
if ($w->{guf} eq $aten || $w->{guf} eq $hen) {
# remove hiqtil's yod for 2pf and 3pf
$m =~ s/é(.)$/$1/o if $w->{binyan} eq $hiqtil;
# and also qal's yod (nakey ayin-yod) - but not double yod
$m =~ s/(?<=[^é])é(?=[^é]$)//o if $w->{binyan} eq $qal
&& $w->{t} =~ m/é/o;
$m =~ s/(?<=^ú.)å(?=ç$)//o if $w->{t} eq 'å'; # for ðåç
# remove double nun for 2pf and 3pf
$m =~ s/ð?$/ðä/o;
}
# final yod for 2sf
$m .= 'é' if $w->{guf} eq $at;
return $m;
}
sub _future_conj_chart4 {
my ($w) = @_;
$w->infinitive_conj unless $w->{infinitive}; # requires maqor
$w->{tense} = $future;
my $m = $w->{infinitive};
if (!$m) { # comment (*)
$w->abar_nistar unless $w->{nistar};
$m = 'ì'.$w->{nistar};
}
if ($m eq 'ìâùú') { #remove singular exception
$m = 'ìäéâù';
}
# cond #2 - does $m begin with non-root he?
#_cond_debug('2');
#TODO: (**)
if ($w->{binyan} =~ m/[$niqtal$hiqtil$huqtal$hitqatel]$/o) {
# action #3
$m =~ s/^ìä/ì/;
# jump to #10
$m = $w->_future_cond10($m);
} else {
# cond #4 - does $m begin with xiriq and end with xolam?
#_cond_debug('4');
if ($w->{binyan} eq $qal && $m =~ m/^ì..å?å.$/o) {
# cond #5 - is one of the 2 last consonant guttural?
# is it an intransitive verb ??? TODO: what???
# TODO: (+)
#_cond_debug('5');
if (${$w->{opts}}{"÷ì_àôòì"}) {
#action #6 - convert final o to a
$m =~ s/å(.)$/$1/;
#jump to #10
$m = $w->_future_cond10($m);
} else {
# jump to #10
$m = $w->_future_cond10($m);
}
} else {
#cond #7 - is $m in list1?
#_cond_debug('7');
# Ornan's list1 is implemented using the òúéã1 tag!
if (${$w->{opts}}{'òúéã1'}) {
#action #8 - convert according to list1
$m = ${$w->{opts}}{'òúéã1'};
#jump to #10
$m = $w->_future_cond10($m);
} else {
# cond #9 - does $m have 1 syllable?
#_cond_debug('9');
if (0) {
#jump to #10
$m = $w->_future_cond10($m);
} else {
#cond #13 - intransitive, mishqal laqtol?
# TODO (***) (++)
#_cond_debug('13');
if (0) {
#case #14
} else {
#cond #15 - what guf? - NNN
# if ($w->{guf} ne $ani) {
#case #16. TODO: check (a,b,c)
# } else {
#cond #17 - NNN
#cases #18, #19 TODO check (a)
# }
$m =~ s///;
my $fi = $future_initial{$w->{guf}};
$m =~ s/// if $w->{guf} eq $ani && ($w->{binyan} eq $niqtal
# àôåì åìà àéôåì
|| $w->{binyan} eq $qal && $w->{q} ne 'é');
# certain doubled roots have xiriq in the future.
$fi .= 'é' if ${$w->{opts}}{'òúéã_çøå÷'} &&
$w->{binyan} eq $qal && $w->{t} eq $w->{l} &&
$w->{guf} ne $ani;
$m =~ s/å(?=.$)// if ${$w->{opts}}{'÷ì_àôòì'} &&
!${$w->{opts}}{'ùîåø_òò'} && $w->{t} eq $w->{l};
$m = $fi.$m;
$m =~ s/ééé*/éé/;
# I hate triple yod
# $m = $fi.$m unless $fi eq 'é' and $m =~ m/^éé/o;
}
}
}
}
}
# checking (a):
$m =~ s/ååú$/ååä/ if ($w->{l} eq 'ä'); #keep consonant waw
$m =~ s/å?ú$/ä/ if ($w->{l} eq 'ä');
return $m
}
sub _present_conj_chart6 {
my ($w, $m) = @_;
# $m = $w->{nistar};
#cond #2 (+) - is it one syllable?
if ($m =~ m/^.[éå]?.$/o) {
#case #5
} else {
#cond #3 - does $m begin with non-root nun? (***)
if ($w->{binyan} eq $niqtal) {
# cond #4 - does $m have two syllables?
if (1) {
#jump to case #5
} else {
# case #6
}
} else {
# cond #7 - is the mishqal .a.e./.a.o. ???
if (0) {
# case #8
} else {
#cond #9 - is the mishqal .a.a. ?
if ($w->{binyan} eq $qal) {
#case #10 - (and avoid removing cons waw)
$m =~ s/^(.)([^å])/$1å$2/ if !${$w->{opts}}{"áéðåðé_ùîï"};
} else {
#action #11
$m = 'î'.$m;
#cond #12 - is it hiqtil ???
if ($w->{binyan} eq $hiqtil) {
#case #13
$m =~ s/^îä/î/;
} else {
# cond #14 - is it hitqatel,huqtal?
if ($w->{binyan} eq $hitqatel || $w->{binyan} eq $huqtal) {
# case #15
$m =~ s/^îä/î/;
} else {
#cond #16 - is the first vowel in nistar_abar is e/i
if ($w->{binyan} eq $qitel) { #is it a good rule?
#case #17 - but I like to keep double yod
$m =~ s/^î(.)[éi]/î$1/o if $w->{q} ne 'é';
# and to double consonant waw
$m =~ s/^îå/îåå/o if $w->{q} eq 'å';
} else {
#case #18
}
}
}
}
}
}
}
return $m;
}
sub _present_cond8 {
my ($w, $m) = @_;
#cond 8 - is it single female?
#_cond_debug(8);
if ($w->{guf} eq $at || $w->{guf} eq $hi) {
#cond #9 - is it niqtal?
#_cond_debug(9);
if ($w->_nakey_lh && ($w->{binyan} eq $niqtal||
($w->{binyan}eq $hiqtil || $w->{binyan}eq$huqtal) &&
$w->{archaic_sf} ) ) { # last two lines for (*)
$m =~ s/ä$/éú/;
return $m;
} #else continue to case #11
}
#case #11 - check (**)
$m =~ s/ä$//o; # remove final e if any.
$m =~ s/$/ä/o if $w->{guf} eq $at || $w->{guf} eq $hi;
$m =~ s/åå$/å/o if $w->{guf} eq $aten || $w->{guf} eq $hen
|| $w->{guf} eq $anu; #no triple waws, please!
$m =~ s/$/åú/o if ($w->{guf} eq $aten || $w->{guf} eq $hen
|| $w->{guf} eq $anu);
$m =~ s/$/éí/o if $w->{guf} eq $atem || $w->{guf} eq $hem;
return $m;
}
sub present_conj { #chart VII
my ($w, $guf) = @_;
$w->{guf} = $guf;
return undef if ${$w->{opts}}{"àéï_áéðåðé"};
$w->{tense} = $present;
my $m = $w->{nistar};
# certain doubled root have the regular conjugation in present.
$m = $w->{q}.'å'.$w->{t}.$w->{l} if ${$w->{opts}}{'áéðåðé_ùåîø'} &&
$w->{binyan} eq $qal && $w->{t} eq $w->{l};
$m = $w->_present_conj_chart6($m);
return $m if ($guf eq $ani || $guf eq $ata || $guf eq $hu);
$w->{archaic_sf} = ($guf eq $at || $guf eq $hi) &&
${$w->{opts}}{"áéðåðéú_àøëàéú"};
# cond #2 - does m ends with e (nake_lh)
if ($w->_nakey_lh) {
#jump to cond #8
$m = $w->_present_cond8($m);
} else {
# case #3 - does m have 1 syllable?
if ($m =~ m/^.[éå]?.$/o) {
#jump to cond #8
$m = $w->_present_cond8($m);
} else {
#action #4
#cond #5 - does the first vowel in the form Xa/Xe ???
# TODO this rule is awful!!
if ($w->{binyan} eq $niqtal && $m =~ m/^ðé?.å.$/o ||
(${$w->{opts}}{"áéðåðé_ùîï"} && $w->{binyan} eq $qal)) {
#action #6 - NNN
#jump to cond #8
$m = $w->_present_cond8($m);
} else {
#cond #7 - is the last vowel i? check (***)
if ($w->{binyan} eq $hiqtil && !$w->{archaic_sf}) {
#jump to cond #8
$m = $w->_present_cond8($m);
} else {
#cond #12 - is it single female? (and not archaic single female***)
if ($w->{binyan}eq$hiqtil || !$w->{archaic_sf} &&
($w->{guf} eq $at || $w->{guf} eq $hi)) {
#for check (***)
$m =~ s/é(?=.$)// if $w->{binyan} eq$hiqtil && $w->{archaic_sf};
#cond #13 - is the final consonant xet &ayin or he mapuqa
if ($m =~ m/[çòh]$/o) {
#case #14
$m = $m.'ú';
} else {
#cod #15 - is it aleph?
if ($m =~ m/à$/o) {
#case #16
$m = $m.'ú';
} else {
#case #17
$m = $m.'ú';
}
}
} else {
#cond #18 is the last vowel (ceiyre)? TODO???
#----- no care -- no niqqud
#case #24
$m = $m.'åú' if $w->{guf} eq $aten || $w->{guf} eq $hen
|| $w->{guf} eq $anu;
$m = $m.'éí' if $w->{guf} eq $atem || $w->{guf} eq $hem;
# added by me for archaic present forms
$m = $m.'ä' if $w->{guf} eq $at || $w->{guf} eq $hi;
}
}
}
}
}
return $m;
}
sub paul_conj {
my ($w, $guf) = @_;
$w->{guf} = $guf;
return undef if ${$w->{opts}}{"àéï_áéðåðé"} ||${$w->{opts}}{"àéï_ôòåì"};
return undef if $w->{t} =~ m/[éå]/o || ${$w->{opts}}{"ðñúø"};
my $m;
$w->{tense} = $adjective;
$m = $w->_subst_root('qtål');
$m =~ s/ä$/é/;
$m = $m.'åú' if $w->{guf} eq $aten || $w->{guf} eq $hen
|| $w->{guf} eq $anu;
$m = $m.'éí' if $w->{guf} eq $atem || $w->{guf} eq $hem;
$m = $m.'ä' if ($w->{guf} eq $at || $w->{guf} eq $hi);
return $m
}
sub shempeula_conj {
my ($w) = @_;
return ${$w->{opts}}{"ùí_ôòåìä"} if ${$w->{opts}}{"ùí_ôòåìä"};
return undef if $w->{binyan} eq $qutal || $w->{binyan} eq $huqtal ||
${$w->{opts}}{"àéï_ùí_ôòåìä"};
my $m;
if ($w->{binyan} =~ m/[$niqtal$hitqatel]$/o) {
$w->infinitive_conj unless $w->{infinitive};
$m = $w->{infinitive};
$m =~ s///;
$m = $m.'åú' unless $m =~ m/åú$/o && $w->{l} eq 'ä';
return $m;
}
if ($w->{binyan} eq $hiqtil) {
$w->infinitive_conj unless $w->{infinitive};
$m = $w->{infinitive};
$m =~ s///;
$m =~ s/é(.)$/$1ä/;
$m =~ s/åú$/éä/; #for nakey_lh
$m =~ s/(^...$)/$1ä/; # for 'doubled'
return $m;
}
return undef if ${$w->{opts}}{"ðñúø"};
$m = 'qtélä' if $w->{binyan} eq $qal;
$m = 'qétål' if $w->{binyan} eq $qitel;
# no yod for quadruple roots
$m =~ s/é//o if (length($w->{t}) > 1 && $w->{binyan} eq $qitel);
if ($w->_nakey_lh) {
$m =~ s/l/é/o if $w->{binyan} =~ m/^[$qal$hiqtil$qitel]$/o;
}
# nakey ayin waw
if ($w->{t} =~ m/^[éå]$/o) {
$m =~ s/t// if $w->{binyan} =~ m/[$qal$hiqtil]/o;
$m =~ s/t/l/o if $w->{binyan} =~ m/$qitel/o;
}
# aleph sopit - the more common form is with yod
if ($w->{l} eq 'à') { $m =~ s/l/é/o if $w->{binyan} eq $qitel; }
return $w->_subst_root($m)
}
sub objectize {
# $is_subj is 1 if the object that is fused into the verb is really the
# subject of a sentence.
my ($w, $s, $bj, $is_subj, $suf) = @_;
$w->{object} = $bj;
# according to barkali, no kinnuy havur when obj=subj
if ($w->{tense} !~ m/[$present$infinitive]/o){
return undef
if ($bj eq $w->{guf} && $bj =~ m/[$ani$anu$ata$at$atem$aten]/o);
return undef if "$bj $w->{guf}" =~ m/[$ani$anu] [$ani$anu]/o;
return undef if "$bj $w->{guf}" =~ m/[$at$ata] [$at$ata]/o;
return undef if "$bj $w->{guf}" =~ m/[$aten$atem] [$aten$atem]/o;
}
if ($is_subj) {$suf = $subject_suf{$bj}} else {$suf = $object_suf{$bj}}
$suf =~ s/^äå$/å/ if $w->{second_bj_form};
# The following handling may seem logical, but it is wrong according to the
# academia specifications. since the stem form does not have the internal yod,
# the conjugations don't obtain it either. "When I protected my country" should
# be spelled áäâðé òì àøöé, and not áäâéðé òì àøöé.
#
# # handling of Doubled
# if ($w->{binyan} eq $hiqtil &&
# $w->{t} eq $w->{l} && !${$w->{opts}}{"ùîåø_òò"}) {
# # add xiriq where there was ceire.
# $s =~ s/($w->{q})($w->{l}[äåé]?)$/$1é$2/;
# }
if ($w->{tense} eq $infinitive) {
# nadav (and the aqademia rules) requires dropping the å.
# in general the waw should be dropped. but what about the cases where it
# is replaced by a qamac qatan, like in the *obj*ectizations for the
# second person pronouns. TODO: this has to be sorted out some time, but
# on the mean while I'll follow ravmilim.co.il and always drop the waw.
$s =~ s/^ì(.)(.)å(?=.$)/ì$1$2/ if $w->{binyan} eq $qal &&
!$w->_nakey_lh;# && ($is_subj || $bj !~ m/[$atem$aten$at$ata]/o);
# the nun stays since it has qamac!
$s =~ s/^ìé/ìð/o if $w->{binyan} eq $qal && $w->{q} eq 'ð';
$suf =~ s/^äå$/å/;
if ($is_subj) {$s =~ s/ì/B/o;} else {$s =~ s/ì/L/o;}
# TODO barkali writes ìãòúé and not ìãòúðé. why?
} elsif ($w->{tense} eq $imperative) {
return undef
if "$bj $w->{guf}" =~ m/[$at$ata$atem$aten] [$at$ata$atem$aten]/o;
# in hifil, the dropped yod of second person returns
if ($w->{binyan} eq $hiqtil && $w->{guf} eq $ata &&
$w->{infinitive} =~ m/é.$/o) {$s =~ s/(?=.$)/é/o}
$s =~ s/ä$//o if $w->_nakey_lh;
$s =~ s/^(..)å(?=.$)/$1/ if $w->{binyan} eq $qal;
} elsif ($w->{tense} eq $past) {
$s =~ s/ä$//o if $w->_nakey_lh;
$s =~ s/ä$/ú/o if $w->{guf} eq $hi;
$s =~ s/ú[íïîð]$/úå/ if $w->{guf} eq $aten || $w->{guf} eq $atem;
$s .= 'é' if $w->{guf} eq $at;
# TODO: $suf = 'å' if $bj==$hu && past_pael ùðàå and not ùðàäå
# plural gufs don't have the second_bj_form
return undef if ($w->{second_bj_form} && $s =~ m/[åw]$/o);
} elsif ($w->{tense} eq $present) {
#TODO why Barkaly does not show objectization of female plurals??
#return undef if $w->{guf} =~ m/^($anu|$aten|$hen)$/o;
return undef unless $w->{second_bj_form};
$s =~ s/ä$/ú/o if $w->{guf} eq $at;
$s =~ s/ä$//o if ($w->_nakey_lh && $bj ne $hu);
if ($w->{guf} =~ m/[$atem$hem$anu$aten$hen]/o) {
$s =~ s/í$//o ;
$s =~ s/ú$/úé/o ;
$suf =~ s/^ðé$/é/o;
$suf =~ s/^([íï])$/ä$1/o;
$suf = 'éê' if $bj eq $at;
}
} elsif ($w->{tense} eq $future) {
$s =~ s/^([àúéð]..)å(?=.$)/$1/o if $w->{binyan} eq $qal;
$s =~ s/ä$//o if $w->_nakey_lh;
# few gufs has a second legal form for hu/hi objects.
# return it when second_bj_form is requested.
# for example àùîøðå/àùîøðä, aside to àùîøå/àùîøä
if ($w->{second_bj_form}) {
#only few gufs have second_bj_form.
return undef unless $w->{guf} =~ m/[$ani$ata$hu$hi$anu]/o;
$suf =~ s/^(?=[äå]$)/ð/o;
}
}
# $suf = $subject_suf{$bj} if !defined($suf); # TODO is this needed?
# TODO most of the objectized forms are very bizarre.
# we should decide what to do with them. DEBUG
$suf = $suf.'+' unless $w->{tense} eq $infinitive;
return $s.$suf;
}
sub outword {
my ($w, $s) = @_;
my $detail='';
return unless $s;
if ($detailed_output) {
my ($tense,$person,$bjtext)=('-','','');
# the anonymous hash looked much better than the translation
# code it replaces. However, the following named hashes are much faster...
$tense = $tname{ $w->{tense} };
if ($w->{guf}) {
$person = ','.$Word::gname{$w->{guf}};
if ($w->{tense} =~ m/[$present$adjective]/o) {
$person = ','.$Word::pname{$w->{guf}};
}
}
if ($w->{object}) {
$bjtext=",ëéðåé/".$Word::gname{$w->{object}} if $w->{object};
}
if ($w->{tense} eq $adjective) {
$detail = " ú$person";
} else {
$detail = " ô,$tense$person$bjtext";
}
$detail .= ',ñîéëåú' if $s =~ m/-$/o;
}
# the following is only an oversimplification of deornanization!!!
$s =~ s/^w(?=[Ié])/å/o;
$s =~ s/[wå][wå]/åå/o;
$s =~ s/(?<=[å])w/å/o;
$s =~ s/w/åå/o;
$s =~ s/y(?=[Iéå])/é/o;
$s =~ s/(?<=[Iéå])y/é/o;
$s =~ s/éIé/éI/o; # for ééøä
$s =~ s/yä$/éä/o;
$s =~ s/y/éé/o;
$s =~ s/h/ä/o;
$s =~ s/-$//o; # if nadav doesn't print this stupid -, so would I.
$s =~ s/J/â'/go;
$s =~ s/Z/æ'/go;
$s =~ s/C/ö'/go;
$s =~ s/([ëîðôö])$/$fin{$1}/;
print $s.$detail."\n";
}
}