## Setup

In [8]:
# Imports

import hfst_dev as hfst
import graphviz
import random

import itertools
import random
from nltk.parse.generate import generate
from nltk.probability import FreqDist
from nltk import CFG
from nltk import grammar

In [9]:
# Load the English transducer from the file named 'English'
# (path is relative to the notebook's working directory).

istream = hfst.HfstInputStream('English')
try:
    # Fail fast if the stream reports that it is not in a readable state.
    assert istream.is_good()
    English = istream.read()
finally:
    # Close the stream even when the check or the read above fails,
    # so the file handle is not left open in the kernel.
    istream.close()

In [10]:
# Copied over from english.ipynb to sample input and output

def sample_input(x, n=8, cycles=3):
    """Return ``n`` randomly chosen input-side strings of transducer ``x``.

    The work is done on a copy, so ``x`` itself is left untouched.

    Parameters
    ----------
    x : transducer-like object
        Must provide ``copy()``; the copy must provide ``input_project()``,
        ``minimize()`` and ``extract_paths(max_cycles=...)`` returning a
        mapping whose keys are the strings to sample from.
    n : int, default 8
        Number of distinct strings to return.
    cycles : int, default 3
        Forwarded to ``extract_paths`` as ``max_cycles``.

    Returns
    -------
    list
        ``n`` distinct keys drawn without replacement.

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` when fewer than ``n`` strings exist.
    """
    projected = x.copy()
    projected.input_project()
    projected.minimize()
    # Honour the caller's cycle bound instead of a hard-coded 3.
    paths = projected.extract_paths(max_cycles=cycles)
    # random.sample needs a sequence: sets are rejected from Python 3.11 on.
    # Sorting also makes a seeded run reproducible.
    return random.sample(sorted(paths.keys()), n)
def sample_output(x, n=8, cycles=3):
    """Return ``n`` randomly chosen output-side strings of transducer ``x``.

    The work is done on a copy, so ``x`` itself is left untouched.

    Parameters
    ----------
    x : transducer-like object
        Must provide ``copy()``; the copy must provide ``output_project()``,
        ``minimize()`` and ``extract_paths(max_cycles=...)`` returning a
        mapping whose keys are the strings to sample from.
    n : int, default 8
        Number of distinct strings to return.
    cycles : int, default 3
        Forwarded to ``extract_paths`` as ``max_cycles``.

    Returns
    -------
    list
        ``n`` distinct keys drawn without replacement.

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` when fewer than ``n`` strings exist.
    """
    projected = x.copy()
    projected.output_project()
    projected.minimize()
    # Honour the caller's cycle bound instead of a hard-coded 3.
    paths = projected.extract_paths(max_cycles=cycles)
    # random.sample needs a sequence: sets are rejected from Python 3.11 on.
    # Sorting also makes a seeded run reproducible.
    return random.sample(sorted(paths.keys()), n)

In [11]:
# Phone-class definitions, following phoneclass.fst.
#
# Each class is compiled with hfst.regex and then made available in two places:
# as a notebook-level variable and as an entry in `defs`, so that later
# regular expressions can refer to it by name.

defs = {'English' : English}


def _define(name, pattern):
    """Compile `pattern` against the current `defs`, store it under `name`, return it."""
    compiled = hfst.regex(pattern, definitions=defs)
    defs[name] = compiled
    return compiled


# One class per vowel symbol, covering its three digit-suffixed variants.
# NOTE(review): the digits are presumably CMUdict-style stress marks
# (0 unstressed, 1 primary, 2 secondary) — confirm against phoneclass.fst.
VowAA = _define('VowAA', 'AA0 | AA1 | AA2')
VowAE = _define('VowAE', 'AE0 | AE1 | AE2')
VowAH = _define('VowAH', 'AH0 | AH1 | AH2')
VowAO = _define('VowAO', 'AO0 | AO1 | AO2')
VowAW = _define('VowAW', 'AW0 | AW1 | AW2')
VowAY = _define('VowAY', 'AY0 | AY1 | AY2')
VowEH = _define('VowEH', 'EH0 | EH1 | EH2')
VowER = _define('VowER', 'ER0 | ER1 | ER2')
VowEY = _define('VowEY', 'EY0 | EY1 | EY2')
VowIH = _define('VowIH', 'IH0 | IH1 | IH2')
VowIY = _define('VowIY', 'IY0 | IY1 | IY2')
VowOW = _define('VowOW', 'OW0 | OW1 | OW2')
VowOY = _define('VowOY', 'OY0 | OY1 | OY2')
VowUH = _define('VowUH', 'UH0 | UH1 | UH2')
VowUW = _define('VowUW', 'UW0 | UW1 | UW2')

# Vowels grouped by their digit suffix.
Vow0 = _define('Vow0', 'AH0| IH0| ER0| IY0| OW0| AA0| EH0| UW0| AE0| AO0| AY0| EY0| AW0| UH0| OY0')
Vow1 = _define('Vow1', 'EH1| AE1| AA1| IH1| IY1| EY1| OW1| AO1| AY1| AH1| UW1| ER1| AW1| UH1| OY1')
Vow2 = _define('Vow2', 'EH2| EY2| AE2| AY2| AA2| IH2| OW2| IY2| AO2| UW2| AH2| AW2| ER2| UH2| OY2')

# Any vowel: relies on Vow0, Vow1 and Vow2 already being registered in `defs`.
Vow = _define('Vow', 'Vow0 | Vow1 | Vow2')

# Nasal consonants.
Nas = _define('Nas', 'N | M | NG')

# The full phone inventory (consonants and digit-suffixed vowels).
Phone = _define('Phone', 'AH0| N| S| L| T| R| K| D| IH0| M| Z| ER0| IY0| B| EH1| P| AE1| AA1| IH1| F| G| V| IY1| NG| HH| EY1| W| SH| OW1| OW0| AO1| AY1| AH1| UW1| JH| Y| CH| AA0| ER1| EH2| EY2| AE2| AY2| AA2| EH0| IH2| TH| AW1| OW2| UW0| IY2| AO2| AE0| UH1| AO0| AY0| UW2| AH2| EY0| OY1| AW2| DH| ZH| ER2| UH2| AW0| UH0| OY2| OY0')

## Creating Classes of Words Based on Stress

#### One-syllable words

In [30]:
# Words whose phone string contains exactly one vowel.
# The adjacent literals below join into a single regex string.
expr = (
    '[English .o. '                              # compose English with the filter below
    '[[ [Phone - Vow]* Vow [Phone - Vow]* ].l]'  # non-vowels, one vowel, non-vowels
    '].u'                                        # keep the upper (input) side of the result
)
one_syl = hfst.regex(expr, definitions=defs)

In [31]:
# Preview a random sample (default n=8) of input-side strings from one_syl.
# No random seed is set, so the output differs from run to run.
sample_input(one_syl)

['gant', 'fa|ire', 'it', 'sones', 'dre|w', 'ro|urke', 'dec', 'ahlf']

#### Two-syllable words with main stress on the first syllable

In [42]:
# Two-vowel words whose FIRST vowel carries the main stress (a Vow1 vowel).
# Accepted vowel patterns are Vow1-Vow0 and Vow1-Vow2.
# The second alternative was previously Vow2-Vow1, which puts the main stress
# on the second vowel and duplicated a pattern used for two_syl_sec.
# The adjacent literals below join into a single regex string.
expr = (
    '[English .o. ['                                            # compose English with the filter below
    '[[Phone - Vow]* Vow1 [Phone - Vow]* Vow0 [Phone - Vow]*]'  # Vow1 then Vow0
    ' | '
    '[[Phone - Vow]* Vow1 [Phone - Vow]* Vow2 [Phone - Vow]*]'  # Vow1 then Vow2
    '].l].u'                                                    # keep the upper (input) side of the result
)
two_syl_first = hfst.regex(expr, definitions=defs)

In [50]:
# Preview a random sample (default n=8) of input-side strings from two_syl_first.
# No random seed is set, so the output differs from run to run.
sample_input(two_syl_first)

['batdorf',
 'del|lums',
 't|hac|kston',
 'ornis|h',
 'was|sman',
 'gi|rton',
 'mol|len',
 'glyn|nie']

#### Two-syllable words with main stress on the second syllable

In [73]:
# Two-vowel words whose SECOND vowel is a Vow1 vowel.
# Accepted vowel patterns are Vow0-Vow1 and Vow2-Vow1.
# The adjacent literals below join into a single regex string.
expr = (
    '[English .o. ['                                            # compose English with the filter below
    '[[Phone - Vow]* Vow0 [Phone - Vow]* Vow1 [Phone - Vow]*]'  # Vow0 then Vow1
    ' | '
    '[[Phone - Vow]* Vow2 [Phone - Vow]* Vow1 [Phone - Vow]*]'  # Vow2 then Vow1
    '].l].u'                                                    # keep the upper (input) side of the result
)
two_syl_sec = hfst.regex(expr, definitions=defs)

In [74]:
# Preview a random sample (default n=8) of input-side strings from two_syl_sec.
# No random seed is set, so the output differs from run to run.
sample_input(two_syl_sec)

['as|hamed',
 'pras|hant',
 'vanbrunt',
 'onscreen',
 'li|ret|te',
 'pruet|te',
 'elston',
 'bo|uman']