# 정규언어에 대한 열거 알고리듬
정규언어(regular language)에 대한 열거 알고리듬(enumeration algorithm)을 작성하는 방법에 대해 생각해 보고
그것을 하스켈로 작성하는 과제를 제시한다.

사실 이미 우리는 `RegExGen` 노트북에서 열거 알고리듬을 염두에 두고 정규식 의미함수를 하스켈로 작성해 보았다.
바람직한 열거 알고리듬에 가까운 형태의 프로그램을 얻기 위해 세 단계에 걸쳐 정규식 의미함수를 점진적으로 개선해 나갔다.
하지만 마지막 세번째 단계에서도 여전히 Kleene star를 처리하는 부분이 미진했다. Kleene star를 한 단계만 사용하는 경우는
어느 정도 처리가 되었지만 Kleene star가 두 번 이상 중첩되는 경우 여전히 고르게 나열하지 못하는 결과를 얻었던 것을 기억하라.

그 마지막 단계에서 출발하여 의미함수 `genRE`를 열거 알고리듬으로 활용할 수 있도록 수정하는 것이 이 과제의 목표이다.

## 정규식 데이타 타입 및 도우미 함수
이전에 살펴본 `RegExGen` 노트북에서 사용한 것과 같은 정규식 데이타 타입과 몇 개의 도우미 함수들을 그대로 가져왔다.

In [1]:
-- Abstract syntax of regular expressions over characters.
data RE -- regular expression data type
  = Empty
    -- the empty language: genRE yields no strings, formatRE prints "∅"
  | Epsilon
    -- the language holding only the empty string, printed as "ε"
  | Alphabet Char
    -- a single character
  | Concat RE RE
    -- concatenation of two expressions
  | Union RE RE
    -- alternation of two expressions, printed as "(r1+r2)"
  | Kleene RE
    -- Kleene star (repetition) of an expression, printed as "(r)*"
  deriving Show

-- Utility that turns a string into a regular expression made of its
-- characters chained together with Concat (nested to the right).
-- The empty string becomes Epsilon; a one-character string becomes a bare
-- Alphabet node.
string2re :: String -> RE
string2re []     = Epsilon
string2re [c]    = Alphabet c
string2re (c:cs) = Concat (Alphabet c) (string2re cs)

import IHaskell.Display

-- Pretty-print a regular expression in the notebook: the text produced by
-- formatRE is wrapped as a single HTML display item.
ppRE :: RE -> Display
ppRE re = Display [rendered]
  where
    rendered = html (formatRE re)

-- Render a regular expression as text in conventional notation.
-- Union and Kleene always emit their own parentheses; Concat simply puts
-- its two operands side by side.
formatRE :: RE -> String
formatRE re = case re of
  Empty        -> "∅"
  Epsilon      -> "ε"
  Alphabet c   -> [c]
  Concat r1 r2 -> formatRE r1 ++ formatRE r2
  Union r1 r2  -> concat ["(", formatRE r1, "+", formatRE r2, ")"]
  Kleene r     -> concat ["(", formatRE r, ")*"]

## 정규식 의미함수
아래는 `RegExGen` 노트북에서 세번째 단계로 작성했던 의미함수 `genRE''`를 이름만 `genRE`로 바꿔서 그대로 옮겨놓았다.
아래 내용을 수정하여 열거 알고리듬이 되도록 해보자. 기본 아이디어는, 무한히 많은 횟수(0회, 1회, 2회, 3회, ...)로 반복되는 문자열들의 집합 전체를 구하는 대신, 유한한 최대 반복 횟수를 지정하여 그 일부를 구하는 함수를 작성하는 것이다.

In [2]:
-- A somewhat improved merge function.
-- The shorter head is emitted first; when the lengths are equal the smaller
-- head (dictionary order for strings) is emitted first. When both heads are
-- equal only one copy is kept, so elements shared by the two lists at the
-- same position in the order are not duplicated.
-- If both inputs are already ordered by (length, then natural order), the
-- result is ordered the same way.
merge :: (Foldable t, Ord (t a)) => [t a] -> [t a] -> [t a]
merge [] right = right
merge left [] = left
merge left@(l:ls) right@(r:rs) =
  case compare (length l) (length r) of
    LT -> takeLeft
    GT -> takeRight
    EQ
      | l < r     -> takeLeft
      | l > r     -> takeRight
      | otherwise -> l : merge ls rs   -- equal heads: keep one, drop the other
  where
    takeLeft  = l : merge ls right
    takeRight = r : merge left rs

-- Cartesian product of two lists, assembled with merge.
-- The pair of the two heads comes first; the rest is the merge of
--   * the remaining xs paired with the head y,
--   * the product of the two tails, and
--   * the head x paired with the remaining ys.
-- NOTE: merge compares elements by `length` first, and `length` of a pair
-- is always 1, so the pairs are effectively merged in plain lexicographic
-- order. On infinite inputs this is not a fair diagonal walk: for example
-- take 16 (diagCartProd [0..] [0..]) yields only pairs whose first
-- component is 0.
diagCartProd :: (Ord a, Ord b) => [a] -> [b] -> [(a, b)]
diagCartProd (x:xs) (y:ys) = (x, y) : ((column `merge` inner) `merge` row)
  where
    column = map (\x' -> (x', y)) xs
    inner  = diagCartProd xs ys
    row    = map (\y' -> (x, y')) ys
diagCartProd _ _ = []

-- All concatenations x ++ y with x from the first list and y from the
-- second, assembled with merge.
-- The concatenation of the two heads comes first; the rest is the merge of
--   * every remaining x with the head y appended,
--   * the concatenation product of the two tails, and
--   * the head x prepended to every remaining y.
-- Either list being empty gives an empty result.
diagConcProd :: Ord a => [[a]] -> [[a]] -> [[a]]
diagConcProd (x:xs) (y:ys) = (x ++ y) : ((suffixed `merge` inner) `merge` prefixed)
  where
    suffixed = map (++ y) xs
    inner    = diagConcProd xs ys
    prefixed = map (x ++) ys
diagConcProd _ _ = []

-- Build the regular expression that concatenates n copies of r.
--   * n <= 0 gives Epsilon (zero repetitions match only the empty string).
--     Negative counts are treated like 0: `replicate` returns an empty list
--     for them, and `foldr1` would throw on that empty list.
--   * n == 1 gives r itself.
--   * n >= 2 gives a right-nested chain of Concat nodes.
replicateRE :: RE -> Int -> RE
replicateRE r n
  | n <= 0    = Epsilon
  | otherwise = foldr1 Concat (replicate n r)

-- Semantic function of regular expressions, bounded by an unrolling budget.
--
-- genRE r n lists strings of the language of r. The budget n only matters
-- for Kleene stars: Empty, Epsilon and Alphabet ignore it, and Concat and
-- Union pass it unchanged to both operands.
--
-- A star reached with budget n > 0 is unrolled once into
-- Union Epsilon (Concat r (Kleene r)) and generated with budget n - 1, so
-- it contributes at most n repetitions. The body r is generated with the
-- reduced budget as well, which means stars nested inside r receive a
-- smaller budget at every level; nested stars are therefore not enumerated
-- evenly.
--
-- A budget of 0 or less yields only the empty string for a star. Negative
-- budgets are handled like 0; matching on the literal 0 alone would let a
-- negative budget count down forever.
genRE :: RE -> Int -> [String]
genRE Empty          _ = []
genRE Epsilon        _ = [""]
genRE (Alphabet c)   _ = [[c]]
genRE (Concat r1 r2) n = genRE r1 n `diagConcProd` genRE r2 n
genRE (Union r1 r2)  n = genRE r1 n `merge` genRE r2 n
genRE (Kleene r)     n
  | n <= 0    = [""]
  | otherwise = genRE (Union Epsilon (Concat r (Kleene r))) (n - 1)

In [3]:
-- Both inputs are ordered by length and then alphabetically; the elements
-- "" and "bb" occur in both lists but appear only once in the result.
merge ["","a","bb","aaa"] ["","ab","bb","aba"]

["","a","ab","bb","aaa","aba"]

In [4]:
-- Finite inputs: all 9 * 9 = 81 pairs are produced.
diagCartProd [0,1,2,3,4,5,6,7,8] [0,1,2,3,4,5,6,7,8]
length $ diagCartProd [0,1,2,3,4,5,6,7,8] [0,1,2,3,4,5,6,7,8]
-- Infinite inputs: the first 16 pairs all have 0 as their first component,
-- i.e. the enumeration is not a fair diagonal walk.
take 16 $ diagCartProd [0..] [0..]

[(0,0),(0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),(0,8),(1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),(1,7),(1,8),(2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6),(2,7),(2,8),(3,0),(3,1),(3,2),(3,3),(3,4),(3,5),(3,6),(3,7),(3,8),(4,0),(4,1),(4,2),(4,3),(4,4),(4,5),(4,6),(4,7),(4,8),(5,0),(5,1),(5,2),(5,3),(5,4),(5,5),(5,6),(5,7),(5,8),(6,0),(6,1),(6,2),(6,3),(6,4),(6,5),(6,6),(6,7),(6,8),(7,0),(7,1),(7,2),(7,3),(7,4),(7,5),(7,6),(7,7),(7,8),(8,0),(8,1),(8,2),(8,3),(8,4),(8,5),(8,6),(8,7),(8,8)]

81

[(0,0),(0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),(0,8),(0,9),(0,10),(0,11),(0,12),(0,13),(0,14),(0,15)]

## 테스트

In [5]:
-- Small building blocks reused by the tests below.
_1 = Alphabet '1'
_0 = Alphabet '0'
_00 = string2re "00"
_01 = string2re "01"
_10 = string2re "10"
_11 = string2re "11"

-- Expressions without a Kleene star: the budget argument has no effect on
-- them, so 0 is passed.
ppRE Empty
genRE Empty 0

ppRE Epsilon
genRE Epsilon 0

ppRE _0
genRE _0 0

ppRE _1
genRE _1 0

ppRE (Concat _0 _1)
genRE (Concat _0 _1) 0

ppRE (Union _0 _1)
genRE (Union _0 _1) 0

ppRE (Union _00 _11)
genRE (Union _00 _11) 0

ppRE (Concat (Union _00 _11) (Union _01 _10))
genRE (Concat (Union _00 _11) (Union _01 _10)) 0

-- A single Kleene star: budget n yields 0 up to n repetitions of the body.
ppRE (Kleene _1)
genRE (Kleene _1) 2
genRE (Kleene _1) 3

ppRE (Kleene _01)
genRE (Kleene _01) 2
genRE (Kleene _01) 3

[]

[""]

["0"]

["1"]

["01"]

["0","1"]

["00","11"]

["0001","0010","1101","1110"]

["","1","11"]

["","1","11","111"]

["","01","0101"]

["","01","0101","010101"]

In [6]:
-- Two sibling (not nested) Kleene stars combined by Union: both stars
-- receive the same budget.
ppRE (Union (Kleene _0) (Kleene _1))
genRE (Union (Kleene _0) (Kleene _1)) 2
genRE (Union (Kleene _0) (Kleene _1)) 3

-- Two sibling Kleene stars combined by Concat.
ppRE (Concat (Kleene _0) (Kleene _1))
genRE (Concat (Kleene _0) (Kleene _1)) 2
genRE (Concat (Kleene _0) (Kleene _1)) 3

["","0","1","00","11"]

["","0","1","00","11","000","111"]

["","0","1","00","01","11","001","011","0011"]

["","0","1","00","01","11","000","001","011","111","0001","0011","0111","00011","00111","000111"]

In [7]:
-- Nested Kleene stars: the inner stars are generated with a smaller budget
-- than the outer one, so at the same budget fewer strings come out than for
-- the flat expression below (compare the results for budget 2).
ppRE (Kleene (Union (Kleene _00) (Kleene _11)))
genRE (Kleene (Union (Kleene _00) (Kleene _11))) 2
genRE (Kleene (Union (Kleene _00) (Kleene _11))) 3
genRE (Kleene (Union (Kleene _00) (Kleene _11))) 4

-- The same expression written with a single, un-nested star.
ppRE (Kleene (Union _00 _11))
genRE (Kleene (Union _00 _11)) 2
genRE (Kleene (Union _00 _11)) 3
genRE (Kleene (Union _00 _11)) 4
genRE (Kleene (Union _00 _11)) 5
genRE (Kleene (Union _00 _11)) 6

["","00","11"]

["","00","11","0000","0011","1100","1111","000000","000011","111100","111111"]

["","00","11","0000","0011","1100","1111","000000","000011","001100","001111","110000","110011","111100","111111","00000000","00000011","00001100","00001111","00111100","00111111","11000000","11000011","11110000","11110011","11111100","11111111","0000000000","0000000011","0000001100","0000001111","0000111100","0000111111","1111000000","1111000011","1111110000","1111110011","1111111100","1111111111","000000000000","000000000011","000000111100","000000111111","111111000000","111111000011","111111111100","111111111111"]

["","00","11","0000","0011","1100","1111"]

["","00","11","0000","0011","1100","1111","000000","000011","001100","001111","110000","110011","111100","111111"]

["","00","11","0000","0011","1100","1111","000000","000011","001100","001111","110000","110011","111100","111111","00000000","00000011","00001100","00001111","00110000","00110011","00111100","00111111","11000000","11000011","11001100","11001111","11110000","11110011","11111100","11111111"]

["","00","11","0000","0011","1100","1111","000000","000011","001100","001111","110000","110011","111100","111111","00000000","00000011","00001100","00001111","00110000","00110011","00111100","00111111","11000000","11000011","11001100","11001111","11110000","11110011","11111100","11111111","0000000000","0000000011","0000001100","0000001111","0000110000","0000110011","0000111100","0000111111","0011000000","0011000011","0011001100","0011001111","0011110000","0011110011","0011111100","0011111111","1100000000","1100000011","1100001100","1100001111","1100110000","1100110011","1100111100","1100111111","1111000000","1111000011","1111001100","1111001111","1111110000","1111110011","1111111100","1111111111"]

["","00","11","0000","0011","1100","1111","000000","000011","001100","001111","110000","110011","111100","111111","00000000","00000011","00001100","00001111","00110000","00110011","00111100","00111111","11000000","11000011","11001100","11001111","11110000","11110011","11111100","11111111","0000000000","0000000011","0000001100","0000001111","0000110000","0000110011","0000111100","0000111111","0011000000","0011000011","0011001100","0011001111","0011110000","0011110011","0011111100","0011111111","1100000000","1100000011","1100001100","1100001111","1100110000","1100110011","1100111100","1100111111","1111000000","1111000011","1111001100","1111001111","1111110000","1111110011","1111111100","1111111111","000000000000","000000000011","000000001100","000000001111","000000110000","000000110011","000000111100","000000111111","000011000000","000011000011","000011001100","000011001111","000011110000","000011110011","000011111100","000011111111","001100000000","001100000011","001100001100","00110000

In [9]:
import Data.List (filter, nub, sortBy)

-- Ordering used to sort generated strings: compare by length first and fall
-- back to the ordinary comparison only when the lengths are equal.
-- `mappend` on Ordering keeps the left result unless it is EQ.
compareLenThenComp :: (Foldable t, Ord (t a)) => t a -> t a -> Ordering
compareLenThenComp x y = byLength `mappend` byContent
  where
    byLength  = compare (length x) (length y)
    byContent = compare x y

-- Strings of the nested-star expression at budget 6: duplicates removed,
-- sorted by length and then by ordinary comparison, and restricted to
-- strings shorter than 12 characters.
set1 = filter ((12 >).length) $ sortBy compareLenThenComp $ nub $ genRE (Kleene (Union (Kleene _00) (Kleene _11))) 6

-- The same post-processing applied to the flat single-star expression.
set2 = filter ((12 >).length) $ sortBy compareLenThenComp $ nub $ genRE (Kleene (Union _00 _11)) 6

set1
set2

-- Check that, up to this length, both expressions produced the same strings.
set1 == set2

["","00","11","0000","0011","1100","1111","000000","000011","001100","001111","110000","110011","111100","111111","00000000","00000011","00001100","00001111","00110000","00110011","00111100","00111111","11000000","11000011","11001100","11001111","11110000","11110011","11111100","11111111","0000000000","0000000011","0000001100","0000001111","0000110000","0000110011","0000111100","0000111111","0011000000","0011000011","0011001100","0011001111","0011110000","0011110011","0011111100","0011111111","1100000000","1100000011","1100001100","1100001111","1100110000","1100110011","1100111100","1100111111","1111000000","1111000011","1111001100","1111001111","1111110000","1111110011","1111111100","1111111111"]

["","00","11","0000","0011","1100","1111","000000","000011","001100","001111","110000","110011","111100","111111","00000000","00000011","00001100","00001111","00110000","00110011","00111100","00111111","11000000","11000011","11001100","11001111","11110000","11110011","11111100","11111111","0000000000","0000000011","0000001100","0000001111","0000110000","0000110011","0000111100","0000111111","0011000000","0011000011","0011001100","0011001111","0011110000","0011110011","0011111100","0011111111","1100000000","1100000011","1100001100","1100001111","1100110000","1100110011","1100111100","1100111111","1111000000","1111000011","1111001100","1111001111","1111110000","1111110011","1111111100","1111111111"]

True