-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
1029 lines (918 loc) · 44 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# #####################################################################################################################
# Change History
# v1.5
# *** check out demo function try_me_grid()
# - initially working grid crossword generator with multiline string grid definition
# (see try_me_2 for details)
# - bug fixes, various modifications
# v1.4.2
# - class GridCrossWord: initially working conversion from string to Word/Cell structure
# still fixing minor details
# - class Word: modified _cells_to_sequences function: only convert cells containing actual letters
# (more like a-z than %,& or _)
# v1.4.1
# - class GridCrossWord: working on conversion from string to Word/Cell structure
# v1.4
# - new class GridCrossWord for constructing crosswords inside a 2d array of Cells
# using Sequence, Dictionary, Cell and Word classes
# * check out function 'sample' inside this GridCrossWord class
# v1.3.1
# - class CrossWords: modified __cells_to_sequence - bugfix
# - minor fixes
# - - class CrossWords: renamed to Word
# v1.3 (kinda functional?)
# *** check out demo function try_me_2
# * this version SEEMS to be capable of generating simple grid crosswords
# - added demo function "try_me_2" for creating simple grid crossword
# - printing the grid containing (hopefully) crossword(s)
# - Working on defining Words in space, generating new type of crossword puzzle
# v1.2.1
# - class CrossWord: development of core structure
# - class Cell: development of core structure
# V1.2
# - started implementing new classes:
# -- Cell: a logical cell in a crosswords diagram (e.g. empty place for letter, hint, etc)
# -- CrossWord: single word in a crosswords diagram: contains Cell(s), Sequence(s)
# v1.1.1
# - Dictionary class: added 'session' attribute -
# for gathering meta-data or 'session' stats, eg. dict loading time, number of uses, etc
# - Dictionary class: match_words now works with no *args
# - minor changes
# v1.1
# - implemented function 'simple_crossword_2' which works like 'simple_crossword'
# but also accepts an iterable of lengths and letter positions for solution words.
# letter positions may be negative (from the end of a word)
# * sample usage in 'try_me_1' function
# - minor changes
# - modified and elaborated, metadata and comments. Clean up.
# v1.0(functional, actually useful) <<<=====
# *** this version can actually generate a legit crossword - yey ^^
# *** see function try_me_1() for an example!
# - implemented simple crossword generator function
# take a look at: function 'simple_crossword'
# - class Dictionary: commented out sequences sorting code - not necessary atm
# - various changes
# ------------
# v0.3.5
# - layout changes to enable code to be imported as module
# - various changes
# - working on simple, sample crossword generator
# v0.3.4
# - various fixes
# - removed origin_words functions (merged into match_words)
# - Sequence class: added 'inWord' now accounting for negative position (from the end of a word)
# v0.3.3
# - Sequence class: added 'inWord' function that determines if sequence belong to a word (accounting position)
# - Sequence class: added match_words function that accepts arbitrary number of sequences and word length
# v0.3.2
# - Sequence class: working on 'origin_words_2' function that accepts 2 Sequence objects
# - Sequence class: added 'origin_words_1' function - this one accepts a single Sequence object
# - Sequence class: added 'position' attribute - abstract sequence position in a virtual word,
# changed init args
# - minor changes (printing, etc)
# v0.3.1
# - various considerable changes
# v0.3(functional)
# *** this version is fine for running in an interactive session
# *** run the code - this will load dictionary file and then play with 'origin_words' live
# - moved code to a new class Dictionary
# - implemented function "origin_words" that returns all words (of any or given length)
# that contain particular sequence (at any or given position)
# - generalized sorting code
# - whole words are no more in letter-sequences unless they are actually also a part of other words
# - various changes: structural, others
# - minor changes (e.g. removed plotly histogram examples code)
# v0.2.2
# - further implementation of Sequence class
# - implemented logical connections between Sequence objects
# - various changes: printing, structure, etc.
# v0.2.1
# - started implementing Sequence class
# this class will represent a sequence of letters (not necessarily a whole, legit word but also)
# - started creating sequence dictionaries with objects and connecting them together
# v0.2(functional)
# *** this version is fine for just statistical analysis of a word list
# *** namely: generate dictionaries of sequences of letters (different lengths)
# *** together with information on how many occurrences of each sequence there are
# v0.1.4
# - generalized similar, repeating pieces of code for counting n-letter sequences
# (this seems to have worsened the performance by a bit, but makes it sooo much easier to work with the code)
# v0.1.3
# - counting sequences up to 14 letters long
# - all sequences dictionaries sorted by number of occurances
# - minor changes in structure
# v0.1.2
# - counting sequences up to 12-letters long
# - various changes in code structure
# v0.1.1
# - counting sequences of 1-,2-,3-,4-,5-,6- and 7-letters
# - added memory info printout
# - added loading progress printout
# v0.1
# - added loading dictionary from txt file
# - added statistics: word count, words' lengths count, avg word len, letter count, 2-letter pairs count,
# - added histograms (plotly) - words' lengths and single letter occurances
#
# #####################################################################################################################
import os
import psutil
import random
import time
from random import randint
import functools
# Module-wide Dictionary instance used by the crossword helpers below.
# It stays None until some code assigns a loaded Dictionary to it (try_me_1 does so).
D = None
def debug(func):
    """Decorator: print every call to *func* (with its arguments) and the value it returned."""
    @functools.wraps(func)
    def _traced(*args, **kwargs):
        # positional arguments first, then keyword arguments rendered as name=value
        rendered = [repr(positional) for positional in args]
        rendered.extend(f"{k}={v!r}" for k, v in kwargs.items())
        signature = ", ".join(rendered)
        print(f"Calling {func.__name__}({signature})")
        value = func(*args, **kwargs)
        print(f"{func.__name__!r} returned {value!r}")
        return value
    return _traced
class Sequence:
    """
    A sequence of letters that may be a whole word or just a fragment of some word.

    It is the building block of the Dictionary class (one object per word and per indexed
    fragment) and is also used as an abstract constraint - "these letters, optionally at
    this position" - when matching words.
    """

    def __init__(self, seq=None, pos=None):
        """
        :param seq: the letters of this sequence; stored as str(seq)
                    (note: this means the default None is stored as the text 'None')
        :param pos: position of the sequence in a (virtual) word; negative values count
                    from the end of the word; None means "anywhere"
        """
        self.sequence = str(seq)  # this particular sequence string
        self.position = pos  # position of the sequence in a (virtual) word
        self.word_source_ref = None
        self.count = None  # how many times this sequence appeared in the source data
        self.origin_words = None  # dictionary of words that contain this sequence
        self.contained_sequences = None
        self.explicit = None  # property of being explicit - for sequences that cannot change

    def __len__(self):
        return len(self.sequence)

    def __str__(self):
        return self.sequence

    def __repr__(self):
        return self.sequence

    def in_word(self, word):
        """
        Check if this sequence is contained in word (anywhere, or at self.position if set).

        Callers must test the result with ``is None``: a successful match without a
        position returns the index of the first occurrence, which can be 0.

        :param word: word to be checked against
        :return: None if the sequence does not fit the word;
                 the index of the first occurrence if no position is set;
                 True if a position is set and the sequence occurs exactly there
        """
        first_hit = word.find(self.sequence)
        # sequence not in the word at all
        if first_hit == -1:
            return None
        # no position requested - any occurrence is good enough
        if self.position is None:
            return first_hit
        # a negative position counts from the end of this particular word
        start = self.position if self.position >= 0 else len(word) + self.position
        if start < 0:
            # position points before the beginning of the word
            return None
        return True if word.startswith(self.sequence, start) else None

    # Legacy camelCase name. Kept as a class-level alias so that instances do not each
    # hold a bound method pointing back at themselves.
    inWord = in_word
class Dictionary:
    """
    A word list together with an index of the letter sequences found in those words.

    self.dictionary holds:
      - 'words': word text -> Sequence object of that word
      - 'lettersN': N-letter sequence text -> Sequence object, whose origin_words maps
        every word containing that sequence to the word's Sequence object
      - simple statistics ('count', 'lengths', 'len_max', 'len_min', 'len_avg')
    """

    def __init__(self, source_file=None, depth=None):
        """
        :param source_file: iterable of text lines (e.g. an open text file), one entry per line
        :param depth: (min, max) lengths of letter sequences to index
        The dictionary is loaded immediately only when both arguments are given.
        """
        self.dictionary = {  # data structure describing the dictionary loaded from source file
            'words': {},  # words from the source file
            'count': 0,  # number of lines (entries) in the source file
            'lengths': [0] * 30,  # lengths[n] = number of words of length n (grown on demand)
            'len_max': 0,  # longest word's length
            'len_min': 30,  # shortest word's length
            'len_avg': 0,  # average word length
        }
        # 'letters1' .. 'letters18' always exist; deeper buckets are created when loading
        for seq_len in range(1, 19):
            self.dictionary[f'letters{seq_len}'] = {}
        self.source_file = source_file
        self.depth = depth
        self.session = {
            'match_requests': None,
            'loading_time': None,
            'sequence_count': None,
            'word_count': None
        }
        if self.source_file is not None and self.depth is not None:
            self._load_dictionary(self.source_file, self.depth)

    def _load_dictionary(self, source_file, depth=None):
        """
        Load dictionary from file.
        Create dict structures that contain words and letter sequences of the requested lengths.
        Create Sequence objects and link words with the sequences they contain.

        :param source_file: iterable of text lines (e.g. an open text file) with one entry
                            (preferably a single word) per line; it is read to the end
        :param depth: (min, max) sequence lengths to index; falls back to self.depth
        :return: nothing
        :raises ValueError: if no depth is available
        """
        # count Sequence objects created for letter sequences
        sequence_objects_num = 0
        # performance timer 'start'
        timing = time.perf_counter()

        # read everything once: gives the total for progress reporting without rewinding
        lines = list(source_file)
        total = len(lines)
        self.dictionary['count'] = total

        # get or set dictionary depth
        if depth is None:
            depth = self.depth
        else:
            self.depth = depth
        if depth is None:
            raise ValueError('dictionary depth (min, max sequence length) is required')
        # min and max sequence length to build (mostly for testing purposes - speed)
        seq_len_min = max(depth[0], 1)
        seq_len_max = depth[1]
        # make sure a bucket exists for every indexed length, also beyond 'letters18'
        for seq_len in range(seq_len_min, seq_len_max + 1):
            self.dictionary.setdefault(f'letters{seq_len}', {})

        # reset session.match_requests
        self.session['match_requests'] = 0

        words = self.dictionary['words']
        lengths = self.dictionary['lengths']
        # next progress milestone (in percent) to report
        next_milestone = 0
        for word_counter, raw_line in enumerate(lines, start=1):
            # notify user when current progress exceeds the current milestone
            if (word_counter / total) * 100 > next_milestone:
                print('Loading source file... '+str(next_milestone)+'%' +
                      ' ['+str(word_counter)+'/'+str(total)+']', end='\r')
                next_milestone += 1

            # remove endline character
            line = raw_line.rstrip()
            word_len = len(line)
            # assign a word object
            word_obj = Sequence(line)
            word_obj.word_source_ref = word_counter
            words[line] = word_obj

            # max and min lengths
            if word_len > self.dictionary['len_max']:
                self.dictionary['len_max'] = word_len
            if word_len < self.dictionary['len_min']:
                self.dictionary['len_min'] = word_len
            # lengths histogram data - grow the list instead of failing on very long words
            if word_len >= len(lengths):
                lengths.extend([0] * (word_len + 1 - len(lengths)))
            lengths[word_len] += 1

            # index every substring of every requested length and connect it with the word
            for seq_len in range(seq_len_min, min(seq_len_max, word_len) + 1):
                bucket = self.dictionary[f'letters{seq_len}']
                for idx in range(word_len - seq_len + 1):
                    seq_key = line[idx:idx + seq_len]
                    seq_obj = bucket.get(seq_key)
                    if seq_obj is None:
                        # first occurrence: create Sequence object with this word as first origin
                        sequence_objects_num += 1
                        seq_obj = Sequence(seq_key)
                        seq_obj.count = 1
                        seq_obj.origin_words = {line: word_obj}
                        bucket[seq_key] = seq_obj
                    else:
                        # increase occurrence counter and add current word to origins
                        seq_obj.count += 1
                        seq_obj.origin_words[line] = word_obj
                    # also update word object with reference to this sequence
                    if word_obj.contained_sequences is None:
                        word_obj.contained_sequences = {}
                    word_obj.contained_sequences[seq_key] = seq_obj

        # calculate average word length (0 for an empty source instead of dividing by zero)
        self.dictionary['len_avg'] = \
            sum(amount * length for length, amount in enumerate(lengths)) / total if total else 0
        # progress print ending (and new line symbol)
        print('Loading source file... 100% - '+str(total)+' words loaded.')
        # performance timer 'stop' and print
        self.session['loading_time'] = time.perf_counter() - timing
        print(f'Depth {depth} dictionary structure loading time: ', self.session['loading_time'])
        # dictionary info
        self.session['word_count'] = len(self.dictionary["words"])
        self.session['sequence_count'] = sequence_objects_num
        print(f'Sequence objects: {sequence_objects_num}')

    # public name for loading; class-level alias of _load_dictionary
    load = _load_dictionary

    def match_words(self, *args, length=None):
        """
        Find words that contain all given sequences (each at its position, if it has one).

        :param args: 0 or more Sequence objects; each carries a string and possibly
                     a position of that string in a word (any position if None)
        :param length: length of words (any if None)
        :return: new dictionary {word: word's Sequence object} of matching words;
                 empty if nothing matches, including when a sequence is not in the index
        """
        # Optimization: start from the smallest candidate set. Every indexed sequence knows
        # the words it came from, so the smallest origin_words set is the cheapest to filter.
        words = self.dictionary['words']
        if self.depth is not None:
            shortest, longest = self.depth[0], self.depth[1]
            for seq in args:
                if not shortest <= len(seq) <= longest:
                    # this length was not indexed; the sequence is still checked below
                    continue
                entry = self.dictionary.get(f'letters{len(seq)}', {}).get(str(seq))
                if entry is None:
                    # an indexed length but unknown sequence: no loaded word contains it
                    words = {}
                    break
                if len(entry.origin_words) < len(words):
                    words = entry.origin_words

        # filter by length (if provided) and by every sequence and its position
        words_filtered = {}
        for w, word_obj in words.items():
            if length is not None and len(w) != length:
                continue
            # in_word returns None for "does not fit"; index 0 is a valid match
            if all(seq.in_word(w) is not None for seq in args):
                words_filtered[w] = word_obj

        # increase session.match_requests (None until a dictionary has been loaded)
        self.session['match_requests'] = (self.session['match_requests'] or 0) + 1
        return words_filtered
# if __name__ == '__main__':
def simple_crossword(solution):
    """
    Construct a simple crossword with questions in rows and answer in a single column.
    There are as many questions as there are letters in the solution.
    The crossword is printed and also returned.

    Uses the module-level Dictionary ``D``, which must be loaded before calling this
    with anything other than spaces.

    :param solution: string representing the solution
    :return: a crossword in string format (one line per solution character)
    :raises LookupError: if no word could be found for some letter of the solution
    """
    print(f'Solution: {solution}\n{"- "*10}')
    offset = 20  # column in which the solution letters are aligned
    max_tries = 100  # this many tries to randomly find matching word
    crossword_lines = []
    # go through every letter of solution and figure out a question for it
    for letter in solution:
        if letter == ' ':
            crossword_lines.append(' ' * offset + '| |')  # just an empty line, a space in solution
            continue
        for _ in range(max_tries):
            word_length = randint(3, 15)
            # which letter in answer belongs to solution (0-based, so the first letter is allowed)
            letter_position = randint(0, word_length - 1)
            try:
                possible_words = D.match_words(Sequence(letter, letter_position), length=word_length)
            except KeyError as err:
                # the letter is not indexed by the dictionary at all - retrying cannot help
                raise LookupError(f'Couldnt find matching words for letter: {letter}') from err
            if possible_words:
                break  # possible words found! go choose a word
        else:
            raise LookupError(f'Couldnt find matching words for letter: {letter}')
        # random selection from the set of matching words
        chosen_word = random.choice(list(possible_words))
        # shift the word so that its solution letter lands in the offset column
        crossword_lines.append(' ' * (offset - letter_position) + ''.join(
            f'|{char.upper()}|' if n == letter_position else char
            for n, char in enumerate(chosen_word)
        ))
    crossword_string = ''.join(line + '\n' for line in crossword_lines)
    print(crossword_string)
    return crossword_string
def simple_crossword_2(solution, lengths=None, positions=None):
    """
    Construct a simple crossword with questions in rows and answer in a single column.
    There are as many questions as there are letters in the solution.
    Each word length can be specified (or not - then random).
    Each solution letter's position in its word can be specified (or not - then random).
    A letter for which no word is found gets a placeholder line instead of an error.
    The crossword is printed and also returned.

    Uses the module-level Dictionary ``D``, which must be loaded before calling this
    with anything other than spaces.

    :param solution: string representing the solution
    :param lengths: indexable of word lengths, one per solution character (spaces included);
                    missing entries are chosen at random
    :param positions: indexable of solution letters' positions in their words, one per
                      solution character; may be negative (counted from the end of the word);
                      missing entries are chosen at random
    :return: a crossword in string format (one line per solution character)
    """
    print(f'Solution: {solution}\n{"- "*10}')
    offset = 20  # column in which the solution letters are aligned
    # if both position and length are dictated, retrying would only repeat the same query
    max_tries = 1 if positions is not None and lengths is not None else 100
    crossword_lines = []
    # go through every letter of solution and figure out a question for it
    for letter_cnt, letter in enumerate(solution):
        if letter == ' ':
            crossword_lines.append(' ' * offset + '| |')  # just an empty line, a space in solution
            continue
        # line used when no matching word is found: the bare letter without a word
        crossword_string_line = ' ' * (offset-2) + '_' * 2 + '|'+letter.upper()+'|' + '_' * 2
        for _ in range(max_tries):
            # get expected word length from argument or generate random
            try:
                word_length = lengths[letter_cnt]
            except (TypeError, IndexError):
                word_length = randint(3, 15)
            # get expected letter position from argument or generate random inside the word
            try:
                letter_position = positions[letter_cnt]
            except (TypeError, IndexError):
                letter_position = randint(0, max(word_length - 1, 0))
            # get matching words
            try:
                possible_words = D.match_words(Sequence(letter, letter_position), length=word_length)
            except KeyError:
                # the letter is not indexed by the dictionary at all - keep the placeholder
                break
            if not possible_words:
                continue
            # random selection from the set of matching words
            chosen_word = random.choice(list(possible_words))
            # if position negative, convert it in the context of the chosen word
            position_converted = letter_position
            if letter_position < 0:
                position_converted = len(chosen_word) + letter_position
            # shift the word so that its solution letter lands in the offset column
            crossword_string_line = ' ' * (offset - position_converted) + ''.join(
                f'|{char.upper()}|' if n == position_converted else char
                for n, char in enumerate(chosen_word)
            )
            break  # possible words found! go to the next letter
        crossword_lines.append(crossword_string_line)
    crossword_string = ''.join(line + '\n' for line in crossword_lines)
    print(crossword_string)
    return crossword_string
def try_me_1():
    """
    Demo function showing how to
    - use Dictionary class
    - use Sequence class
    - create a simple crossword using simple_crossword
    - create a simple crossword using simple_crossword_2

    Reads the word list from "small.txt" in the current directory and stores the loaded
    Dictionary in the module-level ``D``.
    :return: nothing
    """
    global D
    # # print some stuff and initiate things
    print('*'*30)
    S = Sequence  # class alias
    # open source text file - one entry per line (preferably one word)
    # (a bigger list such as "slowa2.txt" can be used instead)
    # Dictionary loads everything inside its constructor, so the file can be closed right after
    with open("small.txt", encoding="utf-8") as source_file:
        D = Dictionary(source_file, depth=(1, 18))  # Dictionary module root object
    # sample query (result is not used): words of length 8 with 'a' at position 1,
    # 'ing' at the last three positions and an 'i' anywhere
    D.match_words(S('a', 1), S('ing', -3), S('i'), length=8)
    # report memory usage
    mem_use_kb_2 = psutil.Process(os.getpid()).memory_info()[0] / 1024  # memory usage by the process [kb]
    print('Process memory after loading:', mem_use_kb_2, 'kb')
    print('*' * 30)
    # # generate sample crosswords
    simple_crossword('python rocks')
    simple_crossword_2('python', lengths=[10] * 15, positions=list(range(15)))
    simple_crossword_2('python', lengths=list(range(3, 15, 2)), positions=[-x for x in range(2, 15)])
# ======================================================================================================================
"""
Take a grid of empty squares
Mark some horizontal/vertical sequences of squares - they will be words to guess
Match word to sequences according to their connections (crossing each other in the grid)
"""
#
#
# # requested word's length
# word_length = 6
# # requested word's first square's coordinates (x, y)
# word_origin_x, word_origin_y = 2, 2
# word_origin = (word_origin_x, word_origin_y)
# # requested word's direction (dx, dy) in the grid
# word_dx = 1
# word_dy = 0
# word_direction = (word_dx, word_dy)
class Cell:
    """
    One square of a crosswords diagram (e.g. an empty place for a letter, a hint, etc)
    """

    def __init__(self, coordinates, content):
        # (x,y) - place of the cell in physical puzzle space
        self.coordinates = coordinates
        # content dict of the cell (e.g. letter, hint, etc); a fresh dict when none is given
        if content is None:
            content = {}
        self.content = content
        # property of being explicit - useful for defining sequences that cannot change
        self.explicit = None

    def __str__(self):
        # show the cell's letter, or a blank when the cell has no letter yet
        try:
            return self.content['letter']
        except KeyError:
            return ' '
class Word:
    """
    Single word in a crosswords diagram.
    It is bound with crosswords' grid via Cell(s) objects
    It is bound with dictionary via Sequence objects
    (though not strictly the same instances that dictionary uses)
    """

    def __init__(self, *args, **kwargs):
        """
        :param kwargs: 'cells' - ordered list of cells (first to last letter) the word lives in;
                       'dictionary' - object providing match_words(*sequences, length=...)
        """
        # the sequence cells the word is contained in (ordered - first to last letter)
        self.cells = kwargs.get('cells', [])
        # dictionary reference
        self.dictionary = kwargs.get('dictionary')
        self.matching_words = None  # matching words (constrained by sequences and cells)
        self._sequences = []  # Sequence objects derived from the letters currently in the cells

    def __str__(self):
        return ''.join(str(c) for c in self.cells)

    def __fill_cells(self, s: str):
        """
        Write the letters of s into the cells, one letter per cell.
        :return: False (and nothing is written) if s does not have exactly one letter
                 per cell, True otherwise
        """
        if len(s) != len(self.cells):
            return False
        for cell, letter in zip(self.cells, s):
            cell.content['letter'] = letter
        return True

    def _cells_to_sequences(self):
        """
        Rebuild self._sequences from the cells: every run of neighbouring cells holding
        letters becomes one Sequence positioned at the index of the run's first cell.
        """
        # start from scratch so that repeated calls do not pile up stale sequences
        self._sequences = []
        found_str = ''
        start_pos = 0
        for cell_num, cell in enumerate(self.cells):
            letter = cell.content.get('letter')
            # only a-z counts as an actual letter; anything else (no letter, '*', ...) ends a run
            # NOTE(review): this excludes non-ASCII letters - confirm that is intended
            if letter is not None and 'a' <= letter <= 'z':
                if not found_str:
                    start_pos = cell_num  # remember position of first letter in current string
                found_str += letter
            elif found_str:
                self._sequences.append(Sequence(found_str, start_pos))  # add sequence
                found_str = ''
        # a run reaching the last cell still has to be added
        if found_str:
            self._sequences.append(Sequence(found_str, start_pos))

    def _match(self):
        """
        Find all dictionary words that fit this word's cells: same length as the number of
        cells and containing every letter run already present in the cells at its position.
        The result is stored in self.matching_words.
        :return: nothing
        """
        # translate self.cells into sequences and use all of them to find words
        self._cells_to_sequences()
        self.matching_words = self.dictionary.match_words(*self._sequences, length=len(self.cells))

    def _random_fill(self):
        """
        Match potential words and fill the cells with a randomly chosen one.
        :return: nothing
        :raises LookupError: if no word matches the current content of the cells
        """
        self._match()
        if not self.matching_words:
            raise LookupError(f'Matching a word failed: {self}')
        # random selection from the set of matching words
        chosen_word = random.choice(list(self.matching_words))
        self.__fill_cells(chosen_word)

    # public names; class-level aliases so that instances hold no bound methods of themselves
    match = _match  # match words from dictionary to the restrictions
    random_fill = _random_fill  # match potential words and fill the cells with one
class GridCrossWord:
    """
    Crossword laid out on a rectangular grid.

    The grid is indexed as ``grid[x][y]`` and holds ``Cell`` objects; words are
    ``Word`` objects sharing those cells. The class relies on module-level
    names defined elsewhere in this file: ``Cell``, ``Word``, ``Dictionary``,
    the shared dictionary global ``D`` and the helpers
    ``string_to_rows_and_cols`` / ``stars_string_to_sequences``.
    """

    def __init__(self, grid_string=None):
        """
        :param grid_string: optional multiline layout ('.' and '*', see
            _string_to_grid); when given, the grid and words are built
            immediately
        """
        self.grid_string = grid_string
        self.grid = None  # grid containing Cell objects, indexed grid[x][y]
        self.grid_size = None  # (number of columns, number of rows)
        self.words = None  # Word objects for this crossword
        if self.grid_string is not None:
            self._string_to_grid(self.grid_string)

    def __str__(self):
        """
        Pretty-print the grid.

        Rows are printed with the highest y first and the column index header
        last. Every cell is padded to the same width as a header column so
        that letters line up under their column index.
        :return: the rendered grid, or an empty string when no grid exists yet
        """
        if self.grid is None or self.grid_size is None:
            # nothing has been built yet; printing must not raise
            return ''
        column_width = 3
        label_width = 5
        width, height = self.grid_size
        header = ' ' * label_width + ''.join(str(x).ljust(column_width) for x in range(width))
        lines = [header, '']
        for y in range(height):
            gridline = str(y).ljust(label_width)
            for x in range(width):
                cell = self.grid[x][y]
                if cell is not None:
                    gridline += str(cell).upper().ljust(column_width)
                else:
                    gridline += ' ' * column_width
            lines.append(gridline)
        # lines were collected bottom-up (header first); show the top row first
        lines.reverse()
        return ''.join(f'{line}\n' for line in lines)

    @staticmethod
    def _shared_dictionary():
        """Return the module-wide dictionary ``D``, creating it on first use."""
        global D
        if D is None:
            # NOTE(review): this handle is never closed. Whether Dictionary
            # finishes reading inside its constructor is not visible from
            # here, so no `with` block is introduced - confirm and close it.
            source_file = open("small.txt", encoding="utf-8")
            D = Dictionary(source_file, depth=(1, 10))  # Dictionary module root object
        return D

    @staticmethod
    def _empty_grid(grid_size):
        """Build a grid[x][y] of fresh Cell objects for the given (columns, rows) size."""
        return [[Cell((x, y), None) for y in range(grid_size[1])] for x in range(grid_size[0])]

    @staticmethod
    def _star_word(cells, dictionary):
        """Mark every given cell with '*' and wrap the cells in a Word."""
        for cell in cells:
            cell.content['letter'] = '*'
        return Word(cells=cells, dictionary=dictionary)

    @staticmethod
    def _retry(action, tries):
        """
        Call ``action`` until it succeeds or ``tries`` attempts raised LookupError.

        :return: the result of the first successful call, or None when every
            attempt failed (or ``tries`` is zero or negative)
        """
        remaining = tries
        # value comparison: identity checks against int literals are unreliable
        while remaining > 0:
            try:
                return action()
            except LookupError:
                remaining -= 1
        return None

    def _string_to_grid(self, grid_string):
        """
        Load grid crossword data from a multiline string.

        Each line in the string is a row, each character a separate column.
        The string is expected to hold '.' (dot) for every inactive cell and
        '*' (star) for every empty cell. Every horizontal or vertical run of
        at least two stars becomes a Word whose cells are marked with '*'.
        e.g.
            grid_string = *...
                          ****
                          *...
        means:
            Vertical word, 3 letters in the first column
            Horizontal word, 4 letters in the second row
        :param grid_string: as described above
        :return: list of the constructed Word objects (also stored in self.words)
        """
        dictionary = self._shared_dictionary()
        # rows come back bottom-up, so index 0 is the last line of the string
        rows, columns = string_to_rows_and_cols(grid_string)
        self.grid_size = (len(columns), len(rows))
        self.grid = self._empty_grid(self.grid_size)

        words = []
        # horizontal words: star runs inside each row, read left to right
        for rn, row in enumerate(rows):
            for run in stars_string_to_sequences(row, min_length=2):
                cells = [self.grid[cn][rn] for cn in run]
                words.append(self._star_word(cells, dictionary))
        # vertical words: star runs inside each column; coordinates go
        # down->up while words read up->down, hence the reversal
        for cn, column in enumerate(columns):
            for run in stars_string_to_sequences(column, min_length=2):
                cells = [self.grid[cn][rn] for rn in reversed(run)]
                words.append(self._star_word(cells, dictionary))

        self.words = words
        return words

    def attempt(self):
        """
        Rebuild the grid from self.grid_string and fill every word once.

        Words are filled one by one in the order they are stored; any
        exception raised by a word's random_fill() propagates to the caller.
        :return: self
        """
        self._string_to_grid(self.grid_string)
        for word in self.words:
            word.random_fill()
        return self

    def grind(self, tries=50):
        """
        Call attempt() repeatedly until one run succeeds.

        :param tries: maximum number of attempts; zero or a negative number
            means no attempt is made
        :return: self on success, None when every attempt raised LookupError
        """
        return self._retry(self.attempt, tries)

    def sample(self):
        """
        Build a hard-coded 10x10 sample crossword with five words and fill it.

        The start coordinates and text of every word are printed after
        filling. Exceptions raised by random_fill() propagate to the caller
        (sample_grind() retries on LookupError).
        :return: the pretty-printed grid
        """
        dictionary = self._shared_dictionary()
        # grid of squares of the crossword
        self.grid_size = (10, 10)
        self.grid = self._empty_grid(self.grid_size)

        # sample words structure: cell coordinates of each word, first letter first
        layout = [
            [(0, 4 - y) for y in range(5)],  # 5 letters, starting at (0, 4), going down
            [(x, 3) for x in range(6)],      # 6 letters, starting at (0, 3), going right
            [(x, 1) for x in range(8)],      # 8 letters, starting at (0, 1), going right
            [(2, 6 - y) for y in range(7)],  # 7 letters, starting at (2, 6), going down
            [(4, 8 - y) for y in range(8)],  # 8 letters, starting at (4, 8), going down
        ]
        sample_words = [
            Word(cells=[self.grid[x][y] for x, y in coords], dictionary=dictionary)
            for coords in layout
        ]

        for word in sample_words:
            word.random_fill()
        print(*[[word.cells[0].coordinates, str(word)] for word in sample_words])
        return str(self)

    def sample_grind(self):
        """
        Call sample() up to 30 times until one run succeeds.

        :return: the pretty-printed grid, or None when every run raised LookupError
        """
        return self._retry(self.sample, 30)
def string_to_rows_and_cols(s):
    """
    Split a multiline string into its rows and its columns.

    Rows are returned in reverse line order (the last line of the string is
    row 0). Column ``cn`` is the string made of character ``cn`` of every row,
    in that same reversed order. Lines are split with ``str.splitlines`` so a
    single trailing newline or Windows line endings do not produce an empty
    row or stray ``\\r`` characters.

    All rows are assumed to have the same length; the length of row 0 decides
    the number of columns, and a shorter row elsewhere raises IndexError.

    example use: string_to_rows_and_cols('12345\\n67890\\nABCDE\\nefghi')

    :param s: multiline string, one grid row per line
    :return: two-element list ``[rows, columns]``, each a list of strings
    """
    # an empty input keeps yielding a single empty row, as split('\n') did
    rows = s.splitlines() or ['']
    rows.reverse()
    # for now lets say that all rows are the same length and take row 0 as x-size
    col_num = len(rows[0])
    columns = [''.join(row[cn] for row in rows) for cn in range(col_num)]
    return [rows, columns]
def stars_string_to_sequences(s, min_length=0):
    """
    Find all star sequences in a string containing only '.'(dot) and '*'(star).

    Each sequence is the list of positions (in the original string) of one
    run of consecutive stars. Any character other than '*' ends the current
    run, so every returned sequence holds contiguous positions.
    e.g.
        stars_string_to_sequences('..*****.**..*')
    yields
        [[2, 3, 4, 5, 6], [8, 9], [12]]
    :param s: input string
    :param min_length: minimum length of star sequence; shorter runs are dropped
    :return: list of position lists, in left-to-right order
    """
    sequences = []
    run = []
    for position, char in enumerate(s):
        if char == '*':
            run.append(position)
            continue
        # a non-star character closes the run collected so far
        if run and len(run) >= min_length:
            sequences.append(run)
        run = []
    # the string may end while a run is still open
    if run and len(run) >= min_length:
        sequences.append(run)
    return sequences
def try_me_grid():
# example using internals for defining words and cells
gc = GridCrossWord()
print(gc.sample_grind())
# example using grid defined by string