### Generador de sets de palabras para sopas de letras

Se usarán los datasets de palabras de la carpeta [datasets/](./datasets/)

In [3]:
import pandas as pd
import os
from unidecode import unidecode

In [9]:
# Word list to sample from; it is read with pandas and must expose a 'palabras' column.
dataset_path = './datasets/0_palabras_todas_no_conjugaciones.txt'
# Root folder; the files of puzzle i are written under output_path + str(i).
output_path = './output/'
# Number of puzzles to generate and number of words sampled for each one.
num_sopas, palabras_x_sopa = 10, 15

**Load dataset**

Cargamos el dataset y eliminamos las palabras de más de 10 letras, las que contienen espacios y las que contienen la letra ñ.

In [129]:
# Load the word list; the file must expose a 'palabras' column.
palabras_raw = pd.read_csv(dataset_path)
print(len(palabras_raw))

# Keep words of at most 10 letters. `.str.len()` yields NaN for missing
# entries (read_csv parses tokens such as "NA" or "null" as NaN), so those
# rows are filtered out here instead of crashing a Python-level len() call.
df = palabras_raw[palabras_raw['palabras'].str.len() <= 10]
print(len(df))

# Drop multi-word entries (anything containing a space).
df = df[~df['palabras'].str.contains(' ', regex=False, na=False)]
print(len(df))

# Drop words containing ñ; match both cases so an uppercase 'Ñ' cannot slip through.
df = df[~df['palabras'].str.contains('ñ', case=False, regex=False, na=False)]
print(len(df))

108790
82335
82267
79969


**Generar sets en output**

In [130]:
# exist_ok=True replaces the exists()-then-makedirs() pattern, which can fail
# if the folder appears between the check and the creation.
os.makedirs(output_path, exist_ok=True)

for i in range(1, num_sopas + 1):
    # Each puzzle gets its own numbered folder under output_path.
    sopa_dir = os.path.join(output_path, str(i))
    os.makedirs(sopa_dir, exist_ok=True)

    # Random sample of words, stored alphabetically sorted and upper-cased.
    sample_palabras = (
        df['palabras']
        .sample(n=palabras_x_sopa)
        .sort_values()
        .str.upper()
    )
    sample_palabras.to_csv(os.path.join(sopa_dir, 'palabras.txt'), index=False, header=False)

    # Same words passed through unidecode; this is the file the grid generator reads.
    sample_palabras_sin_acentos = sample_palabras.apply(unidecode)
    sample_palabras_sin_acentos.to_csv(
        os.path.join(sopa_dir, 'palabras_sin_acentos.txt'), index=False, header=False
    )

print("Fin!")

Fin!


## Generación de imágenes de las sopas

In [1]:
import string
import random
import numpy as np
import cairo

# Root folder that holds one numbered sub-folder per puzzle.
output_path = './output/'

# Grid size in cells (rows x columns).
nrows, ncols = 15, 15

# Uppercase ASCII alphabet used to fill the cells that no word occupies.
letras = [*string.ascii_uppercase]

In [2]:
def get_direction_and_start_position(matrix, word, max_attempts=1000):
    """Pick a random direction and start cell where ``word`` fits in ``matrix``.

    A placement is valid when every cell the word would cover is either
    empty (``''``) or already holds the same letter, so words may cross.
    The grid size is taken from ``matrix`` itself.

    Args:
        matrix: 2-D grid (NumPy array or list of rows) indexed as
            ``matrix[row][column]``; empty cells hold ``''``.
        word: Sequence of letters to place.
        max_attempts: Number of random tries before falling back to an
            exhaustive scan of every possible placement.

    Returns:
        Tuple ``(direction, x, y)`` where ``direction`` is 0 (W -> E),
        1 (N -> S) or 2 (NW -> SE), ``x`` is the start row and ``y`` the
        start column.

    Raises:
        ValueError: If the word is longer than the grid allows, or if no
            conflict-free placement exists (instead of looping forever).
    """
    n_rows = len(matrix)
    n_cols = len(matrix[0]) if n_rows else 0
    # Cells covered beyond the first letter; clamped so an empty word still
    # gets a start cell inside the grid.
    span = max(len(word) - 1, 0)

    # direction code -> (row step, column step)
    steps = {0: (0, 1), 1: (1, 0), 2: (1, 1)}

    # Largest valid start row/column per direction. Directions in which the
    # word cannot fit at all are left out.
    limits = {}
    for direction, (row_step, col_step) in steps.items():
        max_row = n_rows - 1 - row_step * span
        max_col = n_cols - 1 - col_step * span
        if max_row >= 0 and max_col >= 0:
            limits[direction] = (max_row, max_col)

    if not limits:
        raise ValueError(
            "word {!r} does not fit in a {}x{} grid".format(word, n_rows, n_cols)
        )

    def fits(direction, x, y):
        # True when no covered cell holds a different letter.
        row_step, col_step = steps[direction]
        for offset, letter in enumerate(word):
            cell = matrix[x + offset * row_step][y + offset * col_step]
            if cell != '' and cell != letter:
                return False
        return True

    directions = list(limits)
    for _ in range(max_attempts):
        direction = random.choice(directions)
        max_row, max_col = limits[direction]
        x = random.randint(0, max_row)
        y = random.randint(0, max_col)
        if fits(direction, x, y):
            return direction, x, y

    # Random sampling kept missing: enumerate every placement so a crowded
    # grid still yields an answer and an impossible one fails loudly.
    candidates = [
        (direction, x, y)
        for direction, (max_row, max_col) in limits.items()
        for x in range(max_row + 1)
        for y in range(max_col + 1)
        if fits(direction, x, y)
    ]
    if not candidates:
        raise ValueError(
            "no conflict-free position left for word {!r}".format(word)
        )
    return random.choice(candidates)
    
def generate_word_search_matrix(words):
    """Build a word-search grid and its solution grid for ``words``.

    Each word is written at the position and direction chosen by
    ``get_direction_and_start_position``. Cells left empty afterwards are
    filled with random entries of ``letras`` in the puzzle and with a single
    space in the solution.

    Args:
        words: Iterable of words to hide in the grid.

    Returns:
        Tuple ``(matrix, matrix_solution)`` of ``nrows`` x ``ncols`` string arrays.
    """
    grid = np.zeros((nrows, ncols), dtype=str)

    print(words)
    for current in words:
        heading, row, col = get_direction_and_start_position(grid, current)
        # 0 moves along the row, 1 down the column, anything else diagonally.
        row_step = 0 if heading == 0 else 1
        col_step = 0 if heading == 1 else 1

        for char in current:
            grid[row, col] = char
            row, col = row + row_step, col + col_step

    solution = grid.copy()

    # argwhere lists the empty cells in row-major order, the same order in
    # which the random filler letters are drawn by a nested row/column loop.
    for row, col in np.argwhere(solution == ''):
        grid[row, col] = random.choice(letras)
        solution[row, col] = ' '

    return grid, solution

In [5]:
def _render_letter_grid(file_stem, grid, cell_size):
    """Draw ``grid`` as black letters on a white canvas; write ``file_stem`` + .svg and .png."""
    n_rows = len(grid)
    n_cols = len(grid[0]) if n_rows else 0
    canvas_width = cell_size * n_cols
    canvas_height = cell_size * n_rows

    surface = cairo.SVGSurface(file_stem + ".svg", canvas_width, canvas_height)
    try:
        ctx = cairo.Context(surface)

        # White background covering the whole canvas.
        ctx.rectangle(0, 0, canvas_width, canvas_height)
        ctx.set_source_rgb(1, 1, 1)
        ctx.fill()

        ctx.select_font_face('Arial', cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_NORMAL)
        ctx.set_font_size(cell_size)
        ctx.set_source_rgb(0, 0, 0)

        for row_idx, row in enumerate(grid):
            for col_idx, letter in enumerate(row):
                # Offset each glyph by its measured extents so it sits in
                # the middle of its cell.
                te = ctx.text_extents(letter)
                x = col_idx * cell_size + (cell_size - te.width) / 2
                y = row_idx * cell_size + (cell_size + te.height) / 2
                ctx.move_to(x, y)
                ctx.show_text(letter)

        surface.write_to_png(file_stem + '.png')
    finally:
        # Always release the surface, even if drawing or PNG export fails.
        surface.finish()


def generate_word_search_images(output_folder, matrix, matrix_solution):
    """Render the puzzle and its solution as SVG and PNG images.

    Writes ``word_search.svg``/``word_search.png`` and
    ``solution.svg``/``solution.png``. File names are appended directly to
    ``output_folder``, so it must end with a path separator. The canvas is
    sized from each grid's own dimensions.

    Args:
        output_folder: Destination prefix for the four image files.
        matrix: 2-D grid of letters for the puzzle.
        matrix_solution: 2-D grid showing only the hidden words.
    """
    width = 30  # side of one cell; also used as the font size

    _render_letter_grid(output_folder + "word_search", matrix, width)
    _render_letter_grid(output_folder + "solution", matrix_solution, width)

In [11]:
# Build, print and render one puzzle for every numbered output folder.
for i in range(1, num_sopas + 1):
    words_path = '{}{}/palabras_sin_acentos.txt'.format(output_path, i)

    # One word per line; surrounding whitespace and newlines are stripped.
    with open(words_path, 'r') as f:
        words = list(map(str.strip, f))

    matrix, matrix_solution = generate_word_search_matrix(words)

    print('PUZZLE:', matrix, 'SOLUTION:', matrix_solution, sep='\n')

    generate_word_search_images('{}{}/'.format(output_path, i), matrix, matrix_solution)

['ARRECIAR', 'AUTOMOVIL', 'BIOETANOL', 'CALIPICO', 'CHOTEAR', 'CORNETAZO', 'EXEGETICA', 'LODO', 'OLIVARERO', 'PIRINEO', 'SACONERIA', 'TACHUELA', 'TROCO', 'YODIZAR', 'ZAMBIANA']
PUZZLE:
[['M' 'Z' 'A' 'M' 'B' 'I' 'A' 'N' 'A' 'V' 'T' 'L' 'I' 'E' 'L']
 ['S' 'B' 'C' 'T' 'L' 'H' 'B' 'G' 'R' 'T' 'N' 'N' 'S' 'J' 'G']
 ['J' 'L' 'H' 'F' 'F' 'L' 'H' 'D' 'R' 'V' 'K' 'Z' 'C' 'S' 'E']
 ['L' 'Q' 'O' 'M' 'A' 'P' 'L' 'U' 'E' 'Z' 'N' 'Z' 'L' 'Q' 'C']
 ['Y' 'T' 'T' 'X' 'Y' 'N' 'Z' 'B' 'C' 'X' 'G' 'T' 'O' 'O' 'A']
 ['O' 'A' 'E' 'K' 'Z' 'O' 'O' 'G' 'I' 'W' 'G' 'G' 'D' 'L' 'L']
 ['D' 'C' 'A' 'Z' 'P' 'H' 'Z' 'W' 'A' 'U' 'L' 'X' 'O' 'I' 'I']
 ['I' 'H' 'R' 'V' 'T' 'K' 'P' 'N' 'R' 'S' 'Q' 'M' 'T' 'V' 'P']
 ['Z' 'U' 'C' 'O' 'R' 'N' 'E' 'T' 'A' 'Z' 'O' 'S' 'R' 'A' 'I']
 ['A' 'E' 'Y' 'B' 'I' 'O' 'E' 'T' 'A' 'N' 'O' 'L' 'O' 'R' 'C']
 ['R' 'L' 'D' 'A' 'U' 'T' 'O' 'M' 'O' 'V' 'I' 'L' 'C' 'E' 'O']
 ['A' 'A' 'K' 'S' 'A' 'C' 'O' 'N' 'E' 'R' 'I' 'A' 'O' 'R' 'U']
 ['Y' 'N' 'B' 'X' 'X' 'J' 'I' 'H' 'S' 'Y' 'E' 'D' 'Q' 'O' '

In [10]:
# NOTE(review): this cell redefines a function that is already declared
# earlier in the notebook and shadows it once executed; consider keeping
# a single definition.
def get_direction_and_start_position(matrix, word, max_attempts=1000):
    """Pick a random direction and start cell where ``word`` fits in ``matrix``.

    A placement is valid when every cell the word would cover is either
    empty (``''``) or already holds the same letter, so words may cross.
    The grid size is taken from ``matrix`` itself.

    Args:
        matrix: 2-D grid (NumPy array or list of rows) indexed as
            ``matrix[row][column]``; empty cells hold ``''``.
        word: Sequence of letters to place.
        max_attempts: Number of random tries before falling back to an
            exhaustive scan of every possible placement.

    Returns:
        Tuple ``(direction, x, y)`` where ``direction`` is 0 (W -> E),
        1 (N -> S) or 2 (NW -> SE), ``x`` is the start row and ``y`` the
        start column.

    Raises:
        ValueError: If the word is longer than the grid allows, or if no
            conflict-free placement exists (instead of looping forever).
    """
    n_rows = len(matrix)
    n_cols = len(matrix[0]) if n_rows else 0
    # Cells covered beyond the first letter; clamped so an empty word still
    # gets a start cell inside the grid.
    span = max(len(word) - 1, 0)

    # direction code -> (row step, column step)
    steps = {0: (0, 1), 1: (1, 0), 2: (1, 1)}

    # Largest valid start row/column per direction. Directions in which the
    # word cannot fit at all are left out.
    limits = {}
    for direction, (row_step, col_step) in steps.items():
        max_row = n_rows - 1 - row_step * span
        max_col = n_cols - 1 - col_step * span
        if max_row >= 0 and max_col >= 0:
            limits[direction] = (max_row, max_col)

    if not limits:
        raise ValueError(
            "word {!r} does not fit in a {}x{} grid".format(word, n_rows, n_cols)
        )

    def fits(direction, x, y):
        # True when no covered cell holds a different letter.
        row_step, col_step = steps[direction]
        for offset, letter in enumerate(word):
            cell = matrix[x + offset * row_step][y + offset * col_step]
            if cell != '' and cell != letter:
                return False
        return True

    directions = list(limits)
    for _ in range(max_attempts):
        direction = random.choice(directions)
        max_row, max_col = limits[direction]
        x = random.randint(0, max_row)
        y = random.randint(0, max_col)
        if fits(direction, x, y):
            return direction, x, y

    # Random sampling kept missing: enumerate every placement so a crowded
    # grid still yields an answer and an impossible one fails loudly.
    candidates = [
        (direction, x, y)
        for direction, (max_row, max_col) in limits.items()
        for x in range(max_row + 1)
        for y in range(max_col + 1)
        if fits(direction, x, y)
    ]
    if not candidates:
        raise ValueError(
            "no conflict-free position left for word {!r}".format(word)
        )
    return random.choice(candidates)
    

# Manual smoke test: place a few fixed words on an empty grid, then print the
# solution (words only) followed by the puzzle padded with random letters.
words = ['PYTHON', 'PROGRAMMING', 'CODE', 'COMPUTER', 'ALGORITHM']
matrix = np.zeros((nrows, ncols), dtype=str)

print(words)
for word in words:
    direction, x, y = get_direction_and_start_position(matrix, word)
    # 0 moves along the row, 1 down the column, anything else diagonally.
    row_step = 0 if direction == 0 else 1
    col_step = 0 if direction == 1 else 1

    for letter in word:
        matrix[x, y] = letter
        x, y = x + row_step, y + col_step

matrix_solution = matrix.copy()

# argwhere walks the empty cells in row-major order, so the filler letters
# are drawn in the same sequence as a nested row/column loop.
for i, j in np.argwhere(matrix_solution == ''):
    matrix[i, j] = random.choice(letras)

print(matrix_solution)
print(matrix)

['PYTHON', 'PROGRAMMING', 'CODE', 'COMPUTER', 'ALGORITHM']
[['' '' '' '' '' '' '' '' '' '' '' '' '' '' '']
 ['' '' '' '' '' '' '' '' '' '' '' '' '' '' '']
 ['P' 'R' 'O' 'G' 'R' 'A' 'M' 'M' 'I' 'N' 'G' '' '' '' '']
 ['' 'P' '' 'C' '' '' '' '' '' '' '' '' '' '' '']
 ['' 'Y' '' 'O' '' '' '' '' '' '' '' '' '' '' '']
 ['' 'T' '' 'M' '' '' '' '' '' '' '' '' '' '' '']
 ['' 'H' '' 'P' '' '' '' '' '' '' '' '' '' '' '']
 ['' 'O' '' 'U' '' '' '' '' '' '' '' '' '' '' '']
 ['' 'N' '' 'T' '' '' '' '' '' '' '' '' '' '' '']
 ['' '' '' 'E' '' '' '' '' '' '' '' '' '' '' '']
 ['' '' '' 'R' '' '' '' '' '' '' '' '' '' '' '']
 ['' '' '' '' '' '' '' '' '' '' '' '' '' '' '']
 ['' '' '' '' '' '' '' '' '' '' '' '' '' '' '']
 ['A' 'L' 'G' 'O' 'R' 'I' 'T' 'H' 'M' '' '' '' '' '' '']
 ['' '' '' 'C' 'O' 'D' 'E' '' '' '' '' '' '' '' '']]
[['I' 'L' 'L' 'K' 'A' 'G' 'P' 'T' 'X' 'U' 'S' 'C' 'O' 'R' 'S']
 ['B' 'K' 'K' 'Z' 'X' 'N' 'B' 'S' 'G' 'P' 'D' 'U' 'X' 'C' 'A']
 ['P' 'R' 'O' 'G' 'R' 'A' 'M' 'M' 'I' 'N' 'G' 'T' 'N' 'T