### Generador de sets de palabras para sopas de letras

Se usarán los datasets de palabras de la carpeta [datasets/](./datasets/)

In [3]:
import pandas as pd
import os
from unidecode import unidecode

In [6]:
# Source word list and destination folder for the generated word sets.
dataset_path = './datasets/0_palabras_todas_no_conjugaciones.txt'
output_path = './output/'

# How many puzzles to generate, and how many words go into each one.
num_sopas, palabras_x_sopa = 10, 15

**Load dataset**

Cargamos el dataset y eliminamos las palabras de más de 10 letras, las que contienen espacios y las que contienen la letra ñ

In [129]:
# Read the word list; the cell relies on the file providing a 'palabras' column.
df = pd.read_csv(dataset_path)
print(len(df))

# Each predicate flags the rows to discard, applied in this order:
#   1. words longer than 10 letters
#   2. words containing a space
#   3. words containing the letter 'ñ'
filtros_descartar = (
    lambda palabras: palabras.map(len) > 10,
    lambda palabras: palabras.str.contains(' '),
    lambda palabras: palabras.str.contains('ñ'),
)

# Drop the flagged rows and report the remaining row count after every step.
for filtro in filtros_descartar:
    df = df.drop(df[filtro(df['palabras'])].index)
    print(len(df))

108790
82335
82267
79969


**Generar sets en output**

In [130]:
# Make sure the output root exists. exist_ok=True makes the cell safe to
# re-run and avoids the check-then-create race of a separate os.path.exists().
os.makedirs(output_path, exist_ok=True)

for i in range(1, num_sopas + 1):
    # One numbered sub-folder per puzzle: <output_path>/<i>/
    carpeta_sopa = os.path.join(output_path, str(i))
    os.makedirs(carpeta_sopa, exist_ok=True)

    # Random sample of words, sorted and then upper-cased.
    # No random_state is passed here, so each run draws a different sample.
    sample_palabras = df['palabras'].sample(n=palabras_x_sopa)
    sample_palabras = sample_palabras.sort_values().str.upper()
    sample_palabras.to_csv(os.path.join(carpeta_sopa, 'palabras.txt'), index=False, header=False)

    # Same words passed through unidecode (the accent-free variant, per the
    # output file name); this is the file later read to build the grid.
    sample_palabras_sin_acentos = sample_palabras.apply(unidecode)
    sample_palabras_sin_acentos.to_csv(
        os.path.join(carpeta_sopa, 'palabras_sin_acentos.txt'), index=False, header=False
    )

print("Fin!")

Fin!


## Generación de imágenes de las sopas

In [7]:
import string
import random
import numpy as np
import cairo

# Root folder that holds one numbered sub-folder per puzzle.
output_path = './output/'

# Grid dimensions: a 15 x 15 square.
nrows = ncols = 15

# The 26 ASCII capital letters, as a list of single-character strings.
letras = [*string.ascii_uppercase]

In [88]:
def get_direction_and_start_position(matrix, word, max_attempts=10000):
    """Pick a random direction and start cell where ``word`` fits in ``matrix``.

    A placement is accepted when every cell the word would cover is either
    empty (``''``) or already holds the same letter, so words may cross on a
    shared letter.

    Args:
        matrix: 2-D NumPy array of single characters, indexed as
            ``matrix[row][col]``. The grid size is read from ``matrix.shape``
            so the indices can never disagree with the array being indexed.
        word: Word to place.
        max_attempts: Maximum number of random placements to try. Previously
            the search looped forever when a word could not be placed.

    Returns:
        Tuple ``(direction, x, y)`` where ``direction`` is 0 (W -> E),
        1 (N -> S) or 2 (NW -> SE), ``x`` is the start row and ``y`` the
        start column.

    Raises:
        ValueError: If ``word`` is longer than either side of the grid.
        RuntimeError: If no valid placement is found in ``max_attempts`` tries.
    """
    n_rows, n_cols = matrix.shape

    # An empty word still needs a one-cell span so the start stays in-grid.
    span = max(len(word), 1)
    if span > n_rows or span > n_cols:
        raise ValueError(
            "word %r (%d letters) does not fit in a %dx%d grid"
            % (word, len(word), n_rows, n_cols)
        )

    for _ in range(max_attempts):
        # Random calls are made in the same order as before:
        # direction first, then row, then column.
        direction = random.randint(0, 2)

        if direction == 0:
            x = random.randint(0, n_rows - 1)
            y = random.randint(0, n_cols - span)
        elif direction == 1:
            x = random.randint(0, n_rows - span)
            y = random.randint(0, n_cols - 1)
        else:
            x = random.randint(0, n_rows - span)
            y = random.randint(0, n_cols - span)

        # Per-letter step: rows advance unless W -> E, columns unless N -> S.
        step_x = 0 if direction == 0 else 1
        step_y = 0 if direction == 1 else 1

        fits = all(
            matrix[x + k * step_x][y + k * step_y] in ('', letter)
            for k, letter in enumerate(word)
        )
        if fits:
            return direction, x, y

    raise RuntimeError(
        "could not place word %r after %d attempts" % (word, max_attempts)
    )
    
def generate_word_search_matrix(words):
    """Build a word-search grid and its solution grid from ``words``.

    Each word is written into an ``nrows`` x ``ncols`` grid at the direction
    and start cell chosen by ``get_direction_and_start_position``. Cells left
    empty are then filled with random letters from ``letras`` in the puzzle
    and with a single space in the solution.

    Args:
        words: Iterable of words to place; it is also printed as-is.

    Returns:
        Tuple ``(matrix, matrix_solution)`` of NumPy string arrays.
    """
    grid = np.zeros((nrows, ncols), dtype=str)

    print(words)
    for word in words:
        direction, row, col = get_direction_and_start_position(grid, word)

        # Per-letter step: 0 = W -> E, 1 = N -> S, anything else = NW -> SE.
        row_step = 0 if direction == 0 else 1
        col_step = 0 if direction == 1 else 1

        for letter in word:
            grid[row][col] = letter
            row += row_step
            col += col_step

    # Snapshot taken before the filler letters are added.
    solution = grid.copy()

    for r in range(nrows):
        for c in range(ncols):
            if grid[r][c] != '':
                continue
            grid[r][c] = random.choice(letras)
            solution[r][c] = ' '

    return grid, solution

In [101]:
def generate_word_search_images(output_folder, matrix, matrix_solution):
    """Draw a letter grid and save it as ``sopa_de_letras.svg`` and ``.png``.

    Args:
        output_folder: Path prefix for the output files. The file names are
            appended by plain string concatenation, so it must end with a
            path separator.
        matrix: Puzzle grid.
            NOTE(review): this argument is never used; only
            ``matrix_solution`` is drawn, so the files named
            ``sopa_de_letras.*`` contain the solution grid. Confirm whether
            the puzzle grid should be rendered as well.
        matrix_solution: 2-D grid of single-character strings to draw.

    Returns:
        None. The function only writes the two image files.
    """
    width = 30  # side of one grid cell; also used as the font size

    # Size the canvas from the grid that is actually drawn rather than from
    # module-level constants, so the two can never disagree.
    n_rows = len(matrix_solution)
    n_cols = len(matrix_solution[0]) if n_rows else 0
    canvas_width = width * n_cols
    canvas_height = width * n_rows

    surface = cairo.SVGSurface(output_folder + "sopa_de_letras.svg", canvas_width, canvas_height)
    try:
        ctx = cairo.Context(surface)

        # White background over the whole canvas (was a fixed 500x500
        # rectangle, which only covers grids up to 16 cells per side).
        ctx.rectangle(0, 0, canvas_width, canvas_height)
        ctx.set_source_rgb(1, 1, 1)
        ctx.fill()

        ctx.select_font_face('Arial', cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_NORMAL)
        ctx.set_font_size(width)
        ctx.set_source_rgb(0, 0, 0)

        for row, letters in enumerate(matrix_solution):
            for column, letter in enumerate(letters):
                # Centre the glyph in its cell using its measured extents.
                te = ctx.text_extents(letter)
                x = column * width + (width - te.width) / 2
                y = row * width + (width + te.height) / 2
                ctx.move_to(x, y)
                ctx.show_text(letter)

        surface.write_to_png(output_folder + 'sopa_de_letras.png')
    finally:
        # Always finalise the surface, even if drawing fails part-way.
        surface.finish()

In [102]:
# Build and render the puzzle for one word set. Only set number 1 is handled
# here; the loop over all sets is not enabled.
i = 1
words_path = f'{output_path}{i}/palabras_sin_acentos.txt'

# One word per line; surrounding whitespace (including the newline) is removed.
with open(words_path, 'r') as f:
    words = list(map(str.strip, f))

matrix, matrix_solution = generate_word_search_matrix(words)

print('PUZZLE:', matrix, sep='\n')
print('SOLUTION:', matrix_solution, sep='\n')

generate_word_search_images(f'{output_path}{i}/', matrix, matrix_solution)

['ARRECIAR', 'AUTOMOVIL', 'BIOETANOL', 'CALIPICO', 'CHOTEAR', 'CORNETAZO', 'EXEGETICA', 'LODO', 'OLIVARERO', 'PIRINEO', 'SACONERIA', 'TACHUELA', 'TROCO', 'YODIZAR', 'ZAMBIANA']
PUZZLE:
[['H' 'M' 'S' 'P' 'K' 'A' 'N' 'M' 'D' 'K' 'Z' 'S' 'T' 'T' 'O']
 ['T' 'T' 'A' 'A' 'I' 'S' 'C' 'I' 'X' 'M' 'R' 'V' 'V' 'R' 'Y']
 ['U' 'A' 'U' 'B' 'C' 'R' 'J' 'O' 'O' 'L' 'O' 'D' 'O' 'O' 'X']
 ['M' 'C' 'T' 'S' 'I' 'O' 'I' 'P' 'R' 'M' 'O' 'I' 'Z' 'C' 'R']
 ['R' 'H' 'O' 'E' 'K' 'O' 'N' 'N' 'Y' 'N' 'R' 'M' 'S' 'O' 'S']
 ['C' 'U' 'M' 'O' 'K' 'L' 'E' 'E' 'E' 'O' 'E' 'D' 'A' 'Z' 'I']
 ['H' 'E' 'O' 'L' 'E' 'D' 'T' 'T' 'R' 'O' 'D' 'T' 'A' 'J' 'P']
 ['O' 'L' 'V' 'I' 'T' 'B' 'Q' 'H' 'A' 'I' 'S' 'I' 'A' 'P' 'S']
 ['T' 'A' 'I' 'V' 'X' 'T' 'Y' 'X' 'E' 'N' 'A' 'A' 'Z' 'Z' 'D']
 ['E' 'I' 'L' 'A' 'L' 'L' 'G' 'S' 'G' 'C' 'O' 'D' 'C' 'A' 'O']
 ['A' 'K' 'B' 'R' 'B' 'G' 'F' 'A' 'T' 'J' 'E' 'L' 'X' 'J' 'R']
 ['R' 'Q' 'N' 'E' 'D' 'L' 'E' 'X' 'E' 'G' 'E' 'T' 'I' 'C' 'A']
 ['V' 'K' 'F' 'R' 'C' 'A' 'L' 'I' 'P' 'I' 'C' 'O' 'C' 'W' '

In [103]:
# NOTE(review): this cell re-defines get_direction_and_start_position, which an
# earlier cell of the notebook already defines; the later definition silently
# replaces the earlier one once this cell runs.
def get_direction_and_start_position(matrix, word, max_attempts=10000):
    """Pick a random direction and start cell where ``word`` fits in ``matrix``.

    A placement is accepted when every cell the word would cover is either
    empty (``''``) or already holds the same letter, so words may cross on a
    shared letter.

    Args:
        matrix: 2-D NumPy array of single characters, indexed as
            ``matrix[row][col]``. The grid size is read from ``matrix.shape``
            so the indices can never disagree with the array being indexed.
        word: Word to place.
        max_attempts: Maximum number of random placements to try. Previously
            the search looped forever when a word could not be placed.

    Returns:
        Tuple ``(direction, x, y)`` where ``direction`` is 0 (W -> E),
        1 (N -> S) or 2 (NW -> SE), ``x`` is the start row and ``y`` the
        start column.

    Raises:
        ValueError: If ``word`` is longer than either side of the grid.
        RuntimeError: If no valid placement is found in ``max_attempts`` tries.
    """
    n_rows, n_cols = matrix.shape

    # An empty word still needs a one-cell span so the start stays in-grid.
    span = max(len(word), 1)
    if span > n_rows or span > n_cols:
        raise ValueError(
            "word %r (%d letters) does not fit in a %dx%d grid"
            % (word, len(word), n_rows, n_cols)
        )

    for _ in range(max_attempts):
        # Random calls are made in the same order as before:
        # direction first, then row, then column.
        direction = random.randint(0, 2)

        if direction == 0:
            x = random.randint(0, n_rows - 1)
            y = random.randint(0, n_cols - span)
        elif direction == 1:
            x = random.randint(0, n_rows - span)
            y = random.randint(0, n_cols - 1)
        else:
            x = random.randint(0, n_rows - span)
            y = random.randint(0, n_cols - span)

        # Per-letter step: rows advance unless W -> E, columns unless N -> S.
        step_x = 0 if direction == 0 else 1
        step_y = 0 if direction == 1 else 1

        fits = all(
            matrix[x + k * step_x][y + k * step_y] in ('', letter)
            for k, letter in enumerate(word)
        )
        if fits:
            return direction, x, y

    raise RuntimeError(
        "could not place word %r after %d attempts" % (word, max_attempts)
    )
    

# Scratch run of the placement logic on a fixed English word list.
words = ['PYTHON', 'PROGRAMMING', 'CODE', 'COMPUTER', 'ALGORITHM']
matrix = np.zeros((nrows, ncols), dtype=str)

print(words)
for word in words:
    direction, x, y = get_direction_and_start_position(matrix, word)

    # Per-letter step: 0 = W -> E, 1 = N -> S, anything else = NW -> SE.
    step_x = 0 if direction == 0 else 1
    step_y = 0 if direction == 1 else 1

    for letter in word:
        matrix[x, y] = letter
        x += step_x
        y += step_y

# Snapshot with only the placed words; empty cells stay as ''.
matrix_solution = matrix.copy()

# Fill every remaining empty cell with a random letter.
for i in range(nrows):
    for j in range(ncols):
        if matrix[i, j] != '':
            continue
        matrix[i, j] = random.choice(letras)

print(matrix_solution, matrix, sep='\n')

['PYTHON', 'PROGRAMMING', 'CODE', 'COMPUTER', 'ALGORITHM']
[['' '' '' '' '' '' '' '' 'C' '' '' '' '' '' '']
 ['' '' '' '' '' '' '' '' 'O' '' '' 'P' '' '' '']
 ['' '' '' '' '' '' '' '' 'M' '' '' 'R' '' '' '']
 ['' '' '' '' '' '' '' '' 'P' '' '' 'O' '' '' '']
 ['' '' '' '' '' '' '' '' 'U' '' '' 'G' '' '' '']
 ['' '' '' '' '' '' '' '' 'T' 'A' '' 'R' '' '' '']
 ['' '' '' '' '' '' '' '' 'E' 'L' '' 'A' '' '' '']
 ['' '' '' '' '' '' '' '' 'R' 'G' '' 'M' '' '' '']
 ['' '' '' '' '' 'P' 'Y' 'T' 'H' 'O' 'N' 'M' '' '' '']
 ['' '' '' '' '' '' '' '' '' 'R' '' 'I' '' '' '']
 ['' '' '' '' '' '' '' '' '' 'I' '' 'N' '' '' '']
 ['' '' '' 'C' '' '' '' '' '' 'T' '' 'G' '' '' '']
 ['' '' '' '' 'O' '' '' '' '' 'H' '' '' '' '' '']
 ['' '' '' '' '' 'D' '' '' '' 'M' '' '' '' '' '']
 ['' '' '' '' '' '' 'E' '' '' '' '' '' '' '' '']]
[['Z' 'Q' 'Y' 'H' 'Y' 'F' 'U' 'Q' 'C' 'R' 'H' 'X' 'W' 'S' 'P']
 ['Y' 'R' 'X' 'Y' 'D' 'M' 'Y' 'V' 'O' 'D' 'Z' 'P' 'J' 'H' 'N']
 ['S' 'G' 'R' 'G' 'T' 'F' 'N' 'W' 'M' 'O' 'H' 'R' 'N' 'Q'

In [104]:
# Scratch check of the cell-conflict condition on the top-left cell.
xtmp, ytmp = 0, 0
letter = 'P'

celda = matrix[xtmp][ytmp]
ocupada = celda != ''        # the cell already holds a letter
distinta = celda != letter   # that letter differs from the one being placed

print(celda)
# Two candidate forms of the rejection test, printed side by side.
print(ocupada or (ocupada and distinta))
print(ocupada and distinta)

Z
True
True
