### Generador de sets de palabras para sopas de letras

Se usarán los datasets de palabras de la carpeta [datasets/](./datasets/)

In [1]:
import pandas as pd
import os
import shutil
from unidecode import unidecode

In [2]:
# Word list to sample from; the loading cell reads its 'palabras' column.
dataset_path = './datasets/ES_palabras_sin_conjugaciones.txt'
# Root folder for the generated sets. NOTE: the generation cell deletes this
# folder (shutil.rmtree) and recreates it, so nothing else should be kept there.
output_path = './output/'
# Number of word sets (one numbered subfolder each) to generate.
num_sopas = 10
# Number of words sampled for each set.
palabras_x_sopa = 15

**Load dataset**

Cargamos el dataset y eliminamos las palabras que no queremos en las sopas de letras

In [6]:
# Read the raw word list; the file must expose a 'palabras' column.
df_raw = pd.read_csv(dataset_path)
print(len(df_raw))

# Keep only words of 4 to 10 letters (both bounds inclusive), i.e. drop the
# ones that are too short as well as the ones that are too long.
# `.str.len()` yields NaN for entries that read_csv parsed as missing values
# (tokens such as "NA" or "null"), and `between` treats NaN as False, so those
# rows are discarded here instead of crashing on `len(float)`.
longitudes = df_raw['palabras'].str.len()
df = df_raw[longitudes.between(4, 10)]
print(len(df))

# Drop multi-word entries (anything containing a space). `regex=False` makes
# the match a literal substring test.
df = df[~df['palabras'].str.contains(' ', regex=False)]
print(len(df))

# Drop words containing 'ñ'.
df = df[~df['palabras'].str.contains('ñ', regex=False)]
print(len(df))

108790
81792
81724
79436


**Generar sets en output**

In [36]:
# Always start from an empty output folder: a previous run is wiped first.
if os.path.exists(output_path):
    shutil.rmtree(output_path)
os.makedirs(output_path)

for i in range(1, num_sopas + 1):
    # One numbered subfolder per word set.
    carpeta_sopa = output_path + str(i)
    if not os.path.exists(carpeta_sopa):
        os.makedirs(carpeta_sopa)

    # Random sample of words, sorted and then upper-cased.
    sample_palabras = (
        df['palabras']
        .sample(n=palabras_x_sopa)
        .sort_values()
        .str.upper()
    )
    sample_palabras.to_csv(carpeta_sopa + '/palabras.txt', index=False, header=False)

    # Same list transliterated with unidecode (accent-free); this is the file
    # the image-generation step reads.
    sample_palabras_sin_acentos = sample_palabras.apply(unidecode)
    sample_palabras_sin_acentos.to_csv(carpeta_sopa + '/palabras_sin_acentos.txt', index=False, header=False)

print("Fin!")

Fin!


## Generación de imágenes de las sopas

In [37]:
import string
import random
import numpy as np
import cairo

# Same root folder used by the word-set generation step; the puzzle images are
# written into its numbered subfolders.
output_path = './output/'

# Size of the word-search grid, in cells.
ncols = 15
nrows = 15

# Letters used to fill the cells not covered by any word (ASCII A-Z).
letras = list(string.ascii_uppercase)

In [38]:
def _word_fits(matrix, word, direction, x, y):
    """Return True when `word` can be written from cell (x, y) along `direction`.

    A cell is compatible when it is still empty ('') or already holds the same
    letter, which is what allows words to cross each other.
    """
    # Direction 0 only advances the column, 1 only the row, anything else both.
    step_x = 0 if direction == 0 else 1
    step_y = 0 if direction == 1 else 1
    for offset, letter in enumerate(word):
        cell = matrix[x + offset * step_x][y + offset * step_y]
        if cell != '' and cell != letter:
            return False
    return True


def get_direction_and_start_position(matrix, word, max_attempts=1000):
    """Pick a random direction and start cell where `word` fits in `matrix`.

    Directions: 0 = W -> E (column grows), 1 = N -> S (row grows),
    2 = NW -> SE (both grow). `x` is the row index and `y` the column index,
    i.e. cells are addressed as `matrix[x][y]`.

    The grid size is taken from `matrix` itself, and only directions in which
    the word fits inside the grid are drawn, so a word longer than one side of
    a non-square grid no longer crashes when it would fit along the other side.

    Args:
        matrix: 2-D grid (list of rows or NumPy array) whose empty cells hold ''.
        word: the word to place.
        max_attempts: number of random placements tried before falling back to
            an exhaustive search of every possible placement.

    Returns:
        A `(direction, x, y)` tuple.

    Raises:
        ValueError: if the word does not fit inside the grid in any direction,
            or if every possible placement clashes with letters already there.
    """
    n_rows = len(matrix)
    n_cols = len(matrix[0]) if n_rows else 0
    length = len(word)

    # Highest valid start (row, column) for each direction; a negative bound
    # means the word is too long for the grid in that direction.
    limits = {
        0: (n_rows - 1, n_cols - length),
        1: (n_rows - length, n_cols - 1),
        2: (n_rows - length, n_cols - length),
    }
    feasible = [d for d in (0, 1, 2) if limits[d][0] >= 0 and limits[d][1] >= 0]
    if not feasible:
        raise ValueError(
            "word %r does not fit in a %dx%d grid" % (word, n_rows, n_cols))

    # Fast path: random placements, rejected when they clash.
    for _ in range(max_attempts):
        direction = random.choice(feasible)
        max_x, max_y = limits[direction]
        x = random.randint(0, max_x)
        y = random.randint(0, max_y)
        if _word_fits(matrix, word, direction, x, y):
            return direction, x, y

    # Slow path: the grid is crowded (or full). Enumerate every placement so
    # the function either finds one or reports failure instead of looping
    # forever.
    candidates = [
        (d, cx, cy)
        for d in feasible
        for cx in range(limits[d][0] + 1)
        for cy in range(limits[d][1] + 1)
        if _word_fits(matrix, word, d, cx, cy)
    ]
    if not candidates:
        raise ValueError("no free position left in the grid for word %r" % (word,))
    return random.choice(candidates)
    
def generate_word_search_matrix(words):
    """Build the letter grid for one puzzle and record where each word went.

    Each word is placed at the position returned by
    `get_direction_and_start_position`; afterwards every cell that is still
    empty receives a random letter from `letras`.

    Args:
        words: iterable of words to hide in the grid.

    Returns:
        `(matrix, list_solution)`, where `matrix` is an `nrows` x `ncols`
        NumPy array of characters and `list_solution` holds one
        `[word, direction, x, y]` entry per word (x = start row,
        y = start column).
    """
    matrix = np.zeros((nrows, ncols), dtype=str)
    list_solution = []

    for word in words:
        direction, x, y = get_direction_and_start_position(matrix, word)
        list_solution.append([word, direction, x, y])

        # Row / column increment per letter for the chosen direction.
        if direction == 0:
            step_x, step_y = 0, 1
        elif direction == 1:
            step_x, step_y = 1, 0
        else:
            step_x, step_y = 1, 1

        for offset, letter in enumerate(word):
            matrix[x + offset * step_x][y + offset * step_y] = letter

    # Fill the remaining blanks, visiting the cells row by row.
    for cell in np.ndindex(nrows, ncols):
        if matrix[cell] == '':
            matrix[cell] = random.choice(letras)

    return matrix, list_solution

In [39]:
def _draw_letter_grid(svg_path, matrix, cell_size):
    """Create an SVG surface at `svg_path` showing the letters of `matrix`.

    The canvas is `cell_size` pixels per cell, painted white, with every letter
    centred in its cell in black. Returns the `(surface, context)` pair so the
    caller can keep drawing and export; the caller must call
    `surface.finish()`.
    """
    n_rows = len(matrix)
    n_cols = len(matrix[0]) if n_rows else 0

    surface = cairo.SVGSurface(svg_path, cell_size * n_cols, cell_size * n_rows)
    try:
        ctx = cairo.Context(surface)

        # White background.
        ctx.rectangle(0, 0, cell_size * n_cols, cell_size * n_rows)
        ctx.set_source_rgb(1, 1, 1)
        ctx.fill()

        ctx.select_font_face('Arial', cairo.FONT_SLANT_NORMAL, cairo.FONT_WEIGHT_NORMAL)
        ctx.set_font_size(cell_size)
        ctx.set_source_rgb(0, 0, 0)

        for row_idx, row in enumerate(matrix):
            for col_idx, letter in enumerate(row):
                # Centre the glyph in its cell using its measured extents.
                te = ctx.text_extents(letter)
                ctx.move_to(col_idx * cell_size + (cell_size - te.width) / 2,
                            row_idx * cell_size + (cell_size + te.height) / 2)
                ctx.show_text(letter)
    except Exception:
        # Do not leave the surface open if drawing fails.
        surface.finish()
        raise

    return surface, ctx


def generate_word_search_images(output_folder, matrix, list_solution):
    """Render the puzzle and its solution as SVG and PNG files.

    Writes `word_search.svg` / `word_search.png` (letters only) and
    `solution.svg` / `solution.png` (letters plus one line through each
    hidden word). File names are appended directly to `output_folder`, so it
    must end with a path separator.

    Args:
        output_folder: destination folder, including the trailing separator.
        matrix: 2-D grid of letters; the canvas size is derived from it.
        list_solution: `[word, direction, x, y]` entries where x is the start
            row and y the start column; direction is 0 = W -> E, 1 = N -> S,
            anything else = NW -> SE.
    """
    width = 30  # side of one grid cell, in pixels

    # Puzzle image: letters only.
    surface, _ = _draw_letter_grid(output_folder + "word_search.svg", matrix, width)
    try:
        surface.write_to_png(output_folder + 'word_search.png')
    finally:
        surface.finish()

    # Solution image: same grid plus a stroke over every word.
    surface, ctx = _draw_letter_grid(output_folder + "solution.svg", matrix, width)
    try:
        ctx.set_source_rgb(0, 0, 0)
        ctx.set_line_width(3)

        for word, direction, row, col in list_solution:
            if not word:
                # An empty entry has nothing to underline.
                continue

            last = len(word) - 1
            if direction == 0:
                end_row, end_col = row, col + last
            elif direction == 1:
                end_row, end_col = row + last, col
            else:
                end_row, end_col = row + last, col + last

            # Cairo's x axis is horizontal, so the column gives the x
            # coordinate and the row gives the y coordinate. Lines run between
            # the centres of the first and last cell of the word.
            ctx.move_to(col * width + width / 2, row * width + width / 2)
            ctx.line_to(end_col * width + width / 2, end_row * width + width / 2)
            ctx.stroke()

        surface.write_to_png(output_folder + 'solution.png')
    finally:
        surface.finish()

In [40]:
# Build the grid and render the images for every generated word set.
for i in range(1, num_sopas + 1):
    print('PUZZLE: ', i)
    puzzle_folder = output_path + str(i) + '/'
    words_path = puzzle_folder + 'palabras_sin_acentos.txt'

    # One word per line; surrounding whitespace and the newline are stripped.
    with open(words_path, 'r') as f:
        words = list(map(str.strip, f))

    matrix, list_solution = generate_word_search_matrix(words)

    generate_word_search_images(puzzle_folder, matrix, list_solution)

    print('PUZZLE: ', i, ' Done!')

PUZZLE:  1
PUZZLE:  1  Done!
PUZZLE:  2
PUZZLE:  2  Done!
PUZZLE:  3
PUZZLE:  3  Done!
PUZZLE:  4
PUZZLE:  4  Done!
PUZZLE:  5
PUZZLE:  5  Done!
PUZZLE:  6
PUZZLE:  6  Done!
PUZZLE:  7
PUZZLE:  7  Done!
PUZZLE:  8
PUZZLE:  8  Done!
PUZZLE:  9
PUZZLE:  9  Done!
PUZZLE:  10
PUZZLE:  10  Done!
