# Single cell Transformer

In [None]:
import torch
import torch.nn as nn
from torch.nn import functional as F

from nltk.util import ngrams
from nltk import CFG
from nltk.tokenize import word_tokenize
from nltk.parse import RecursiveDescentParser

# hyperparameters
batch_size = 64  # number of sequences drawn per batch in get_batch
block_size = 16  # context length: window sliced in get_batch, size of the causal mask and of the position-embedding table
max_iters = 1  # number of optimisation steps performed by the training loop
eval_interval = 500  # train/val loss is estimated every this many steps
learning_rate = 3e-4  # learning rate handed to the AdamW optimizer
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # prefer the GPU when one is available
eval_iters = 200  # number of batches averaged per split in estimate_loss
n_embd = 384  # embedding width used throughout the model
n_head = 6  # attention heads per Block (head size is n_embd // n_head)
n_layer = 6  # number of stacked Blocks
dropout = 0.1  # probability used by every nn.Dropout in the model
split_ratio = 0.9  # fraction of the encoded text used for training; the rest is validation

torch.manual_seed(1337)  # fix the PyTorch RNG seed

# Read the whole training corpus into memory
# (use ISO-8859-1 instead of utf-8 for Spanish text).
with open('/content/cfg3b.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Character-level vocabulary: every distinct character of the corpus,
# sorted, with lookup tables in both directions.
chars = sorted(set(text))
vocab_size = len(chars)
stoi = dict(zip(chars, range(vocab_size)))
itos = dict(enumerate(chars))


def encode(s):
    """Map a string to the list of integer ids of its characters."""
    return [stoi[c] for c in s]


def decode(l):
    """Map a sequence of integer ids back to the string they spell."""
    return ''.join(itos[i] for i in l)


# Train and test splits: the first `split_ratio` of the ids train the model,
# the remainder is held out for validation.
data = torch.tensor(encode(text), dtype=torch.long)
n = int(split_ratio * len(data))
train_data, val_data = data[:n], data[n:]

# data loading
def get_batch(split):
    """Sample a random batch of (input, target) windows from one split.

    `split == 'train'` draws from `train_data`; any other value draws from
    `val_data`. Each input is `block_size` consecutive ids and its target is
    the same window shifted one position to the right. Both tensors are moved
    to `device` before being returned.
    """
    source = train_data if split == 'train' else val_data
    starts = torch.randint(len(source) - block_size, (batch_size,)).tolist()
    inputs = [source[s:s + block_size] for s in starts]
    targets = [source[s + 1:s + block_size + 1] for s in starts]
    return torch.stack(inputs).to(device), torch.stack(targets).to(device)

@torch.no_grad()
def estimate_loss():
    """Return the mean loss over `eval_iters` random batches for each split.

    The model is switched to eval mode for the measurement and back to train
    mode before returning. The result is a dict with the keys 'train' and
    'val', each holding a scalar tensor.
    """
    model.eval()
    averages = {}
    for split in ('train', 'val'):
        per_batch = torch.zeros(eval_iters)
        for step in range(eval_iters):
            xb, yb = get_batch(split)
            _, batch_loss = model(xb, yb)
            per_batch[step] = batch_loss.item()
        averages[split] = per_batch.mean()
    model.train()
    return averages

class Head(nn.Module):
    """A single self-attention head with a lower-triangular (causal) mask."""

    def __init__(self, head_size):
        super().__init__()
        # Bias-free projections from the embedding width to the head width.
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        # Lower-triangular ones matrix, stored as a buffer (not a parameter).
        mask = torch.tril(torch.ones(block_size, block_size))
        self.register_buffer('tril', mask)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Attend over `x` of shape (B, T, C); the result has the head width as last dim."""
        _, T, _ = x.shape
        keys = self.key(x)
        queries = self.query(x)
        # Scaled dot-product scores; the scale is head_size ** -0.5.
        scale = keys.shape[-1] ** -0.5
        scores = queries @ keys.transpose(-2, -1) * scale
        # Positions above the diagonal get -inf so softmax gives them weight 0.
        blocked = self.tril[:T, :T] == 0
        scores = scores.masked_fill(blocked, float('-inf'))
        weights = self.dropout(F.softmax(scores, dim=-1))
        values = self.value(x)
        return weights @ values

class MultiHeadAttention(nn.Module):
    """Several `Head`s applied to the same input, concatenated and projected to `n_embd`."""

    def __init__(self, num_heads, head_size):
        super().__init__()
        heads = [Head(head_size) for _ in range(num_heads)]
        self.heads = nn.ModuleList(heads)
        self.proj = nn.Linear(head_size * num_heads, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Run every head on `x`, join the outputs on the last dim, project, apply dropout."""
        per_head = [head(x) for head in self.heads]
        concatenated = torch.cat(per_head, dim=-1)
        projected = self.proj(concatenated)
        return self.dropout(projected)

# Disabled sinusoidal positional encoding. It is wrapped in a bare string
# literal, so nothing below is executed and no class is defined. The text
# references `math`, which is not imported in the visible part of this file,
# so an import would be needed before re-enabling it.
'''class PositionalEncoding(nn.Module):

    def __init__(self, n_embd, dropout, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, n_embd)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, n_embd, 2) * -(math.log(10000.0) / n_embd)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer("pe", pe)

    def forward(self, x):
        x = x + self.pe[:, : x.size(1)].requires_grad_(False)
        return self.dropout(x)'''

class FeedFoward(nn.Module):
    """Two-layer MLP: expand to 4 * n_embd, ReLU, project back to n_embd, dropout."""

    def __init__(self, n_embd):
        super().__init__()
        # The 4x expansion mirrors the original Transformer, where
        # d_model=512 and d_ffwd=2048.
        hidden = 4 * n_embd
        layers = [
            nn.Linear(n_embd, hidden),
            nn.ReLU(),
            nn.Linear(hidden, n_embd),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the MLP to `x`."""
        transformed = self.net(x)
        return transformed

class Block(nn.Module):
    """Transformer block: self-attention then feed-forward, each with a
    LayerNorm applied to its input and a residual connection around it."""

    def __init__(self, n_embd, n_head):
        super().__init__()
        # Each head gets an equal slice of the embedding width.
        self.sa = MultiHeadAttention(n_head, n_embd // n_head)  # self-attention
        self.ffwd = FeedFoward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        """Return `x` after the attention and feed-forward residual updates."""
        attended = self.sa(self.ln1(x))
        x = x + attended
        transformed = self.ffwd(self.ln2(x))
        return x + transformed

class GPTLanguageModel(nn.Module):
    """Transformer language model over the character vocabulary.

    Token ids are embedded, summed with learned position embeddings, passed
    through `n_layer` `Block`s and a final LayerNorm, and mapped to
    `vocab_size` logits by a linear head.
    """

    def __init__(self):
        super().__init__()
        # Learned token and position embeddings, both of width n_embd.
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
        self.position_embedding_table = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd) # final layer norm
        self.lm_head = nn.Linear(n_embd, vocab_size)
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Initialise Linear/Embedding weights from N(0, 0.02) and Linear biases to zero."""
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, idx, targets=None):
        """Compute next-token logits and, when `targets` is given, the loss.

        Args:
            idx: (B, T) tensor of token ids, with T <= block_size.
            targets: optional (B, T) tensor of target ids.

        Returns:
            (logits, loss). Without targets, logits is (B, T, vocab_size) and
            loss is None. With targets, logits is flattened to
            (B*T, vocab_size) and loss is the cross-entropy against the
            flattened targets.
        """
        B, T = idx.shape
        tok_emb = self.token_embedding_table(idx) # (B,T,C)
        # Build the position indices on the same device as the input so the
        # model does not depend on the module-level `device` at call time.
        positions = torch.arange(T, device=idx.device)
        pos_emb = self.position_embedding_table(positions) # (T,C)
        x = tok_emb + pos_emb # (B,T,C)
        x = self.blocks(x) # (B,T,C)
        x = self.ln_f(x) # (B,T,C)

        logits = self.lm_head(x) # (B,T,vocab_size)
        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss

    def generate(self, idx, max_new_tokens):
        """Sample `max_new_tokens` ids autoregressively and append them to `idx`.

        Args:
            idx: (B, T) tensor of token ids used as the starting context.
            max_new_tokens: number of ids to sample.

        Returns:
            A (B, T + max_new_tokens) tensor: the input followed by the samples.
        """
        for _ in range(max_new_tokens):
            # The position table only covers block_size positions, so only
            # the most recent block_size ids are fed to the model.
            idx_cond = idx[:, -block_size:]
            logits, loss = self(idx_cond)
            logits = logits[:, -1, :] # becomes (B, C)
            probs = F.softmax(logits, dim=-1) # (B, C)
            idx_next = torch.multinomial(probs, num_samples=1) # (B, 1)
            idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
        return idx

# Build the model, move it to the selected device and report its size.
model = GPTLanguageModel()
m = model.to(device)
print(sum(p.numel() for p in m.parameters())/1e6, 'M parameters')

# create a PyTorch optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# The loop variable is deliberately not called `iter`: at notebook scope that
# would replace the builtin `iter` for every cell executed afterwards.
for step in range(max_iters):

    # every once in a while evaluate the loss on train and val sets
    if step % eval_interval == 0:
        losses = estimate_loss()
        print(f"Epoch {step} - Train Loss: {losses['train']:.4f} | Validation Loss: {losses['val']:.4f}")

    # sample a batch of data
    xb, yb = get_batch('train')

    # forward pass, then one optimizer step on the batch loss
    logits, loss = model(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

10.64986 M parameters
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16, 4])
torch.Size([64, 16

In [None]:
# generate from the model
# Every sample starts from the single token id 0 and adds 512 sampled ids;
# 500 such samples are concatenated into gen_text.
context = torch.zeros((1, 1), dtype=torch.long, device=device)

# estimate_loss leaves the model in train mode, which keeps dropout active.
# Sample in eval mode and without autograd, then restore the previous mode.
was_training = m.training
m.eval()
try:
    with torch.no_grad():
        samples = [
            decode(m.generate(context, max_new_tokens=512)[0].tolist())
            for _ in range(500)
        ]
finally:
    m.train(was_training)

# Join once instead of growing the string with += on every iteration.
gen_text = "".join(samples)

In [None]:
# Show how many characters were generated, followed by the generated text itself.
print(len(gen_text))
print(gen_text)

256500

know, Mr, espect all the devenesse, hespet dicumises arms and brished out savee
in mids, lagna, there head CER may be suphred
moreough proculatation who,
'York, that lay
share was on the
sammersor I had had found it. I begen those actain
litted as to make
which return!" the
waincewing, and to trembled not
acceptain, moselries were forquite of heirs in the tartive, praised, by the log-thorrence had dear protor in who was fiwdons the prise in
that Uncle Auvitor how are beauty to plate in the ungradan instrurc
 The Burim. His deads. The Kirgoing and come bolody. And which it alcormed by
went herward town
the great marry returned to delicapy him. He had alpoored some cannot
donor, we whole course, nor
eable
labouras aS it lamber his pirity was shribedo was not
whatevey had came it. I am natured with the King, he head other, whom we had fealsed by the
monial day. Nor not rons no honee of the larges beavoted ‘Agave it thr steph, the trificult of the
butiful. But? You wilk at leaurn h

In [None]:
#Definición de CFGs
import nltk
from nltk import CFG
from nltk.util import ngrams
from nltk.parse.generate import generate
from nltk.parse import RecursiveDescentParser

# Five context-free grammars sharing the same layout: nonterminals are the
# numbers 7..22 and the only terminals are the characters '1', '2' and '3'.
# Rules for symbol k only expand into lower-numbered symbols, with 7, 8 and 9
# expanding directly into terminals. The rule for 22 comes first in each
# grammar (NLTK takes the left-hand side of the first rule as start symbol).

# cfg3b: two alternatives per nonterminal.
cfg3b = CFG.fromstring("""
    22 -> 21 20 | 20 19
    21 -> 18 16 | 16 18 17
    20 -> 16 17 | 17 16 18
    19 -> 17 18 16 | 16 17 18
    18 -> 14 13 | 15 14 13
    17 -> 15 13 14 | 14 13 15
    16 -> 13 15 14 | 15 13
    15 -> 11 12 10 | 12 11 10
    14 -> 10 11 12 | 11 10 12
    13 -> 12 11 | 11 12
    12 -> 9 7 8 | 8 9 7
    11 -> 7 8 9 | 8 7 9
    10 -> 9 8 7 | 7 9 8
    9 -> '2' '1' | '3' '2' '1'
    8 -> '3' '1' '2' | '3' '2'
    7 -> '1' '2' '3' | '3' '1'
""")

# cfg3i: two alternatives per nonterminal; right-hand sides may repeat a symbol.
cfg3i = CFG.fromstring("""
    22 -> 21 20 19 | 19 19 20
    21 -> 18 17 | 16 16 18
    20 -> 18 18 | 17 16 17
    19 -> 16 16 | 18 16 18
    18 -> 14 15 | 14 15 13
    17 -> 15 14 | 15 15
    16 -> 14 14 | 13 13
    15 -> 11 10 12 | 11 11 10
    14 -> 10 10 | 10 10 10
    13 -> 10 12 11 | 12 11
    12 -> 8 7 | 7 9 9
    11 -> 7 7 8 | 7 7 7
    10 -> 9 9 | 8 7 7
    9 -> '1' '2' | '1' '1' '3'
    8 -> '2' '2' | '1' '1'
    7 -> '2' '3' '1' | '3' '1' '2'
""")

# cfg3h: two or three alternatives per nonterminal.
cfg3h = CFG.fromstring("""
    22 -> 19 21 | 20 20 21
    21 -> 17 18 17 | 17 17 18
    20 -> 17 16 | 18 16
    19 -> 18 17 | 16 17
    18 -> 14 15 15 | 15 14 14 | 15 13 13
    17 -> 15 13 15 | 13 14
    16 -> 15 13 | 14 13
    15 -> 11 11 10 | 10 12
    14 -> 12 12 10 | 10 10 | 10 12 12
    13 -> 11 10 | 12 11
    12 -> 9 8 | 8 7 | 7 9
    11 -> 7 9 9 | 7 7 | 8 7 7
    10 -> 8 8 | 9 7 | 8 7 9
    9 -> '1' '3' '3' | '2' '1' '3'
    8 -> '1' '3' | '3' '3' '1' | '1' '2'
    7 -> '1' '3' '1' | '1' '2' '3' | '2' '3' '2'
""")

# cfg3g: three alternatives per nonterminal.
cfg3g = CFG.fromstring("""
    22 -> 20 19 21 | 20 20 19 | 19 20
    21 -> 18 16 | 16 16 18 | 16 16
    20 -> 16 17 17 | 18 18 | 16 17
    19 -> 18 16 17 | 18 17 16 | 17 17 16
    18 -> 14 13 15 | 15 15 | 15 13
    17 -> 15 14 | 14 15 13 | 14 13 14
    16 -> 13 13 | 13 14 | 14 13 13
    15 -> 12 11 | 12 10 10 | 10 11
    14 -> 10 10 | 10 11 10 | 11 12
    13 -> 11 11 | 11 11 11 | 10 12
    12 -> 9 9 9 | 7 8 | 7 9
    11 -> 8 9 7 | 9 7 | 8 8 9
    10 -> 7 7 | 7 7 7 | 8 8 8
    9 -> '2' '1' | '2' '3' | '2' '3' '3'
    8 -> '3' '3' '1' | '1' '3' | '1' '3' '2'
    7 -> '2' '2' | '1' '1' | '2' '3' '1'
""")

# cfg3f: three or four alternatives per nonterminal.
cfg3f = CFG.fromstring("""
    22 -> 20 20 | 21 19 19 | 20 19 21 | 20 21
    21 -> 16 18 | 16 17 18 | 17 16 | 18 17
    20 -> 17 16 18 | 16 17 | 16 16
    19 -> 18 18 | 17 18 | 18 16 18
    18 -> 13 15 | 15 13 13 | 14 15 13
    17 -> 15 14 | 14 15 | 15 14 13
    16 -> 14 14 | 14 13 | 13 15 13 | 15 15
    15 -> 12 12 11 | 10 10 | 11 11 10 | 10 11 11
    14 -> 10 12 12 | 12 11 | 12 10 12 | 10 12
    13 -> 10 12 11 | 12 11 12 | 11 12
    12 -> 8 8 9 | 9 8 | 7 9 7
    11 -> 9 7 7 | 9 7 | 8 8
    10 -> 7 9 9 | 9 7 9 | 8 9 9
    9 -> '1' '1' | '3' '3' | '1' '2' '1'
    8 -> '3' '3' '1' | '1' '2' | '3' '1' '1'
    7 -> '3' '2' | '3' '1' '2' | '3' '2' '2' | '2' '2' '1'
""")
# gen_text is not defined in this cell; it must already exist in the session.
print(len(gen_text))

117508


In [None]:
# If the model has a block_size of x, use n = x + 1.
def diversity(text, n=17):
    """Average share of distinct character n-grams per whitespace-separated token.

    For every token longer than `n` characters, the ratio
    (distinct n-grams) / (total n-grams) is computed over its sliding
    character windows of length `n`. The ratios are summed and divided by the
    total number of tokens, so tokens of `n` characters or fewer count as 0.

    Args:
        text: whitespace-separated tokens.
        n: n-gram length in characters.

    Returns:
        The average described above as a float; 0.0 when `text` has no tokens.
    """
    tokens = text.split()
    if not tokens:
        # Nothing to average over; avoid dividing by zero.
        return 0.0
    total = 0.0
    for tok in tokens:
        if len(tok) > n:
            # Sliding character windows, the same approach diversityNotCFG uses.
            n_grams = [tok[i:i + n] for i in range(len(tok) - n + 1)]
            total += len(set(n_grams)) / len(n_grams)
    return total / len(tokens)

# Checks, sentence by sentence, whether a text follows the rules of a CFG and
# reports the share of sentences that do. Used to judge the quality of the
# model's predictions.
def perplexity(text, grammar):
    """Print how many whitespace-separated sentences of `text` parse under `grammar`.

    Each sentence is handed to an NLTK `RecursiveDescentParser` as a sequence
    of characters and counts as valid when the parser yields at least one
    tree. Three lines are printed: the number of sentences, the number of
    valid ones, and their ratio (labelled "Perplejidad").

    Args:
        text: sentences separated by whitespace.
        grammar: an NLTK CFG whose terminals are single characters.

    Returns:
        None; the results are only printed.
    """
    frases = text.split()
    valid = 0
    if frases:
        parser = RecursiveDescentParser(grammar)
        for frase in frases:
            try:
                # Only the first tree is needed to know the sentence is valid.
                for _tree in parser.parse(frase):
                    valid += 1
                    break
            except ValueError:
                # NLTK raises ValueError when the sentence contains a symbol
                # the grammar does not cover; such a sentence is simply invalid.
                continue
    # An empty text has no sentences, so report 0.0 instead of dividing by zero.
    ratio = valid / len(frases) if frases else 0.0
    print(f'Número de frases: {len(frases)}')
    print(f'Número de frases que cumplen las reglas: {valid}')
    print(f'Perplejidad: {ratio}')

def diversityNotCFG(text, n=17):
    """Share of distinct character n-grams over the whole text.

    The text is not split into tokens: every sliding window of `n` characters
    (whitespace included) is one n-gram.

    Args:
        text: the text to analyse.
        n: window length in characters.

    Returns:
        (distinct windows) / (total windows) as a float, or 0.0 when the text
        is shorter than `n` and therefore has no window at all.
    """
    text_chunks = [text[i:i + n] for i in range(len(text) - n + 1)]
    if not text_chunks:
        # Text shorter than n: no n-grams, avoid dividing by zero.
        return 0.0
    return len(set(text_chunks)) / len(text_chunks)

# Report the character n-gram diversity of the generated text (default n=17),
# then print the text itself. The grammar check below is left disabled.
print(f'DiversityNotCFG: {diversityNotCFG(gen_text)}')
print(gen_text)
#perplexity(gen_text, cfg3f)

DiversityNotCFG: 0.9997387751282731

know, Mr, espect all the devenesse, hespet dicumises arms and brished out savee
in mids, lagna, there head CER may be suphred
moreough proculatation who,
'York, that lay
share was on the
sammersor I had had found it. I begen those actain
litted as to make
which return!" the
waincewing, and to trembled not
acceptain, moselries were forquite of heirs in the tartive, praised, by the log-thorrence had dear protor in who was fiwdons the prise in
that Uncle Auvitor how are beauty to plate in the ungradan instrurc
 The Burim. His deads. The Kirgoing and come bolody. And which it alcormed by
went herward town
the great marry returned to delicapy him. He had alpoored some cannot
donor, we whole course, nor
eable
labouras aS it lamber his pirity was shribedo was not
whatevey had came it. I am natured with the King, he head other, whom we had fealsed by the
monial day. Nor not rons no honee of the larges beavoted ‘Agave it thr steph, the trificult of the
butif

In [None]:
#para generar los datasets de las CFGs

# -*- coding: utf-8 -*-
"""
Created on Tue Oct 24 16:56:13 2023

@author: LENOVO
"""
import nltk
from nltk import CFG
from nltk.parse.generate import generate
from nltk.parse import RecursiveDescentParser

# Grammars used to generate the datasets. All five share the same layout:
# nonterminals are the numbers 7..22 and the only terminals are the characters
# '1', '2' and '3'. Rules for symbol k only expand into lower-numbered
# symbols, with 7, 8 and 9 expanding directly into terminals. The rule for 22
# comes first in each grammar (NLTK takes the left-hand side of the first rule
# as start symbol).

# cfg3b: two alternatives per nonterminal.
cfg3b = CFG.fromstring("""
    22 -> 21 20 | 20 19
    21 -> 18 16 | 16 18 17
    20 -> 16 17 | 17 16 18
    19 -> 17 18 16 | 16 17 18
    18 -> 14 13 | 15 14 13
    17 -> 15 13 14 | 14 13 15
    16 -> 13 15 14 | 15 13
    15 -> 11 12 10 | 12 11 10
    14 -> 10 11 12 | 11 10 12
    13 -> 12 11 | 11 12
    12 -> 9 7 8 | 8 9 7
    11 -> 7 8 9 | 8 7 9
    10 -> 9 8 7 | 7 9 8
    9 -> '2' '1' | '3' '2' '1'
    8 -> '3' '1' '2' | '3' '2'
    7 -> '1' '2' '3' | '3' '1'
""")

# cfg3i: two alternatives per nonterminal; right-hand sides may repeat a symbol.
cfg3i = CFG.fromstring("""
    22 -> 21 20 19 | 19 19 20
    21 -> 18 17 | 16 16 18
    20 -> 18 18 | 17 16 17
    19 -> 16 16 | 18 16 18
    18 -> 14 15 | 14 15 13
    17 -> 15 14 | 15 15
    16 -> 14 14 | 13 13
    15 -> 11 10 12 | 11 11 10
    14 -> 10 10 | 10 10 10
    13 -> 10 12 11 | 12 11
    12 -> 8 7 | 7 9 9
    11 -> 7 7 8 | 7 7 7
    10 -> 9 9 | 8 7 7
    9 -> '1' '2' | '1' '1' '3'
    8 -> '2' '2' | '1' '1'
    7 -> '2' '3' '1' | '3' '1' '2'
""")

# cfg3h: two or three alternatives per nonterminal.
cfg3h = CFG.fromstring("""
    22 -> 19 21 | 20 20 21
    21 -> 17 18 17 | 17 17 18
    20 -> 17 16 | 18 16
    19 -> 18 17 | 16 17
    18 -> 14 15 15 | 15 14 14 | 15 13 13
    17 -> 15 13 15 | 13 14
    16 -> 15 13 | 14 13
    15 -> 11 11 10 | 10 12
    14 -> 12 12 10 | 10 10 | 10 12 12
    13 -> 11 10 | 12 11
    12 -> 9 8 | 8 7 | 7 9
    11 -> 7 9 9 | 7 7 | 8 7 7
    10 -> 8 8 | 9 7 | 8 7 9
    9 -> '1' '3' '3' | '2' '1' '3'
    8 -> '1' '3' | '3' '3' '1' | '1' '2'
    7 -> '1' '3' '1' | '1' '2' '3' | '2' '3' '2'
""")

# cfg3g: three alternatives per nonterminal.
cfg3g = CFG.fromstring("""
    22 -> 20 19 21 | 20 20 19 | 19 20
    21 -> 18 16 | 16 16 18 | 16 16
    20 -> 16 17 17 | 18 18 | 16 17
    19 -> 18 16 17 | 18 17 16 | 17 17 16
    18 -> 14 13 15 | 15 15 | 15 13
    17 -> 15 14 | 14 15 13 | 14 13 14
    16 -> 13 13 | 13 14 | 14 13 13
    15 -> 12 11 | 12 10 10 | 10 11
    14 -> 10 10 | 10 11 10 | 11 12
    13 -> 11 11 | 11 11 11 | 10 12
    12 -> 9 9 9 | 7 8 | 7 9
    11 -> 8 9 7 | 9 7 | 8 8 9
    10 -> 7 7 | 7 7 7 | 8 8 8
    9 -> '2' '1' | '2' '3' | '2' '3' '3'
    8 -> '3' '3' '1' | '1' '3' | '1' '3' '2'
    7 -> '2' '2' | '1' '1' | '2' '3' '1'
""")

# cfg3f: three or four alternatives per nonterminal.
cfg3f = CFG.fromstring("""
    22 -> 20 20 | 21 19 19 | 20 19 21 | 20 21
    21 -> 16 18 | 16 17 18 | 17 16 | 18 17
    20 -> 17 16 18 | 16 17 | 16 16
    19 -> 18 18 | 17 18 | 18 16 18
    18 -> 13 15 | 15 13 13 | 14 15 13
    17 -> 15 14 | 14 15 | 15 14 13
    16 -> 14 14 | 14 13 | 13 15 13 | 15 15
    15 -> 12 12 11 | 10 10 | 11 11 10 | 10 11 11
    14 -> 10 12 12 | 12 11 | 12 10 12 | 10 12
    13 -> 10 12 11 | 12 11 12 | 11 12
    12 -> 8 8 9 | 9 8 | 7 9 7
    11 -> 9 7 7 | 9 7 | 8 8
    10 -> 7 9 9 | 9 7 9 | 8 9 9
    9 -> '1' '1' | '3' '3' | '1' '2' '1'
    8 -> '3' '3' '1' | '1' '2' | '3' '1' '1'
    7 -> '3' '2' | '3' '1' '2' | '3' '2' '2' | '2' '2' '1'
""")

def _join_sentences(frases):
    """Turn each generated token list into one sentence and space-separate the sentences."""
    return ' '.join(''.join(tokens) for tokens in frases)


def _save_corpus(path, corpus):
    """Write `corpus` to `path` as UTF-8, the encoding the training cell reads with."""
    with open(path, 'w', encoding='utf-8') as archivo:
        archivo.write(corpus)


# cfg3b: generate once (10000 sentences) and write the same corpus to both
# prueba.txt and cfg3b.txt. `generate` enumerates sentences in a fixed order,
# so the first 7000 of them are exactly what a separate n=7000 call returns.
prueba = list(generate(cfg3b, n=10000))
frases_cfg3b = prueba[:7000]
out_cfg3b = _join_sentences(prueba)
_save_corpus('prueba.txt', out_cfg3b)
_save_corpus('cfg3b.txt', out_cfg3b)

print("frases generadas y guardadas en cfg3b.txt")

frases_cfg3i = list(generate(cfg3i, n=10000))
out_cfg3i = _join_sentences(frases_cfg3i)
_save_corpus('cfg3i.txt', out_cfg3i)

print("frases generadas y guardadas en cfg3i.txt")

# cfg3h sentences are separated by a space like every other corpus, so that
# text.split() can recover the individual sentences downstream.
frases_cfg3h = list(generate(cfg3h, n=7000))
out_cfg3h = _join_sentences(frases_cfg3h)
print(out_cfg3h)
_save_corpus('cfg3h.txt', out_cfg3h)

print("frases generadas y guardadas en cfg3h.txt")

frases_cfg3g = list(generate(cfg3g, n=7000))
out_cfg3g = _join_sentences(frases_cfg3g)
_save_corpus('cfg3g.txt', out_cfg3g)

print("frases generadas y guardadas en cfg3g.txt")

frases_cfg3f = list(generate(cfg3f, n=7000))
out_cfg3f = _join_sentences(frases_cfg3f)
_save_corpus('cfg3f.txt', out_cfg3f)

print("frases generadas y guardadas en cfg3f.txt")