# Gerador de Letras Musicais

Gerador de Letras Musicais estocástico, modelado puramente com uma Cadeia de Markov. Foram consideradas 99 músicas da banda Coldplay para produzir o resultado.

A aplicação não possui elementos de inteligência artificial nem de processamento de linguagem natural além da tokenização das palavras (feita com o NLTK); tais elementos melhorariam, sintática e semanticamente, a qualidade da saída produzida.

#### Importação das bibliotecas utilizadas

In [None]:
import os
from nltk.tokenize import word_tokenize
from random import choices

#### Leitura das músicas

In [None]:
FOLDER_PATH = './data/'

def read_songs():
    """Collect every non-empty line from the files found under FOLDER_PATH.

    The folder is walked recursively. Each file is opened in text mode with
    the platform default encoding, every line is stripped of surrounding
    whitespace, and blank lines are dropped.

    Returns:
        A list of stripped, non-empty lines, in the order the files are
        yielded by ``os.walk``. The list is empty when the folder does not
        exist or holds no files.
    """
    verses = []
    # Keep the directory reported by os.walk: joining the bare file name to
    # FOLDER_PATH would point at the wrong location for files in sub-folders.
    for dir_path, _, file_names in os.walk(FOLDER_PATH):
        for file_name in file_names:
            # The context manager closes each file as soon as it is consumed.
            with open(os.path.join(dir_path, file_name)) as song_file:
                for line in song_file:
                    line = line.strip()
                    if line:
                        verses.append(line)
    return verses

# Load every non-empty lyric line and report how many were read.
verses = read_songs()
print('Número de versos =', len(verses))

#### Extração das palavras

In [None]:
def clean_verses(verses):
    """Flatten the verses into one list of lowercase alphabetic words.

    Each verse is tokenized with ``word_tokenize``; tokens that are not
    purely alphabetic (per ``str.isalpha``) are discarded and the remaining
    ones are lowercased.

    Args:
        verses: Iterable of verse strings.

    Returns:
        A single list with the kept words of all verses, in reading order.
    """
    return [
        token.lower()
        for line in verses
        for token in word_tokenize(line)
        if token.isalpha()
    ]

# Turn the verses into a flat word list and report its size.
words = clean_verses(verses)
print('Número de palavras =', len(words))

#### Criação da Cadeia de Markov

In [None]:
def create_markov_chain(words, max_grouping=1):
    """Build a Markov chain of transition probabilities between word groups.

    A state is ``max_grouping`` consecutive words joined by single spaces.
    For every position in ``words``, the group starting there is linked to
    the group formed by the ``max_grouping`` words that immediately follow
    it. Transition counts are then normalised per state so that the outgoing
    probabilities of each state sum to 1.

    Args:
        words: Sequence of words, in corpus order.
        max_grouping: Number of words per state. Must be at least 1.

    Returns:
        A dict mapping each state to a dict ``{next_state: probability}``.
        Only states that have at least one successor appear as keys; the
        result is empty when ``words`` holds fewer than ``2 * max_grouping``
        words.

    Raises:
        ValueError: If ``max_grouping`` is smaller than 1.
    """
    if max_grouping < 1:
        raise ValueError('max_grouping must be at least 1')

    # A transition starting at i reads words[i : i + 2 * max_grouping], so the
    # last valid start is len(words) - 2 * max_grouping. This keeps the final
    # transition of the corpus and never indexes past the end of the list.
    last_start = len(words) - 2 * max_grouping

    transition_counts = {}
    for i in range(last_start + 1):
        middle = i + max_grouping
        current_state = ' '.join(words[i:middle])
        next_state = ' '.join(words[middle:middle + max_grouping])
        counts = transition_counts.setdefault(current_state, {})
        counts[next_state] = counts.get(next_state, 0) + 1

    # Normalise the raw counts into probabilities, state by state.
    markov_chain = {}
    for current_state, counts in transition_counts.items():
        total = sum(counts.values())
        markov_chain[current_state] = {
            state: count / total for state, count in counts.items()
        }

    return markov_chain

# Build the chain with the default grouping (one word per state) and report
# how many distinct states it contains.
markov_chain = create_markov_chain(words)
print('Total de estados =', len(markov_chain.keys()))

#### Geração das Músicas

In [None]:
def generate_lyrics(markov_chain, max_transitions=200, start='i'):
    """Generate lyrics by walking randomly through the Markov chain.

    Starting from ``start``, the next state is drawn at random, weighted by
    the transition probabilities of the current state. Every state is
    followed by a space; a line break is inserted after every 10 transitions
    and a blank line after every 50.

    Args:
        markov_chain: Dict mapping each state to ``{next_state: probability}``.
        max_transitions: Maximum number of transitions to perform.
        start: Initial state of the walk.

    Returns:
        The generated text. It is shorter than ``max_transitions`` states
        when the walk reaches a state that has no outgoing transitions.

    Raises:
        KeyError: If at least one transition is requested and ``start`` is
            not a state of ``markov_chain``.
    """
    if max_transitions > 0 and start not in markov_chain:
        raise KeyError(f'start state {start!r} is not in the Markov chain')

    current_state = start
    pieces = [current_state + ' ']

    for transitions_made in range(1, max_transitions + 1):
        transitions = markov_chain.get(current_state)
        if not transitions:
            # Dead end: this state only ever appears as a successor (e.g. the
            # last word of the corpus), so the walk cannot continue.
            break
        current_state = choices(
            list(transitions), weights=list(transitions.values())
        )[0]
        pieces.append(current_state + ' ')
        if transitions_made % 10 == 0:
            pieces.append('\n\n' if transitions_made % 50 == 0 else '\n')

    return ''.join(pieces)

# Print one generated lyric using the default start word and transition limit.
print(generate_lyrics(markov_chain))