In [154]:
import random

class MusicEnvironment:
    def __init__(self):
        self.state = [0.0,0.0,1.0,2.0]
        self.action_space = [0.0,  1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]
        #self.action_space = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    def reset(self):
        self.state = [0.0,0.0,1.0,2.0]
        return self.state

    def step(self, action_index):
        if action_index < 0 or action_index >= len(self.action_space):
            raise ValueError("Invalid action index")
        
        note = self.action_space[action_index]
        self.state.append(note)
        self.state = self.state[1:5]
        
        # Calculate reward
        reward = self.calculate_reward()
        
        
        return self.state, reward

    def calculate_reward(self):
        major_patterns_scores =[]
        minor_patterns_scores =[]
        for note in self.state:
            major_pattern = [note+2,note+4,note+5,note+7,note+9,note+11,note+12] 
            major_pattern = [x if x<11 else x-11 for x in  major_pattern]

            minor_pattern = [note+2,note+3,note+5,note+7,note+8,note+10,note+12]
            minor_pattern = [x if x<11 else x-11 for x in  minor_pattern]

            major_patterns_scores.append(sum(1 if note in major_pattern else 0 for note in self.state))
            minor_patterns_scores.append(sum(1 if note in minor_pattern else 0 for note in self.state))

        return max([max(minor_patterns_scores),max(major_patterns_scores)]) # Random reward for demonstration purposes


In [151]:
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # discount rate
        self.epsilon = 1  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state):
        self.memory.append((state, action, reward, next_state))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.action_size)
        act_values = self.model.predict(np.reshape(state,[1,self.state_size]))
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        print("replay started")
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state in minibatch:
            target = reward
            if len(next_state)>self.state_size:
                next_state = next_state[1:self.state_size+1]
            print(state, action, reward, next_state )
            target = (reward + self.gamma * np.amax(self.model.predict(np.reshape(next_state,[1,self.state_size]))[0]))
            target_f = self.model.predict(np.reshape(state,[1,self.state_size]))
            target_f[0][action] = target
            self.model.fit(np.reshape(state,[1,self.state_size]), target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay



In [152]:
# Create an instance of the music environment
env = MusicEnvironment()
# Get the state and action sizes
state_size = 4
action_size = len(env.action_space)
# Create an instance of the DQN agent
agent = DQNAgent(state_size, action_size)

# Train the agent for 500 steps
batch_size = 32
episodes = 50
for e in range(episodes):
    state = env.reset()

    if len(agent.memory) > batch_size:
        agent.replay(batch_size)
    action = agent.act(state)
    next_state, reward = env.step(action)
    state = state[0:agent.state_size]
    #print(state, action, reward, next_state)
    agent.remember(state, action, reward, next_state)
    state = next_state
        #print(state)
    print("episode: {}/{}, score: {}".format(e, episodes, reward))
    



episode: 0/50, score: 4
episode: 1/50, score: 5
episode: 2/50, score: 5
episode: 3/50, score: 4
episode: 4/50, score: 5
episode: 5/50, score: 4
episode: 6/50, score: 5
episode: 7/50, score: 5
episode: 8/50, score: 5
episode: 9/50, score: 4
episode: 10/50, score: 4
episode: 11/50, score: 5
episode: 12/50, score: 5
episode: 13/50, score: 5
episode: 14/50, score: 5
episode: 15/50, score: 4
episode: 16/50, score: 5
episode: 17/50, score: 5
episode: 18/50, score: 5
episode: 19/50, score: 5
episode: 20/50, score: 5
episode: 21/50, score: 4
episode: 22/50, score: 5
episode: 23/50, score: 5
episode: 24/50, score: 4
episode: 25/50, score: 5
episode: 26/50, score: 5
episode: 27/50, score: 4
episode: 28/50, score: 4
episode: 29/50, score: 5
episode: 30/50, score: 5
episode: 31/50, score: 5
episode: 32/50, score: 5
replay started
[0.0, 0.0, 1.0, 2.0] 6 4 [0.0, 1.0, 2.0, 6.0]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[0.0, 0.0, 1.0, 2.0] 2 5 [0.0, 1.0, 2.0, 2.0]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[0.0, 0.0, 1.0, 2.0] 1 5 [0.0, 1.0, 2.0, 1.0]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[0.0, 0.0, 1.0, 2.0] 1 5 [0.0, 1.0, 2.0, 1.0]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[0.0, 0.0, 1.0, 2.0] 4 5 [0.0, 1.0, 2.0, 4.0]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[0.0, 0.0, 1.0, 2.0] 0 5 [0.0, 1.0, 2.0, 0.0]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[0.0, 0.0, 1.0, 2.0] 10 4 [0.0, 1.0, 2.0, 10.0]
[

In [155]:
# Generate output after training
state = env.reset()
if not state:  # Check if state is empty
    state = [0] * state_size  # Set state to a default value
else:
    state = np.reshape(state, [1, state_size])

output = []
for i in range(30):
    action = agent.act(state)
    next_state, reward = env.step(action)
    next_state = np.reshape(next_state, [1, state_size])
    state = next_state
    output.append(env.action_space[action])

action_space_dict = {0.0:'C4', 1.0:'C4#', 2.0:'D4', 3.0:'D4#', 4.0:'E4', 5.0:'F4', 6.0:'F4#', 7.0:'G4', 8.0:'G4#', 9.0:'A4', 10.0:'A4#', 11.0:'B4'}
output = [action_space_dict[index] for index in output]
letter_notes = [note[0] for note in output]
print("Generated output:", output)
print("Generated notes:", letter_notes)

ValueError: cannot reshape array of size 5 into shape (1,4)

In [149]:
import pygame
import random


def play_piano_note(output):
    """
    Play a piano note.
    """
    
    pygame.mixer.init()
    pygame.init()
    for note in output:
    
        pygame.mixer.music.load(f"piano_notes/{note}.wav")
        pygame.mixer.music.play()
        pygame.time.wait(500) 
        
        pygame.mixer.music.load(f"misc/drum_kick.wav")
        pygame.mixer.music.play()
        pygame.time.wait(500) 
        
        pygame.mixer.music.load(f"piano_notes/{note}.wav")
        pygame.mixer.music.play()
        pygame.time.wait(500)   
        
        pygame.mixer.music.load(f"misc/drum_snare.wav")
        pygame.mixer.music.play()
        pygame.time.wait(500)  
      
    # Quit pygame
    pygame.quit()

    
play_piano_note(output)


KeyboardInterrupt: 

In [99]:

def play_piano_note(output):
    """
    Play a piano note.
    """

    
    pygame.mixer.init()
    pygame.init()
    for note in output:

        pygame.mixer.music.load(f"misc/drum_kick.wav")
        pygame.mixer.music.play()
        pygame.time.wait(1000) 
        
        pygame.mixer.music.load(f"misc/drum_snare.wav")
        pygame.mixer.music.play()
        pygame.time.wait(1000)  
    # Quit pygame
    pygame.quit()

    
play_piano_note(output)


KeyboardInterrupt: 