# Using Real Book chord progressions to train a Markov model
The goal of this analysis is to build a Markov model that generates jazz chord progressions in the key of C (all songs are transposed to C first).

Things to keep in mind:
- Sometimes a chord lasts a full measure and sometimes only half a measure. Should each chord be treated as one unit regardless of duration, or should the durations be kept separate?
- Sometimes a progression modulates to a different key. How should that be handled?

## Data extraction

In [None]:
import os 
import numpy as np
import json
import ChordalPy as cp
from transposers import transposeRealBookFile
import matplotlib.pyplot as plt

In [None]:
# Directory holding the Real Book chord annotations (.xlab files).
XLAB_DIR = "/home/nsarrazin/lofi/notebooks/jazz_xlab"

# One entry per .xlab file: the value returned by transposeRealBookFile for
# that file (each entry is iterated chord by chord when the model is built).
# The listing is sorted because os.listdir returns names in arbitrary order,
# which would otherwise make the song order differ between machines/runs.
data = [
    transposeRealBookFile(os.path.join(XLAB_DIR, filename))
    for filename in sorted(os.listdir(XLAB_DIR))
    if filename.endswith(".xlab")
]

In [None]:
# Every chord of every song, concatenated in song order.
flat_list = [item for sublist in data for item in sublist]

# State space = distinct chords in order of first appearance.
# dict.fromkeys de-duplicates like set() but preserves insertion order, so the
# row/column index of each chord is reproducible (set iteration order is not
# guaranteed to be stable across runs).
statespace = list(dict.fromkeys(flat_list))
chord_index = {chord: i for i, chord in enumerate(statespace)}

# counts[i, j] = number of times statespace[j] immediately follows
# statespace[i] within a song. Pairing each chord with its successor visits
# every transition exactly once; the last chord of a song simply has no pair,
# so no exception handling is needed.
counts = np.zeros((len(statespace), len(statespace)))
for song in data:
    for current_chord, next_chord in zip(song, song[1:]):
        counts[chord_index[current_chord], chord_index[next_chord]] += 1

# Normalise each row into a probability distribution. Rows with no outgoing
# transition (chords only ever seen at the end of a song) are left all-zero
# instead of dividing by zero.
row_totals = counts.sum(axis=1, keepdims=True)
matrix = np.divide(
    counts,
    row_totals,
    out=np.zeros_like(counts),
    where=row_totals > 0,
)
print(matrix.shape)

In [None]:
# Dead ends are chords with an all-zero row: they were only seen at the end of
# files and lead nowhere, which would block the Markov process if it reached
# them. Remove every transition *into* a dead end, then renormalise the
# affected rows so they are still probability distributions.
#
# Removing those transitions can empty another chord's row (a chord whose only
# successors were dead ends), so repeat until no reachable dead end is left.
# Each pass zeroes at least one non-empty column, so the loop terminates.
while True:
    row_sums = matrix.sum(axis=1)
    col_sums = matrix.sum(axis=0)
    reachable_deadends = np.flatnonzero((row_sums == 0) & (col_sums > 0))
    if reachable_deadends.size == 0:
        break

    matrix[:, reachable_deadends] = 0

    # Rows that lost some probability mass are rescaled to sum to 1 again;
    # rows that are now empty stay all-zero and are handled by the next pass.
    row_sums = matrix.sum(axis=1, keepdims=True)
    np.divide(matrix, row_sums, out=matrix, where=row_sums > 0)

# Final set of dead-end states (all-zero rows, now also unreachable), in the
# same tuple-of-index-arrays form that np.where returns.
deadends = np.where(np.sum(matrix, axis=1) == 0)

In [None]:
# Persist the model: the transition matrix as a NumPy binary file and the chord
# labels (row/column order of the matrix) as JSON next to it.
np.save("matrix.npy", matrix)

# Use a context manager so the file is flushed and closed deterministically
# instead of relying on garbage collection of an anonymous file object.
with open("statespace.json", "w", encoding="utf-8") as statespace_file:
    json.dump(statespace, statespace_file)

In [None]:
from networkx.drawing.nx_pydot import write_dot
import networkx as nx


Q = matrix

# Start from an empty graph. Passing Q to the constructor would already create
# one integer-labelled node per row plus an edge per non-zero entry, and the
# loop below would then add the same transitions a second time under the chord
# names, leaving a duplicated, unlabelled copy of the chain in the graph.
G = nx.MultiDiGraph()
labels = {}  # not filled in the visible part of this cell
edge_labels = {}

# Add one directed edge per non-zero transition probability. np.argwhere
# yields the (row, column) pairs in row-major order, i.e. the same order as a
# nested loop over the state space, without visiting the zero entries.
n_edge = 0
for i, j in np.argwhere(Q > 0):
    origin_state = statespace[i]
    destination_state = statespace[j]
    rate = Q[i, j]
    G.add_edge(origin_state, destination_state, weight=rate, label="{:.02f}".format(rate))
    edge_labels[(origin_state, destination_state)] = f"{rate}"
    n_edge += 1