# Making Markov Chains of Various Types

First we'll load the full list of pickled sequences *(this may take a moment or two)*.

In [1]:
import pickle

In [2]:
with open('all_melodies.pkl', 'r') as f:
    all_melodies = pickle.load(f)
with open('all_rhythms.pkl', 'r') as g:
    all_rhythms = pickle.load(g)

In [3]:
assert len(all_melodies) == len(all_rhythms)

Next, we'll define a **Markov** object that will store Markov chain information for chains of various orders and types.

In [48]:
class Markov(object):
    """A container for holding Markov chains of various orders, 
    where the events in the given 'current state' may occur temporally 
    before and/or after the note to be examined as the 'next state'.
    """
    
    def __init__(self, before = 0, after = 0, mode = 0):
        """Initialize a Markov object with `before` notes before the
        space to be filled, `after` notes after the space to be filled,
        and with `mode` equal to 0, 1 or 2 according to whether the 'current
        state' contains notes only before, only after, or both before and after
        the note to be filled in the 'next state'.
        
        Inputs: 
        before - int - number of notes before next state
        after - int - number of notes after next state
        mode - int in range(3) - mode of the chain(see above)
        
        Outputs - Markov object
        """
        
        if mode == 0:
            assert before != 0 and after == 0
        elif mode == 1:
            assert before == 0 and after != 0
        else:
            assert before != 0 and after != 0

        self.before = before
        self.after = after
        self.mode = mode
        self.state_dict = {}
        
    def add_data(self, seq, result):
        """Add one (current state -> next state) instance to the dictionary.
        Store each instance as a tally, to be normalized later.
        
        Inputs:
        seq - if mode = 0, seq is a tuple of length before
              if mode = 1, seq is a tuple of length after
              if mode = 2, seq is a list of length two, with first element
                           a tuple of length before and with second element
                           a tuple of length after
        result - int - single event
        
        Outputs:
        None
        """
        
        if self.mode == 0:
            assert isinstance(seq, tuple) and len(seq) == self.before
        elif self.mode == 1:
            assert isinstance(seq, tuple) and len(seq) == self.after
        else:
            #print seq[0]
            #print seq[1]
            assert (isinstance(seq, tuple) and len(seq) == 2 and
                    isinstance(seq[0], tuple) and len(seq[0]) == self.before and
                    isinstance(seq[1], tuple) and len(seq[1]) == self.after)
            
        if seq not in self.state_dict:
            self.state_dict[seq] = {result: 1}
        elif result in self.state_dict[seq]:
            self.state_dict[seq][result] += 1
        else:
            self.state_dict[seq][result] = 1
            
    def normalize(self):
        """Convert the state_dict dictionary from counts to probabilities.
        
        Inputs: None
        
        Outputs: None
        """
        
        for seq in self.state_dict:
            sum = 0
            for result in self.state_dict[seq]:
                sum += self.state_dict[seq][result]
            for result in self.state_dict[seq]:
                self.state_dict[seq][result] /= float(sum)
        
            

Now to test!

In [5]:
markov0 = Markov(1, 0, 0)
melody0 = all_melodies[0]
for i in range(len(melody0) - 1):
    markov0.add_data((melody0[i],), melody0[i + 1])
markov0.normalize()

TypeError: unhashable type: 'list'

In [6]:
print markov0.state_dict

{}


In [7]:
print melody0

[[72], [73], [75], [68], [73], [76], [68], [73], [68], [71], [76], [68], [71], [68], [70], [76], [68], [70], [70], [73], [78], [70], [73], [65], [58], [60], [62]]


Yay, this handles traditional (order 1) Markov models perfectly!

In [8]:
markov0 = Markov(1, 0, 0)
melody2 = all_melodies[2]
for i in range(len(melody2) - 1):
    markov0.add_data((melody2[i]), melody2[i + 1])
markov0.normalize()

AssertionError: 

It seems as though the chords (shown below as inner lists) give us a bit of an issue, though.

In [9]:
print melody2

[[61], [59], [58], [54], [56], [60], [58], [60], [49], [61], [47], [46], [42], [44], [60], [51], [58], [60], [51], [58], [60], [61], [49], [49, 59], [42, 58], [56, 68], [58, 70], [72, 60], [75, 63], [67, 79], [80, 68], [66, 78], [80, 68], [66, 78], [80, 63], [65], [72], [74], [72], [71], [69], [66, 67], [63], [77], [65], [74], [70], [66], [68], [80, 75], [72], [82], [77], [58], [53], [58, 53], [82], [77], [58, 53], [53], [58], [82], [77], [53], [58], [53], [58], [77], [82], [53], [58], [58, 53], [65], [67], [69], [70], [71], [72], [68], [70], [72], [73], [74], [75], [68], [70], [72], [73], [75], [77], [68], [70], [72], [73], [75], [77, 62], [53], [60], [62], [53], [60], [62], [63], [51], [51, 61], [60, 44], [58, 70], [72, 60], [74, 62], [65, 77], [81, 69], [82, 70], [80, 68], [82, 70], [80, 68], [58, 66, 54, 70], [66, 58, 70, 54], [58, 70, 66, 54], [58, 70, 66, 54], [65], [77], [75], [63], [73], [61], [72], [60], [58], [70], [56], [68], [82, 70], [72, 84], [74, 86], [64, 76], [65, 77],

One answer to this is to actually convert all the integers to lists so we can use itertools.product on the slice of the sequence we are looking for.

In [10]:
for melody in all_melodies:
    for i in range(len(melody)):
        if isinstance(melody[i], int):
            melody[i] = [melody[i]]

In [11]:
print all_melodies[0]

[[72], [73], [75], [68], [73], [76], [68], [73], [68], [71], [76], [68], [71], [68], [70], [76], [68], [70], [70], [73], [78], [70], [73], [65], [58], [60], [62]]


In [31]:
import itertools

def iterate_melody(mel, mark, before, after):
    full_length = before + after + 1
    for i in range(len(mel) - full_length):
        for seq in itertools.product(*mel[i:i + full_length]):
            before_seq, after_seq, val = seq[:before], seq[-after:], seq[before]
            mode = mark.mode
            if mode == 0:
                mark.add_data(before_seq, val)
            elif mode == 1:
                mark.add_data(after_seq, val)
            else:
                #print (before_seq, after_seq)
                mark.add_data((before_seq, after_seq), val)
            
            
markov = Markov(1, 0, 0)
iterate_melody(all_melodies[0], markov, before = 1, after = 0)
markov.normalize()
print markov.state_dict

{(58,): {60: 1}, (65,): {58: 1}, (72,): {73: 1}, (73,): {68: 0.2, 75: 0.2, 76: 0.2, 78: 0.2, 65: 0.2}, (68,): {73: 0.3333333333333333, 70: 0.3333333333333333, 71: 0.3333333333333333}, (75,): {68: 1}, (70,): {73: 0.5, 76: 0.25, 70: 0.25}, (76,): {68: 3}, (71,): {68: 0.5, 76: 0.5}, (78,): {70: 1}}


In [21]:
markov = Markov(2, 0, 0)
iterate_melody(all_melodies[0], markov, before = 2, after = 0)
markov.normalize()
print markov.state_dict

{(71, 68): {70: 1}, (68, 71): {68: 0.5, 76: 0.5}, (76, 68): {73: 0.3333333333333333, 70: 0.3333333333333333, 71: 0.3333333333333333}, (68, 73): {68: 0.5, 76: 0.5}, (68, 70): {76: 0.5, 70: 0.5}, (70, 70): {73: 1}, (71, 76): {68: 1}, (70, 73): {65: 0.5, 78: 0.5}, (78, 70): {73: 1}, (72, 73): {75: 1}, (65, 58): {60: 1}, (73, 75): {68: 1}, (73, 68): {71: 1}, (70, 76): {68: 1}, (75, 68): {73: 1}, (73, 78): {70: 1}, (73, 76): {68: 1}, (73, 65): {58: 1}}


In [22]:
print all_melodies[0]

[[72], [73], [75], [68], [73], [76], [68], [73], [68], [71], [76], [68], [71], [68], [70], [76], [68], [70], [70], [73], [78], [70], [73], [65], [58], [60], [62]]


In [23]:
with open('all_melodies.pkl', 'w') as f:
    pickle.dump(all_melodies, f)

In [24]:
markov = Markov(0, 1, 1)
iterate_melody(all_melodies[0], markov, before = 0, after = 1)
markov.normalize()
print markov.state_dict

{(58,): {65: 1}, (65,): {73: 1}, (60,): {58: 1}, (73,): {72: 0.2, 68: 0.4, 70: 0.4}, (68,): {73: 0.16666666666666666, 75: 0.16666666666666666, 76: 0.5, 71: 0.16666666666666666}, (75,): {73: 1}, (70,): {68: 0.5, 78: 0.25, 70: 0.25}, (76,): {73: 0.3333333333333333, 70: 0.3333333333333333, 71: 0.3333333333333333}, (71,): {68: 2}, (78,): {73: 1}}


In [33]:
markov = Markov(2, 2, 2)
iterate_melody(all_melodies[0], markov, before = 2, after = 2)
markov.normalize()
print markov.state_dict

{((75, 68), (76, 68)): {73: 1}, ((73, 78), (73, 65)): {70: 1}, ((70, 76), (70, 70)): {68: 1}, ((68, 73), (71, 76)): {68: 1}, ((71, 68), (76, 68)): {70: 1}, ((73, 76), (73, 68)): {68: 1}, ((71, 76), (71, 68)): {68: 1}, ((72, 73), (68, 73)): {75: 1}, ((76, 68), (68, 71)): {73: 1}, ((76, 68), (70, 73)): {70: 1}, ((78, 70), (65, 58)): {73: 1}, ((68, 71), (70, 76)): {68: 1}, ((68, 71), (68, 71)): {76: 1}, ((73, 75), (73, 76)): {68: 1}, ((70, 73), (70, 73)): {78: 1}, ((70, 70), (78, 70)): {73: 1}, ((68, 70), (73, 78)): {70: 1}, ((68, 70), (68, 70)): {76: 1}, ((68, 73), (68, 73)): {76: 1}, ((76, 68), (68, 70)): {71: 1}, ((73, 68), (76, 68)): {71: 1}, ((70, 73), (58, 60)): {65: 1}}


This seems to work pretty well for melodies!

In [34]:
mark_a = Markov(1, 0, 0)
for i in range(len(all_melodies)):
    iterate_melody(all_melodies[i], mark_a, before = 1, after = 0)

{((75, 68), (76, 68)): {73: 1}, ((73, 78), (73, 65)): {70: 1}, ((70, 76), (70, 70)): {68: 1}, ((68, 73), (71, 76)): {68: 1}, ((71, 68), (76, 68)): {70: 1}, ((73, 76), (73, 68)): {68: 1}, ((71, 76), (71, 68)): {68: 1}, ((72, 73), (68, 73)): {75: 1}, ((76, 68), (68, 71)): {73: 1}, ((76, 68), (70, 73)): {70: 1}, ((78, 70), (65, 58)): {73: 1}, ((68, 71), (70, 76)): {68: 1}, ((68, 71), (68, 71)): {76: 1}, ((73, 75), (73, 76)): {68: 1}, ((70, 73), (70, 73)): {78: 1}, ((70, 70), (78, 70)): {73: 1}, ((68, 70), (73, 78)): {70: 1}, ((68, 70), (68, 70)): {76: 1}, ((68, 73), (68, 73)): {76: 1}, ((76, 68), (68, 70)): {71: 1}, ((73, 68), (76, 68)): {71: 1}, ((70, 73), (58, 60)): {65: 1}}


In [39]:
mark_a.normalize()
print mark_a.state_dict

{(32,): {13: 1.3626020248266093e-05, 14: 1.3626020248266093e-05, 16: 8.175612148959656e-05, 17: 1.3626020248266093e-05, 18: 6.131709111719743e-05, 19: 3.406505062066524e-05, 20: 0.0014852362070610042, 21: 4.769107086893132e-05, 22: 0.0006336099415443733, 23: 0.00044284565806864806, 24: 0.0005246017795582446, 25: 0.0076169453187807466, 26: 0.00981754758887572, 27: 0.019110493398193196, 28: 0.01229748327406015, 29: 0.01620815108531252, 30: 0.039290629385875284, 31: 0.03671531155895299, 32: 0.40460423224188924, 33: 0.06311572578996855, 34: 0.0683821826159234, 35: 0.03385384730681711, 36: 0.02137241275940537, 37: 0.04299690689340366, 38: 0.008509449645042176, 39: 0.03340418863862433, 40: 0.00572292850427176, 41: 0.0030113504748668063, 42: 0.005491286160051237, 43: 0.002316423442205236, 44: 0.08025044625216315, 45: 0.00782133562250474, 46: 0.0050756925424791195, 47: 0.0015261142678058023, 48: 0.0012604068729646137, 49: 0.001294471923585279, 50: 0.0005654798403030429, 51: 0.00697652236711224

In [40]:
with open('mark100.pkl', 'w') as f:
    pickle.dump(mark_a, f)

Note that this pickled file

In [42]:
mark_b = Markov(0, 1, 1)
for i in range(len(all_melodies)):
    iterate_melody(all_melodies[i], mark_b, before = 0, after = 1)

In [43]:
mark_b.normalize()
print mark_b.state_dict

{(32,): {14: 1.3608960139355751e-05, 18: 2.0413440209033626e-05, 19: 6.804480069677876e-06, 20: 0.0017691648181162476, 21: 0.0001428940814632354, 22: 3.402240034838938e-05, 23: 0.0008369510485703788, 24: 0.002136606741878853, 25: 0.0034226534750479718, 26: 0.00938337801608579, 27: 0.02149535254011241, 28: 0.01848096786924511, 29: 0.029048325417454852, 30: 0.047651773927954166, 31: 0.046739973598617326, 32: 0.40409765789796004, 33: 0.03844531239368, 34: 0.05289802806167581, 35: 0.021536179420530478, 36: 0.018630666430778025, 37: 0.050754616839727276, 38: 0.00926089737483159, 39: 0.03216477728936732, 40: 0.005470801976021012, 41: 0.009526272097549027, 42: 0.012214041725071787, 43: 0.004797158449122902, 44: 0.07465195084443597, 45: 0.008199398483961841, 46: 0.00502851077149195, 47: 0.0015242035356078441, 48: 0.003347804194281515, 49: 0.0025516800261292036, 50: 0.0008301465685007009, 51: 0.0067704576693294865, 52: 0.0008437555286400566, 53: 0.0017079244974891469, 54: 0.012125583484165975, 

In [44]:
with open('mark011.pkl', 'w') as f:
    pickle.dump(mark_b, f)

In [45]:
mark_c = Markov(1, 1, 2)
for i in range(len(all_melodies)):
    iterate_melody(all_melodies[i], mark_c, before = 1, after = 1)

In [46]:
mark_c.normalize()
print mark_c.state_dict

{((60,), (35,)): {28: 0.0017528483786152498, 29: 0.0035056967572304996, 31: 0.0008764241893076249, 32: 0.0008764241893076249, 33: 0.014022787028921999, 34: 0.0052585451358457495, 35: 0.08238387379491674, 36: 0.06748466257668712, 38: 0.0035056967572304996, 39: 0.0026292725679228747, 40: 0.0035056967572304996, 41: 0.0052585451358457495, 42: 0.06748466257668712, 43: 0.006134969325153374, 45: 0.010517090271691499, 47: 0.09903593339176162, 48: 0.06660823838737949, 49: 0.0008764241893076249, 50: 0.0026292725679228747, 51: 0.006134969325153374, 52: 0.018404907975460124, 53: 0.009640666082383873, 54: 0.009640666082383873, 55: 0.042068361086765996, 56: 0.007887817703768623, 57: 0.07186678352322524, 59: 0.03943908851884312, 60: 0.09991235758106924, 61: 0.0017528483786152498, 62: 0.056091148115687994, 63: 0.008764241893076249, 64: 0.05433829973707274, 65: 0.04732690622261174, 66: 0.0017528483786152498, 67: 0.035056967572304996, 68: 0.0026292725679228747, 69: 0.014022787028921999, 70: 0.0043821209

In [47]:
with open('mark112.pkl', 'w') as f:
    pickle.dump(mark_c, f)

In [50]:
for before in range(4):
    for after in range(4):
        if before == 0:
            if after == 0:
                continue
            mode = 1
        elif after == 0:
            mode = 0
        else:
            mode = 2
        mark = Markov(before, after, mode)
        for k in range(len(all_melodies)):
            iterate_melody(all_melodies[k], mark, before = before, after = after)
        mark.normalize()
        with open('markov' + str(before) + str(after) + str(mode) + '.pkl', 'w') as f:
            pickle.dump(mark, f)

KeyboardInterrupt: 

As it turns out, this way of storing the information isn't great either; with Markov chains of order >= 4, the kernel crashed!