# 1. Load data

In [2]:
import numpy as np
import pandas as pd
from collections import Counter
# Seed NumPy's global RNG so the np.random.choice draws further down are repeatable.
np.random.seed(24)

# Load the chord table from a CSV located relative to the working directory.
data = pd.read_csv(r'pop_music_chords.csv')

In [3]:
# Preview up to the first 16 rows of the chord table.
data.head(16)

Unnamed: 0,X...,X.....,X.......,X......1,X......2,X....,X....1,X..,X....2,X.....1
0,F,Am,C,C,C,Dm7,Am,Em,C,C
1,Em7,Em,Em,G/B,C/B,Am,F,D,Am,Em7
2,Dm,F,Am,Am,Am7,Dm7,G,C,F,Am7
3,C,G,F,Em,C/G,Am,C,G/B,G,Em/G
4,F,C,C,Fmaj7,C/F,F,Am,Am7,Em,C
5,Em7,G/B,Em,G,C/E,C,F,B7,Am,Em7
6,Dm,Am,Am,C,Dm7,Dm7,G,Em,F,F
7,C,Em,F,Am,G5,G,C,B7,G,G
8,C7,F,C,Dm7,C,Dm7,Am,Em,C,C
9,F,G,Em,Fmaj7,C/B,Am,F,D,Am,Em7


# 2. Train/test split

In [4]:
# (rows, columns) of the chord table; the next cell reads one progression per column.
data.shape

(47, 10)

In [5]:
# Order of the n-grams built from the progressions (2 = bigrams).
n = 2
# One progression per column of the table, with missing cells removed so
# shorter columns do not carry padding values.
progression = [
    data.iloc[:, column].dropna().values
    for column in range(data.shape[1])
]

In [6]:
 # Split the progressions into train and test sets: 20% are held out,
 # and random_state=24 keeps the split the same on every run.
from sklearn.model_selection import train_test_split
train, test = train_test_split(progression, test_size=0.2, random_state=24)

In [7]:
# Collect every run of n consecutive chords from each training progression,
# stored as a single space-separated string (e.g. 'Am Dm7' when n == 2).
# Iterating over `train` directly (instead of a hard-coded range(0, 8)) keeps
# this cell correct when the dataset size or the test fraction changes.
bigrams = []
for train_progression in train:
    # zip over n shifted views of the progression yields its sliding windows.
    windows = zip(*[train_progression[i:] for i in range(n)])
    # Extending a plain list avoids re-copying a growing array on every pass.
    bigrams.extend(" ".join(window) for window in windows)

In [24]:
# Spot-check one training bigram (two chords joined by a space).
bigrams[1]

'Am Dm7'

# 3. Predict next state with Markov chain

In [13]:
def predict_next_state(chord:str, data:list=None):
    """Sample the next chord given the current one, using bigram frequencies.

    Parameters
    ----------
    chord : str
        Current chord, i.e. the first token of a bigram.
    data : list of str, optional
        Bigrams formatted as ``"<chord> <next chord>"``. When omitted, the
        notebook-level ``bigrams`` list is looked up at call time, so
        re-running the cell that builds it is picked up automatically.

    Returns
    -------
    str
        A next chord drawn with ``np.random.choice``, weighted by how often
        each continuation follows ``chord`` in ``data``.

    Raises
    ------
    ValueError
        If no bigram in ``data`` starts with ``chord``.
    """
    if data is None:
        data = bigrams
    # Collect the second chord of every bigram that starts with the current chord.
    followers = []
    for bigram in data:
        tokens = bigram.split(' ')
        if tokens[0] == chord:
            followers.append(tokens[1])
    if not followers:
        # np.random.choice would fail here with a message about an empty 'a';
        # raise the same exception type with the actual cause spelled out.
        raise ValueError(f"no bigram in data starts with chord {chord!r}")
    # Count how often each continuation appears (first-seen order is kept).
    counts = Counter(followers)
    # Candidate next chords and their relative frequencies, in matching order.
    options = list(counts)
    probabilities = [counts[option] / len(followers) for option in options]
    # Random draw weighted by the observed transition frequencies.
    return np.random.choice(options, p=probabilities)

In [25]:
# Draw one possible successor of C; the result is sampled, so it depends on the RNG state.
predict_next_state("C",bigrams)

'G/B'

# 4. Generate sequence

In [21]:
def generate_sequence(chord:str=None, data:list=None, length:int=30):
    """Generate a chord sequence by repeatedly sampling the next chord.

    Parameters
    ----------
    chord : str
        Chord to start from. It is not included in the returned list. The
        ``None`` default is kept for signature compatibility only: no bigram
        starts with ``None``, so sampling fails unless ``length`` is 0.
    data : list of str, optional
        Bigrams formatted as ``"<chord> <next chord>"``. When omitted, the
        notebook-level ``bigrams`` list is looked up at call time.
    length : int
        Number of chords to generate.

    Returns
    -------
    list
        The ``length`` generated chords, in order.
    """
    if data is None:
        data = bigrams
    # Generated chords, in order.
    chords = []
    for _ in range(length):
        # Each sampled chord becomes the current state for the next draw.
        chord = predict_next_state(chord, data)
        chords.append(chord)
    return chords

In [22]:
# Generate a progression starting from F with the default bigram data and length.
generate_sequence('F')

['G',
 'Em',
 'Am',
 'F',
 'G',
 'C',
 'Am',
 'F',
 'C/E',
 'Am7',
 'B7',
 'Em',
 'Fmaj7',
 'C/E',
 'Bb',
 'F/A',
 'G',
 'C',
 'Dm7',
 'Am',
 'Fm',
 'G',
 'C',
 'C/B',
 'Am7',
 'B7',
 'Em',
 'Am',
 'G',
 'Am']

# 5. Evaluate generation

In [26]:
import math
import sys
from fractions import Fraction
import warnings
from collections import Counter
from nltk.util import ngrams
from nltk.translate.bleu_score import sentence_bleu
def closest_ref_length(references, hyp_len):
    """Return the length of the reference whose length is nearest to ``hyp_len``.

    When two references are equally close, the shorter length is returned.
    """
    # Tuples compare by distance first and by length second, which gives the
    # tie-break towards the shorter reference.
    distance_and_length = [
        (abs(len(reference) - hyp_len), len(reference)) for reference in references
    ]
    return min(distance_and_length)[1]
def brevity_penalty(closest_ref_len, hyp_len):
    """Return the BLEU brevity penalty for a hypothesis of length ``hyp_len``.

    The result is 1 when the hypothesis is longer than the closest reference,
    0 when the hypothesis is empty, and ``exp(1 - closest_ref_len / hyp_len)``
    otherwise.
    """
    # A hypothesis longer than the closest reference is not penalised.
    if hyp_len > closest_ref_len:
        return 1
    # If hypothesis is empty, brevity penalty = 0 should result in BLEU = 0.0
    if hyp_len == 0:
        return 0
    length_ratio = closest_ref_len / hyp_len
    return math.exp(1 - length_ratio)

def generate_combined(l,start):
    """Generate one sequence per starting chord, each led by its start chord.

    ``l`` is the number of generated chords per sequence, so every returned
    list holds ``l + 1`` chords. ``start`` is iterated element by element; a
    plain string is therefore treated as one start chord per character.
    Sequences are sampled from the notebook-level ``bigrams``.
    """
    # Sample every continuation first, in the order the start chords are given.
    continuations = [generate_sequence(first_chord, bigrams, l) for first_chord in start]
    # Put each start chord in front of the sequence generated from it.
    return [
        [first_chord] + continuation
        for first_chord, continuation in zip(start, continuations)
    ]

In [29]:
# One progression of 20 generated chords, led by the start chord C.
generate_combined(20,"C")

[['C',
  'G/B',
  'Am',
  'Fm',
  'G',
  'C',
  'Am',
  'Dm7',
  'G',
  'Am',
  'Dm7',
  'Am',
  'Dm7',
  'G',
  'C',
  'F',
  'Em7',
  'Am7',
  'Dm7',
  'Am',
  'F']]

In [30]:
# One progression of 30 generated chords, led by the start chord C.
generate_combined(30,"C")

[['C',
  'G/B',
  'Am7',
  'B7',
  'Em',
  'Em',
  'Fmaj7',
  'G',
  'C',
  'Am',
  'Dm7',
  'Am',
  'Em',
  'Em',
  'Dm',
  'C',
  'Am',
  'F',
  'C',
  'Em',
  'Em',
  'Dm',
  'C',
  'Em7',
  'Dm',
  'C',
  'G/B',
  'Am',
  'Fm',
  'Gsus4',
  'G']]

In [31]:
# One progression of 40 generated chords, led by the start chord C.
generate_combined(40,"C")

[['C',
  'C7',
  'Fmaj7',
  'Gsus4',
  'G',
  'Em',
  'Em',
  'Am',
  'Dm7',
  'Am',
  'F',
  'Em',
  'Am',
  'F',
  'G',
  'C',
  'C/B',
  'Am7',
  'Em/G',
  'C',
  'Am',
  'F',
  'G',
  'Gm',
  'F',
  'Dm7',
  'G',
  'C',
  'G/B',
  'Am7',
  'C/G',
  'C/F',
  'C/E',
  'Bb',
  'F/A',
  'G',
  'C',
  'Em7',
  'Am7',
  'C/G',
  'C/F']]

In [32]:
# One progression of 20 generated chords, led by the start chord F.
generate_combined(20,"F")

[['F',
  'G',
  'C',
  'Am',
  'Dm7',
  'G',
  'C',
  'Am',
  'F',
  'Dm7',
  'Am',
  'Fm',
  'G',
  'C',
  'Am',
  'F',
  'Em7',
  'F',
  'G',
  'C',
  'G/B']]

In [33]:
# One progression of 30 generated chords, led by the start chord F.
generate_combined(30,"F")

[['F',
  'G',
  'C',
  'Em7',
  'Dm',
  'C',
  'Dm7',
  'C/E',
  'Dm7',
  'G',
  'Dm7',
  'G',
  'C',
  'Am',
  'F',
  'G',
  'C',
  'Em7',
  'Dm',
  'C',
  'Am',
  'F',
  'G',
  'C',
  'Dm7',
  'G',
  'C',
  'Em7',
  'Dm',
  'C',
  'Dm7']]

In [34]:
# One progression of 40 generated chords, led by the start chord F.
generate_combined(40,"F")

[['F',
  'C',
  'Dm7',
  'G',
  'Am',
  'F',
  'G',
  'G',
  'Dm7',
  'G',
  'C',
  'Em',
  'Fmaj7',
  'Gsus4',
  'G',
  'C',
  'Am',
  'Em',
  'Fmaj7',
  'G',
  'C',
  'Am',
  'F',
  'G',
  'C',
  'Fmaj7',
  'G',
  'C',
  'C/B',
  'Am7',
  'Gm7',
  'C7',
  'Fmaj7',
  'G',
  'F',
  'G',
  'C',
  'Fmaj7',
  'C/E',
  'Dm7',
  'G']]

In [27]:
# The first two held-out progressions serve as the BLEU references.
reference1, reference2 = test[0], test[1]
references = [reference1, reference2]

# n-gram weightings to score with, generated lengths to try, and start chords.
weights=[(0,0,0,0.5,0,0,0,0.5),(0.25,0.25,0.25,0.25),(0,0,0,0,0,0,0,1),(0,0,0,1),(1,0,0,0)]
leng=[20,30,40]
start=['C','F']
for l in leng:
    # One generated progression per start chord, reused for every weighting.
    result = generate_combined(l, start)
    for w, weight in enumerate(weights):
        for i, first_chord in enumerate(start):
            hypothesis1 = np.array(result[i])
            hyp_len = len(hypothesis1)
            score = sentence_bleu(references, hypothesis1, weight)
            closest_ref_len = closest_ref_length(references, hyp_len)
            bp = brevity_penalty(closest_ref_len, hyp_len)
            # NOTE(review): dividing by bp presumably removes the brevity
            # penalty from the reported score; confirm against NLTK's BLEU definition.
            print(score/bp)
            print(weight)
            print(l)
            print(first_chord)

6.08970970641905e-155
(0, 0, 0, 0.5, 0, 0, 0, 0.5)
20
C
6.08970970641905e-155
(0, 0, 0, 0.5, 0, 0, 0, 0.5)
20
F
0.36064732832591173
(0.25, 0.25, 0.25, 0.25)
20
C
0.39788427553162475
(0.25, 0.25, 0.25, 0.25)
20
F
2.2250738585072626e-308
(0, 0, 0, 0, 0, 0, 0, 1)
20
C
2.2250738585072626e-308
(0, 0, 0, 0, 0, 0, 0, 1)
20
F
0.16666666666666669
(0, 0, 0, 1)
20
C
0.16666666666666669
(0, 0, 0, 1)
20
F
0.7142857142857143
(1, 0, 0, 0)
20
C
0.9523809523809523
(1, 0, 0, 0)
20
F
3.9866508128531494e-155
(0, 0, 0, 0.5, 0, 0, 0, 0.5)
30
C
6.303448402188771e-155
(0, 0, 0, 0.5, 0, 0, 0, 0.5)
30
F
0.2089685256289425
(0.25, 0.25, 0.25, 0.25)
30
C
0.4493946880757933
(0.25, 0.25, 0.25, 0.25)
30
F
2.2250738585072626e-308
(0, 0, 0, 0, 0, 0, 0, 1)
30
C
2.2250738585072626e-308
(0, 0, 0, 0, 0, 0, 0, 1)
30
F
0.07142857142857141
(0, 0, 0, 1)
30
C
0.17857142857142858
(0, 0, 0, 1)
30
F
0.5806451612903226
(1, 0, 0, 0)
30
C
0.9032258064516129
(1, 0, 0, 0)
30
F
0.11470786693528089
(0, 0, 0, 0.5, 0, 0, 0, 0.5)
40
C
4.839

The hypothesis contains 0 counts of 6-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 7-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 8-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
