# Regression model for note splitting

In [1]:
import numpy as np
import soundfile as sf

# Using the statsmodels and pandas libraries

In [2]:
import statsmodels.api as sm
import pandas as pd 

In [5]:
# Chord recording to decompose into its constituent notes.
filename = 'C4_61_64_67_71.wav'

# number of keys (should be 12) to be considered here
num_notes = 12

# level of significance
alpha = 0.00001


def real_half_spectrum(samples):
    """Return the real part of the first half of a 4096-sample FFT.

    The transform is taken over frames 20000..24095 of column 0 of
    ``samples``, so ``samples`` must be a 2-D (frames x channels) array.
    With at least 24096 frames the result has 2048 entries; a shorter
    recording yields a shorter result.
    """
    spectrum = np.fft.fft(samples[20000:24096, 0])
    # Floor division keeps the slice bound an integer on Python 3 too;
    # plain "/" would produce a float there and the slice would raise.
    return spectrum.real[:len(spectrum) // 2]


# Design matrix: one column per reference note, one row per kept FFT bin
# (2048 = half of the 4096-sample analysis window).
X = np.zeros((2048, num_notes))

# Reference recordings are numbered consecutively starting at 60
# (presumably MIDI note numbers, 60 = C4 -- TODO confirm).
# Iterate over num_notes so the loop always matches the width of X.
for i in range(num_notes):
    ref, sample_rate = sf.read('C4_' + str(i + 60) + '_x_x_x.wav')
    X[:, i] = real_half_spectrum(ref)

# Spectrum of the chord we want to explain with the reference notes.
ref, sample_rate = sf.read(filename)
Y = real_half_spectrum(ref)
print(Y)


# define the predictors and target values in data frames
x = pd.DataFrame(X)
y = pd.DataFrame(Y)

# Note the argument order: response first, regressors second.
# No constant column is added, so the fit has no intercept term.
model = sm.OLS(y, x).fit()
predictions = model.predict(x)  # fitted values for the chord spectrum

# Print out the statistics (single-argument form prints the same text on
# Python 2 and Python 3).
print("summary: %s" % model.summary())


[ 0.19989014  0.33406753  0.24234739 ..., -0.0082267  -0.00873673
 -0.00867961]
summary:                             OLS Regression Results                            
Dep. Variable:                      0   R-squared:                       0.855
Model:                            OLS   Adj. R-squared:                  0.854
Method:                 Least Squares   F-statistic:                     999.2
Date:                Sat, 12 May 2018   Prob (F-statistic):               0.00
Time:                        03:46:29   Log-Likelihood:                -625.93
No. Observations:                2048   AIC:                             1276.
Df Residuals:                    2036   BIC:                             1343.
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------

In [6]:
# Report the goodness of fit of the regression from the previous cell.
# The single-argument print form gives identical output on Python 2 and
# is also valid on Python 3.
print("r-squared value: %s" % model.rsquared)

# A note is considered part of the chord when the p-value of its
# coefficient is below alpha. Coefficients are in column order, and
# column k was built from the reference file numbered k + 60.
involved_notes = [
    position + 60
    for position, p_value in enumerate(model.pvalues)
    if p_value < alpha
]

print(involved_notes)
print("mystery chord is %s" % filename)
print("degree of significance, alpha is: %s" % alpha)

r-squared value: 0.854838984231
[61, 64, 67, 71]
mystery chord is C4_61_64_67_71.wav
degree of significance, alpha is: 1e-05
