In [15]:
!pip install shap



In [16]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import shap
from sklearn.compose import make_column_transformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor

In [17]:
# Cache of every temposhap() result, keyed by "<melody_num>_<start_tempo>".
results_dict = {}


def temposhap(melody_num, freq, stepsizes, contours, pre_contours, post_contours, start_tempo, iters = 5):
    """Fit a random forest on one melody's tempo data and compute SHAP values.

    Reads ``Melody<melody_num>_<start_tempo>ms.csv`` from the working
    directory, keeps its first ``iters * freq`` rows, labels each row with
    per-beat features, trains a ``RandomForestRegressor`` to predict the
    ``meanofmeantempo`` column and explains the fitted forest on the
    training rows with ``shap.TreeExplainer``.

    Parameters
    ----------
    melody_num, start_tempo
        Used to build the CSV file name and the ``results_dict`` key.
    freq : int
        Number of rows (beats) in one iteration; rows are labelled with
        ``beat`` = 1..freq, repeated ``iters`` times.
    stepsizes, contours, pre_contours, post_contours : sequence of length ``freq``
        Per-beat values; each is tiled ``iters`` times into the columns
        ``stepsize``, ``contour``, ``pre_contour`` and ``post_contour``.
    iters : int, default 5
        Number of iterations to keep; rows are labelled with
        ``iternum`` = 1..iters, each value repeated ``freq`` times.

    Returns
    -------
    dict
        Keys: ``train_rf_shap_values``, ``X_train_enc``, ``X_train``,
        ``y_train``, ``X_test``, ``y_test`` and ``test_score`` (the value of
        ``pipe_rf.score`` on the held-out 20% split).

    Raises
    ------
    ValueError
        If a per-beat sequence does not have ``freq`` entries or the CSV has
        fewer than ``iters * freq`` rows.

    Side effects
    ------------
    The returned dict is also stored in the module-level ``results_dict``
    under ``"<melody_num>_<start_tempo>"``.
    """
    df = pd.read_csv("Melody"+str(melody_num)+"_"+str(start_tempo)+"ms.csv")

    per_beat_inputs = {
        "stepsizes": stepsizes,
        "contours": contours,
        "pre_contours": pre_contours,
        "post_contours": post_contours,
    }
    for arg_name, values in per_beat_inputs.items():
        if len(values) != freq:
            raise ValueError(
                f"{arg_name} has {len(values)} entries but freq={freq} "
                f"(melody {melody_num})"
            )

    n_rows = iters * freq
    if len(df) < n_rows:
        raise ValueError(
            f"CSV for melody {melody_num} at {start_tempo} ms has {len(df)} rows; "
            f"iters * freq = {n_rows} are required"
        )

    # set_index/assign return new frames, so the head() slice is never mutated
    # in place and no global pandas warning option has to be switched off.
    df_fewcycles = (
        df.head(n_rows)
        .set_index('roundedbeat')
        .assign(
            beat=np.tile(np.arange(1, freq + 1), iters),
            stepsize=np.tile(stepsizes, iters),
            contour=np.tile(contours, iters),
            pre_contour=np.tile(pre_contours, iters),
            post_contour=np.tile(post_contours, iters),
            iternum=np.repeat(np.arange(1, iters + 1), freq),
        )
    )

    categorical_features = ["beat"]
    passthrough_features = ["stepsize", "iternum"]
    binary_features = ["contour", "pre_contour", "post_contour"]
    target = "meanofmeantempo"

    train_df, test_df = train_test_split(df_fewcycles, test_size=0.2, random_state=123)
    X_train, y_train = train_df.drop(columns=[target]), train_df[target]
    X_test, y_test = test_df.drop(columns=[target]), test_df[target]

    # The encoders keep their default (sparse) output and the column
    # transformer densifies via sparse_threshold=0. This avoids the
    # OneHotEncoder `sparse=` keyword, which newer scikit-learn releases no
    # longer accept, while producing the same dense matrix.
    preprocessor = make_column_transformer(
        ("passthrough", passthrough_features),
        (OneHotEncoder(drop="if_binary", dtype=int), binary_features),
        (OneHotEncoder(handle_unknown="ignore"), categorical_features),
        sparse_threshold=0,
    )

    pipe_rf = make_pipeline(
        preprocessor,
        RandomForestRegressor(random_state=123, n_estimators=5, n_jobs=-1),
    )
    pipe_rf.fit(X_train, y_train)

    # Reuse the preprocessor fitted inside the pipeline instead of fitting a second time.
    fitted_preprocessor = pipe_rf.named_steps["columntransformer"]
    transformed = fitted_preprocessor.transform(X_train)

    ohe_columns = (
        fitted_preprocessor.named_transformers_["onehotencoder-2"]
        .get_feature_names_out()
        .tolist()
    )
    # Each 0/1 feature contributes exactly one encoded column (drop="if_binary"),
    # so the binary features keep their original names.
    new_columns = passthrough_features + binary_features + ohe_columns
    X_train_enc = pd.DataFrame(transformed, index=X_train.index, columns=new_columns)

    rf_explainer = shap.TreeExplainer(pipe_rf.named_steps["randomforestregressor"])
    train_rf_shap_values = rf_explainer.shap_values(X_train_enc)

    result = {
        "train_rf_shap_values": train_rf_shap_values,
        "X_train_enc": X_train_enc,
        "X_train": X_train,
        "y_train": y_train,
        "X_test": X_test,
        "y_test": y_test,
        "test_score": pipe_rf.score(X_test, y_test),
    }
    results_dict[str(melody_num) + "_" + str(start_tempo)] = result

    return result

In [18]:


# Melody numbers to analyse. The other per-melody lists in this cell are indexed in this same order
# (entry i of each list is passed to temposhap together with melodies[i]).
melodies=[5,7,10,12,18,22,23,24,25,27,33,36,37,38,39,40,44,45,46,51,52,29]
# Value passed to temposhap as `freq` for each melody (number of beat labels per iteration).
freqs = [16,26,24,26,17,24,23,22,20,16,21,22,29,14,20,15,16,14,17,20,21,20]
# NOTE(review): the visible calls to temposhap do not pass `iters` and rely on its default of 5 —
# confirm whether this module-level value is still used anywhere.
iters = 5
# Per-melody step sizes: one inner list per entry of `melodies`, in the same order.
# temposhap tiles inner list i `iters` times into the 'stepsize' column, so it needs freqs[i] entries.
# NOTE(review): presumably signed pitch steps between consecutive notes — confirm units with the data source.
stepsizes = [
    [2, 1, 0, 0, 4, -4, -5, -3, 5, -4, -1, -2, -2, 2, -2, 9],
    [-5, -3, 3, -7, 4, 3, 3, -1, -4, -5, 5, 4, -4, 7, -3, -2, 3, 4, -2, -2, -1, -2, -2, 0, -1, 8],
    [-2, -1, -2, -2, 2, 2, 1, 2, 2, -2, 2, -4, -1, -2, 3, -3, -3, 1, 2, 2, 1, -1, -2, 5],
    [2, 2, 1, -1, 1, 2, 2, -2, -12, 2, 5, -3, 1, 5, -3, 2, 3, -7, 2, 3, -1, -2, -2, -1, 1, 0],
    [0, 5, -1, 1, 2, 2, -4, -5, 7, -1, 1, 2, -2, 3, -3, 2, -9],
    [2, 1, 2, 2, -4, -1, -2, 3, -1, -2, 0, 2, 1, 2, 2, -4, -1, -2, 0, 5, -2, -3, -2, 2],
    [0, 0, 0, 0, 7, -2, 4, -2, -2, -1, -2, -2, -1, 3, 2, 1, 2, 2, -4, 4, -4, -1, -4],
    [9, -4, 0, -1, -4, 9, -4, 0, -1, 3, 2, 1, 2, -3, -2, 7, -12, -2, 7, 3, -1, -9],
    [0, 1, -1, -2, -2, -1, 1, 1, 1, 3, 0, 2, -2, -1, -2, -2, 2, 1, 1, 0],
    [0, -4, 4, 1, 2, 0, 0, -3, -4, 0, 2, 2, 3, -2, -1, 0],
    [3, -2, 11, -2, -3, -4, -2, -1, 10, -2, 7, -2, -1, 0, -2, 3, -3, -2, -1, 1, 8],
    [-2, 0, -1, -2, -2, 2, 2, 1, 2, -2, -1, 5, 2, 1, -5, -3, -2, 2, 1, 0, -1, 3],
    [0, 2, 2, 1, 2, 2, -2, 0, 2, -2, -2, -1, -2, 2, -4, 2, 2, 1, 2, 2, -2, 0, -2, -1, -2, -2, -1, 1, 0],
    [5, 4, -2, 3, -1, 5, -2, -12, 2, 5, -2, -1, 1, -5],
    [-2, -1, 5, -2, 0, -2, 0, -1, -2, -2, 0, -5, 2, 0, 2, 1, 2, -3, 1, 7],
    [4, 3, 0, 0, 2, -2, -2, -1, -2, -2, -1, 1, 2, -2, 0],
    [9, 3, -7, 4, -2, 3, -10, 10, -1, -4, -1, 3, -2, 4, 3, -12],
    [-2, 7, -2, -2, -1, 3, -8, 3, -2, 4, 3, -3, -4, 4],
    [-3, 3, -7, 4, -2, 3, -10, 10, -1, -4, -1, 1, 2, -2, 4, 3, 0],
    [-3, -4, -1, 1, 2, -2, 2, 2, 1, -1, 3, -3, -4, 5, -1, -2, -2, -1, 1, 7],
    [2, 1, -5, -2, -1, 1, 2, -2, -1, 5, 2, 1, 2, 2, 1, 0, 0, 0, 0, -1, -7],
    [2, -2, 2, 2, 1, 4, -2, 2, 2, 1, -5, 2, -4, -1, 3, -2, -3, 0, -2, 0]]

# Per-melody 0/1 contour flags: one inner list per entry of `melodies`, in the same order.
# temposhap tiles inner list i `iters` times into the 'contour' column and encodes it as a binary feature.
contours = [[1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1], 
            [1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1], 
            [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1],
            [0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0],
            [1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1],
            [1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1],
            [1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0],
            [1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0],
            [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0], 
            [0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1],
            [1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1],
            [1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1], 
            [0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0], 
            [1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1], 
            [1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0], 
            [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1], 
            [1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1], 
            [1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1], 
            [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0], 
            [1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0], 
            [1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            [0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1]]
            
# Per-melody 0/1 flags passed to temposhap as `pre_contours` (one inner list per entry of `melodies`,
# same order); tiled `iters` times into the 'pre_contour' column and encoded as a binary feature.
pre_contours = [[0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1], 
                [0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0], 
                [0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1], 
                [0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0], 
                [0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1], 
                [0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0], 
                [0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0], 
                [0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0], 
                [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0], 
                [1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0], 
                [1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0], 
                [1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0], 
                [0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0],
                [0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1], 
                [0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0], 
                [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0], 
                [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0], 
                [1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0], 
                [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0], 
                [0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0], 
                [0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], 
                [1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0]]
# Per-melody 0/1 flags passed to temposhap as `post_contours` (one inner list per entry of `melodies`,
# same order); tiled `iters` times into the 'post_contour' column and encoded as a binary feature.
post_contours = [[1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1], 
                 [1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0], 
                 [1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0], 
                 [0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1],
                 [1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1],
                 [1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0], 
                 [0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0],
                 [0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1],
                 [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0],
                 [1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0],
                 [1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1],
                 [1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1], 
                 [0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1], 
                 [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1], 
                 [0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1], 
                 [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1], 
                 [1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0], 
                 [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0],
                 [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0],
                 [0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1],
                 [0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], 
                 [1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1]]


In [None]:
# Generating SHAP Plots
# for i in range(12,22):
#     for starting_temp in [300,375,450,525,600]:
#         if (melodies[i]== 7 and starting_temp == 525):
#             continue
#         if (melodies[i]== 33 and starting_temp == 525):
#             continue
#         if (melodies[i]== 37 and starting_temp == 525):
#             continue
#         print(melodies[i], freqs[i], stepsizes[i], contours[i],pre_contours[i],post_contours[i],starting_temp)
#         temposhap(melodies[i], freqs[i], stepsizes[i], contours[i],pre_contours[i],post_contours[i],starting_temp)

In [19]:
# Storing SHAP values
# Run temposhap for every melody at every starting tempo and collect the results.
# Example lookup afterwards: results_dict["24_525"]["train_rf_shap_values"]
results_dict = {}

STARTING_TEMPOS = [300, 375, 450, 525, 600]
# (melody, starting tempo) combinations that are deliberately not run.
# NOTE(review): presumably no CSV exists for these — confirm.
SKIPPED_RUNS = {(7, 525), (33, 525), (37, 525)}

# The per-melody lists are parallel; fail loudly if they ever drift apart
# instead of relying on a hard-coded melody count.
per_melody_inputs = [melodies, freqs, stepsizes, contours, pre_contours, post_contours]
if len({len(values) for values in per_melody_inputs}) != 1:
    raise ValueError(
        "melodies, freqs, stepsizes, contours, pre_contours and post_contours "
        "must all have one entry per melody"
    )

for melody, melody_freq, melody_steps, melody_contours, melody_pre, melody_post in zip(*per_melody_inputs):
    for starting_temp in STARTING_TEMPOS:
        if (melody, starting_temp) in SKIPPED_RUNS:
            continue
        results_dict[str(melody) + "_" + str(starting_temp)] = temposhap(
            melody,
            melody_freq,
            melody_steps,
            melody_contours,
            melody_pre,
            melody_post,
            starting_temp,
        )

In [None]:
# Average SHAP value of each feature (one CSV file per feature) for each melody (columns) at each starting tempo (rows).
# Only the first two (continuous) features, stepsize and iternum, used this plain average.
# import statistics as st
# import csv
# feature_names =["stepsize", "iternum","contour", "pre_contour", "post_contour","beat_1"]
# for feature in range(6): 
#     f = open('feature/feature'+ "_" +
#              feature_names[feature] +
#              '.csv', 'w')
#     writer = csv.writer(f)
#     tempos = [] #one csv file
#     title = []
#     title[0:22] = melodies[0:22]
#     writer.writerow(title)
#     for starting_temp in [300,375,450,525,600]:
#         avg = [] #one row in csv file
#         # avg.append(str(starting_temp))
#         for i in range(22):
#             if (melodies[i]== 7 or melodies[i]== 33 or melodies[i]== 37) and starting_temp == 525:
#                 avg.append('')
#                 continue
#             avg.append(st.mean(results_dict[str(melodies[i]) + "_" + str(starting_temp)]["train_rf_shap_values"][:,feature])) 
#         writer.writerow(avg)
#     f.close()

In [None]:
# Calculate the average SHAP value only over rows where the corresponding binary feature is 1, for feature columns 3 to 6 (contour, pre_contour, post_contour, beat_1).
# import statistics as st
# import csv
# feature_names =["stepsize", "iternum","contour", "pre_contour", "post_contour","beat_1"]
# for feature in range(2,6):
#     f = open('feature/feature_new'+ "_" +
#              feature_names[feature] +
#              '.csv', 'w')
#     writer = csv.writer(f)
#     tempos = [] #one csv file
#     title = []
#     title[0:22] = melodies[0:22]
#     writer.writerow(title)
#     for starting_temp in [300,375,450,525,600]:
#         avg = [] #one row in csv file
#         for i in range(22):
#             if (melodies[i]== 7 or melodies[i]== 33 or melodies[i]== 37) and starting_temp == 525:
#                 avg.append('')
#                 continue
            
#             shap_tmp = results_dict[str(melodies[i]) + "_" + str(starting_temp)]["train_rf_shap_values"][:,feature]
#             x_enc_tmp = results_dict[str(melodies[i]) + "_" + str(starting_temp)]["X_train_enc"][feature_names[feature]]
          
#             dot = np.dot(np.array(shap_tmp), np.array(x_enc_tmp))
#             denominator = sum(np.array(x_enc_tmp))
#             avg.append(dot/denominator) 
#         writer.writerow(avg)
#     f.close()