# Machine Learning Project: Key Detection
Authors: Zach Hayes, Matt Gaetano, Max Ivry

Course: CS3540


In [2]:
import pandas as pd
import numpy as np
from scipy.io import wavfile
from scipy.fft import rfft, rfftfreq
from sklearn.preprocessing import StandardScaler

### Data Cleaning and Feature Engineering Pipeline
Data cleaning occurred earlier in the process; see clean.ipynb.

In [3]:
def features_from_audio(df, n_audio_features) -> pd.DataFrame:
    """
    'Join' the existing dataframe with the audio files via
    mapping to frequency features.

    Every entry of ``df["location"]`` is read as a WAV file from
    ``"dataset/" + location``. Each clip is zero-padded at the end to the
    length of the longest clip, transformed with a real FFT, and reduced to
    magnitudes rounded to 4 decimals. The ``n_audio_features`` frequency bins
    whose magnitudes vary the most across clips are kept.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide a ``location`` column (WAV paths relative to
        ``dataset/``) and a ``key`` column (the label to predict).
    n_audio_features : int
        Number of frequency bins to keep; must be between 1 and the number
        of rFFT bins (``longest // 2 + 1``).

    Returns
    -------
    pd.DataFrame
        One row per clip, in the order of ``df``. Feature columns are named
        after their bin frequency and sorted ascending; values are the raw
        (unscaled) magnitudes. A final ``target`` column holds ``df["key"]``.

    Raises
    ------
    ValueError
        If ``df`` has no rows, every clip is empty, a clip is not
        single-channel, or ``n_audio_features`` is out of range.
    """
    locations = df["location"].to_numpy()
    if len(locations) == 0:
        raise ValueError("df contains no audio locations to read")

    # Read each clip exactly once and keep the samples, instead of reading
    # every file a second time after the longest length is known.
    signals = []
    rate = 0
    for loc in locations:
        # NOTE(review): the rate of the LAST file labels the frequency bins,
        # so all clips are assumed to share one sample rate - confirm.
        rate, data = wavfile.read("dataset/" + loc)
        if data.ndim != 1:
            raise ValueError(
                f"expected single-channel audio, got shape {data.shape} for {loc}"
            )
        signals.append(data)

    longest = max(len(signal) for signal in signals)
    if longest == 0:
        raise ValueError("all audio clips are empty")

    freq_bins = rfftfreq(longest, 1 / rate)
    n_bins = freq_bins.size
    # Guard explicitly: n == 0 would otherwise select every bin because
    # `[-0:]` is the whole array.
    if not 1 <= n_audio_features <= n_bins:
        raise ValueError(
            f"n_audio_features must be between 1 and {n_bins}, got {n_audio_features}"
        )

    # Fill a preallocated matrix row by row rather than growing arrays with
    # np.append, which copies the accumulated rows on every call.
    spectra = np.empty((len(signals), n_bins), dtype=np.float64)
    for row, signal in enumerate(signals):
        # Trailing 0s to achieve the same length as the longest instance
        padded = np.zeros(longest)
        padded[: len(signal)] = signal
        spectra[row] = np.round(np.abs(rfft(padded)), 4)

    # Select the bins with the largest population variance (ddof=0). Only the
    # variance is used for selection; the magnitudes themselves stay unscaled.
    variances = spectra.var(axis=0)
    top_bins = np.argpartition(variances, -n_audio_features)[-n_audio_features:]
    # freq_bins is increasing, so sorting indices sorts columns by frequency.
    top_bins = np.sort(top_bins)
    new_df = pd.DataFrame(data=spectra[:, top_bins], columns=freq_bins[top_bins])

    # Add the target positionally: assigning the Series itself would align on
    # index labels and yield NaN/misplaced targets for a non-default index.
    new_df["target"] = df["key"].to_numpy()
    return new_df

In [5]:
def df_pipeline(
    metadata_path="dataset/metadata.json", n_audio_features=1000
) -> pd.DataFrame:
    """
    From the original audio file and json pair dataset,
    create the dataset from which to work.

    Parameters
    ----------
    metadata_path : str
        JSON-lines metadata file to load. Defaults to the path that was
        previously hard-coded.
    n_audio_features : int
        Number of audio features forwarded to ``features_from_audio``.
        Defaults to the previously hard-coded 1000.

    Returns
    -------
    pd.DataFrame
        Whatever ``features_from_audio`` builds from the loaded metadata.
    """
    metadata = pd.read_json(metadata_path, lines=True)
    return features_from_audio(metadata, n_audio_features)

# Build the feature table, show a preview and its dimensions, then persist it
# as JSON lines so later cells can reload it without redoing the audio work.
df = df_pipeline()
print(df.head(), df.shape, sep="\n")
df.to_json(path_or_buf="data.json", orient="records", lines=True)

       0.0      36.6      36.7      38.0      38.2  38.300000000000004  \
0   3.0074   81.2963   83.7169  119.6888  115.8881            413.4768   
1  13.6523  361.5547  119.7225  170.9554   84.0531            273.8909   
2   7.1155   80.2869  169.7490  149.4092  132.8215            198.8544   
3   2.0998  235.5117  161.2402  241.3578  243.2237             89.7297   
4  16.2152  384.2739  174.5714   84.3117  137.9726            261.8213   

       39.0      39.1      39.2       39.6  ...    466.0    523.0    523.1  \
0  136.8640  817.5328   86.4743   477.6116  ...  15.8853   1.5231   4.9432   
1  175.9083  665.0791  588.2624    84.1962  ...   6.9459  26.9344   9.0805   
2  150.5273  112.4005  335.4482   675.7621  ...   9.3406  18.8055   7.0021   
3  640.7394  121.9373  396.3036  1138.3905  ...  26.2306   8.1640  26.6106   
4  365.8102   96.3555  219.9515   211.2868  ...  39.9084   4.7010  50.1098   

     523.4     524.4    524.5     554.2     554.4    1046.0       target  
0   9.2590 

In [11]:
# Reload the persisted feature table.
df = pd.read_json("data.json", lines=True)

# Each target is a two-element list; expand it into two separate columns.
target_parts = pd.DataFrame(df["target"].tolist(), index=df.index)
df[["note", "modality"]] = target_parts

# Class balance for the combined label and for each component.
for label_column in ("target", "note", "modality"):
    print(df[label_column].value_counts())

  df = pd.read_json("data.json", lines=True)
  df = pd.read_json("data.json", lines=True)
  df = pd.read_json("data.json", lines=True)


target
[G, major]     763
[C, major]     676
[A, major]     670
[D, major]     664
[E, major]     602
[F, major]     548
[A, minor]     488
[B, major]     425
[C, minor]     366
[G, minor]     344
[D, minor]     337
[C#, major]    305
[F#, major]    304
[Bb, major]    276
[E, minor]     272
[D#, major]    257
[A#, major]    256
[B, minor]     249
[F, minor]     248
[G#, major]    226
[Ab, major]    191
[F#, minor]    191
[C#, minor]    188
[Eb, major]    178
[A#, minor]    155
[D#, minor]    135
[G#, minor]    134
[Ab, minor]     89
[Eb, minor]     76
[Bb, minor]     72
[Db, major]     66
[Gb, major]     50
[Gb, minor]     25
[Db, minor]     21
Name: count, dtype: int64
note
A     1158
G     1107
C     1042
D     1001
E      874
F      796
B      674
F#     495
C#     493
A#     411
D#     392
G#     360
Bb     348
Ab     280
Eb     254
Db      87
Gb      75
Name: count, dtype: int64
modality
major    6457
minor    3390
Name: count, dtype: int64


### Support Vector Machine Approach