In [1]:
from guitarpro.models import GPException
import os, glob, json
import guitarpro, librosa, scipy
from guitarpro import NoteType
import numpy as np

In [2]:
def get_guitar_tracks(song):
    """Select the six-string guitar tracks of a song.

    A track is kept when it is not a percussion track, its channel instrument
    is one of the MIDI guitar programs below, and it has exactly six strings:

        24 Acoustic Guitar (nylon)
        25 Acoustic Guitar (steel)
        26 Electric Guitar (jazz)
        27 Electric Guitar (clean)
        28 Electric Guitar (muted)
        29 Overdriven Guitar
        30 Distortion Guitar

    Args:
        song: An object exposing a `tracks` sequence (a parsed GuitarPro song).

    Returns:
        list: The qualifying tracks, in their original order.
    """
    guitar_programs = (24, 25, 26, 27, 28, 29, 30)

    def is_six_string_guitar(candidate):
        # Percussion is ruled out first because some drum tracks carry a
        # guitar program number.
        if candidate.isPercussionTrack:
            return False
        if candidate.channel.instrument not in guitar_programs:
            return False
        return len(candidate.strings) == 6

    return list(filter(is_six_string_guitar, song.tracks))

def get_note_info(note, bpm, margin=None):
    """Build the note-level annotation dictionary for a single note.

    Timing is derived from the tick values stored on the note's beat: a
    `beat.start` of 960 maps to 0 seconds, and 960 ticks last one beat at the
    given tempo. Both start and duration are rounded to 4 decimals.

    Args:
        note: The note to describe. Reads `beat.start`, `beat.duration.time`,
            `string`, `value`, `realValue` and `effect`.
        bpm: Tempo used to convert ticks to seconds.
        margin: Optional start time (in seconds) of the enclosing segment.
            When truthy it is subtracted from the note's start so the start
            becomes relative to the segment instead of the whole song.

    Returns:
        dict: Keys "time" ({"start", "dur"} in seconds), "string", "fret",
        "pitch" (string value + fret, i.e. MIDI note number) and "effects".
    """
    beats_per_second = bpm / 60

    # Timing stored on the beat is global, i.e. relative to the song start.
    onset = round(((note.beat.start - 960) / 960) / beats_per_second, 4)
    if margin:
        onset = onset - margin
    length = round((note.beat.duration.time / 960) / beats_per_second, 4)

    fx = note.effect
    effects = {
        "bend": bool(fx.isBend),
        "vibrato": fx.vibrato,
        "hammer": fx.hammer,
        "slide": bool(fx.slides),
        # Only present when the corresponding technique is used.
        "bend_type": fx.bend.type.name if fx.isBend else None,
        "slide_types": [kind.name for kind in fx.slides] if fx.slides else None,
    }

    return {
        "time": {"start": onset, "dur": length},
        "string": note.string,
        "fret": note.value,
        "pitch": note.realValue,
        "effects": effects,
    }

In [3]:
def get_single_tracks(
    file,
    output_dir,
    unify_volume=True,
    force_clean=True,
    disable_repeats=True,
    disable_mixTableChange=True,
    disable_other_techniques=True,
    force_normal=True
):
    """Split one multi-track GuitarPro file into several one-track GuitarPro files

    Every six-string guitar track returned by `get_guitar_tracks` is cleaned
    according to the flags below and written to
    `<output_dir>/<song name>_<track name>.gp5`. The song name is the part of
    the file's base name before its first dot, and "/" in the track name is
    replaced by a space.

    Args:
        file (str): The path to the file to split
        output_dir (str): The directory for the output files
        unify_volume (bool, optional): Whether to adjust the volume of every track to the same level. Defaults to True.
        force_clean (bool, optional): Whether to force all tracks to use the clean electric guitar tone. Defaults to True.
        disable_repeats (bool, optional): Whether to disable all repeats and alternate endings in the GuitarPro file. Defaults to True.
        disable_mixTableChange (bool, optional): Whether to disable mixTableChange instances (e.g., tempo change in the middle of the song). Defaults to True.
        disable_other_techniques (bool, optional): Whether to disable other playing techniques (e.g., grace notes, slidein, slideout). Defaults to True.
        force_normal (bool, optional): Whether to change all note types (rest, dead, tie) to normal. Defaults to True.

    Returns:
        None. When writing a track raises `GPException`, a message is printed,
        any partially written output file is removed, and processing
        continues with the next track.
    """
    song = guitarpro.parse(file)
    tracks = get_guitar_tracks(song)
    # The song part of the output name is the same for every track.
    song_name = file.split("/")[-1].split(".")[0]

    for track in tracks:
        # unify the volume for rendered audio
        if unify_volume:
            # default volume is 120 for newly created tracks
            track.channel.volume = 120
        # force the instrument to be clean electric guitar (MIDI program 27),
        # so that synthesized audio is automatically clean guitar
        if force_clean:
            track.channel.instrument = 27

        for measure in track.measures:
            # disable repeats in all measures
            # this includes repeats and alternative endings
            if disable_repeats:
                # isRepeatOpen is boolean, repeatClose takes -1 or 1,
                # repeatAlternative can be whatever number, depending on which repeat group it belongs to
                # the following is the default setting in normal bars
                measure.header.isRepeatOpen = False
                measure.header.repeatClose = -1
                measure.header.repeatAlternative = 0

            for voice in measure.voices:
                for beat in voice.beats:
                    # mixTableChange includes tempo changes, which mess up the
                    # calculation of note timings, and other effect/instrument changes
                    if disable_mixTableChange:
                        beat.effect.mixTableChange = None

                    if disable_other_techniques:
                        beat.effect.fadeIn = False
                        beat.effect.tremoloBar = None

                    for note in beat.notes:
                        if disable_other_techniques:
                            note.effect.grace = None
                            note.effect.harmonic = None
                            note.effect.trill = None
                            # slideType values: 1 - shiftSlideTo, 2 - legatoSlideTo, others are ignored
                            note.effect.slides = [
                                slide for slide in note.effect.slides if slide.value in [1, 2]
                            ]
                            note.effect.letRing = False

                        # force_normal changes the NoteType (tie, rest, dead) to normal;
                        # tied notes are very messy and hard to handle
                        if force_normal:
                            note.type = NoteType.normal

        # Reuse the parsed song so the original metadata is preserved; only
        # its track list is swapped. `tracks` was collected before the loop,
        # so reassigning `song.tracks` does not affect the iteration.
        song.tracks = [track]
        file_name = "{}_{}.gp5".format(song_name, track.name.replace("/", " "))
        output_path = os.path.join(output_dir, file_name)
        try:
            guitarpro.write(song, output_path)
        except GPException:
            print(f"GPException, removing the corrupt file {file_name}")
            # The write can fail before the file is created, so only remove
            # it when it is actually there.
            if os.path.exists(output_path):
                os.remove(output_path)


In [2]:
# Data locations for the "all_time_top_by_hits" collection.
# MULTI_TRACK_GTP_DIR is globbed for the source multi-track GuitarPro files;
# the SINGLE_TRACK_* directories hold the derived single-track GuitarPro
# files, the JSON annotations and the audio (*.wav) files read further down.
# NOTE: another cell in this notebook assigns the same four names for the
# "all_time_top_by_rating" collection; whichever cell ran last is in effect.
MULTI_TRACK_GTP_DIR = "/Volumes/MacOnly/UG_raw/all_time_top_by_hits"
SINGLE_TRACK_GTP_DIR = "/Volumes/MacOnly/UG_rewrite/all_time_top_by_hits/clean_single_track_gtps"
SINGLE_TRACK_ANNO_DIR = "/Volumes/MacOnly/UG_rewrite/all_time_top_by_hits/clean_single_track_annos"
SINGLE_TRACK_AUDIO_DIR = "/Volumes/MacOnly/UG_rewrite/all_time_top_by_hits/clean_single_track_audio"

In [2]:
# Data locations for the "all_time_top_by_rating" collection.
# MULTI_TRACK_GTP_DIR is globbed for the source multi-track GuitarPro files;
# the SINGLE_TRACK_* directories hold the derived single-track GuitarPro
# files, the JSON annotations and the audio (*.wav) files read further down.
# NOTE: another cell in this notebook assigns the same four names for the
# "all_time_top_by_hits" collection; whichever cell ran last is in effect.
MULTI_TRACK_GTP_DIR = "/Volumes/MacOnly/UG_raw/all_time_top_by_rating"
SINGLE_TRACK_GTP_DIR = "/Volumes/MacOnly/UG_rewrite/all_time_top_by_rating/clean_single_track_gtps"
SINGLE_TRACK_ANNO_DIR = "/Volumes/MacOnly/UG_rewrite/all_time_top_by_rating/clean_single_track_annos"
SINGLE_TRACK_AUDIO_DIR = "/Volumes/MacOnly/UG_rewrite/all_time_top_by_rating/clean_single_track_audio"

In [None]:
# Split every multi-track GuitarPro file into cleaned single-track files.
# The running zero-based index is printed before each file is processed.
i = 0
multi_track_pattern = os.path.join(MULTI_TRACK_GTP_DIR, "*.gp*")
for file in glob.glob(multi_track_pattern):
    print(i)
    get_single_tracks(
        file,
        output_dir=SINGLE_TRACK_GTP_DIR,
        unify_volume=True,
        force_clean=True,
        disable_repeats=True,
        disable_mixTableChange=True,
        disable_other_techniques=True,
    )
    i += 1

In [6]:
# check that there are no notes in the second voice of each measure;
# every offending file is reported exactly once, however many of its
# measures have notes in the second voice
for file in glob.glob(os.path.join(SINGLE_TRACK_GTP_DIR, "*.gp5")):
    song = guitarpro.parse(file)
    track = song.tracks[0]
    has_second_voice_notes = any(
        beat.notes
        for measure in track.measures
        for beat in measure.voices[1].beats
    )
    if has_second_voice_notes:
        print(file)

In [7]:
# Sanity check: every bar should hold at least one beat in its first voice,
# even when the bar is completely silent. Files that violate this are printed
# (once per file).
for file in glob.glob(os.path.join(SINGLE_TRACK_GTP_DIR, "*.gp5")):
    song = guitarpro.parse(file)
    track = song.tracks[0]
    if any(not measure.voices[0].beats for measure in track.measures):
        print(file)

In [3]:
def poly_vs_mono_vs_silence(song):
    """Locate the monophonic, polyphonic and silent stretches of a song.

    Only the first voice of the first track is inspected. Each beat is
    classified by its number of notes (0: silence, 1: monophonic, 2 or more:
    polyphonic). Consecutive beats of the same class are merged into a single
    [start, end] segment, in seconds.

    Args:
        song (Song): A pyguitarpro Song object. The song to analyze

    Returns:
        list, list, list: Lists of [start, end] time stamps for the
        polyphonic, monophonic and silent segments, in that order.
    """
    beats_per_second = song.tempo / 60

    # Indexed by beat class: 0 silence, 1 monophonic, 2 polyphonic.
    segments_by_class = ([], [], [])
    last_class = -1

    for measure in song.tracks[0].measures:
        for beat in measure.voices[0].beats:
            # A beat start of 960 ticks is time zero; 960 ticks last one beat.
            onset_sec = round(((beat.start - 960) / 960) / beats_per_second, 4)
            dur_sec = round((beat.duration.time / 960) / beats_per_second, 4)
            offset_sec = onset_sec + dur_sec

            beat_class = min(len(beat.notes), 2)
            if beat_class == last_class:
                # Same class as the previous beat: stretch the open segment.
                segments_by_class[beat_class][-1][1] = offset_sec
            else:
                segments_by_class[beat_class].append([onset_sec, offset_sec])
            last_class = beat_class

    silence_segments, mono_segments, poly_segments = segments_by_class
    return poly_segments, mono_segments, silence_segments


In [9]:
def gen_anno(file, anno_dir):
    """Write the note-level annotation JSON for one single-track GuitarPro file.

    The first voice of the first track is scanned beat by beat; only beats
    holding exactly one note (monophonic beats) are annotated. The result is
    saved as `<anno_dir>/<file base name without extension>.json`.

    Args:
        file (str): The path to the single-track GuitarPro file
        anno_dir (str): The directory to put generated JSON file
    """
    song = guitarpro.parse(file)
    tempo = song.tempo

    # Note timings stay global (relative to the song start): margin is None.
    note_infos = [
        get_note_info(beat.notes[0], tempo, margin=None)
        for measure in song.tracks[0].measures
        for beat in measure.voices[0].beats
        if len(beat.notes) == 1
    ]

    track_title = os.path.splitext(file.rsplit("/", 1)[-1])[0]
    json_path = os.path.join(anno_dir, f"{track_title}.json")
    with open(json_path, "w") as outfile:
        json.dump(note_infos, outfile, indent=4)


In [None]:
# Produce one annotation JSON per single-track GuitarPro file, printing a
# running one-based count after each file is done.
i = 0
single_track_pattern = os.path.join(SINGLE_TRACK_GTP_DIR, "*.gp5")
for i, file in enumerate(glob.glob(single_track_pattern), start=1):
    gen_anno(file, anno_dir=SINGLE_TRACK_ANNO_DIR)
    print(i)

In [4]:
# hyperparams for mono detector
# Every (n_mfcc, frame_size) pair from these two lists is used to generate one
# training-data file for the mono detector.
N_MFCC = [8, 13, 20, 40]  # numbers of MFCC coefficients to try
FRAME_SIZE = [1024, 2048, 4096]  # analysis frame sizes to try (used as n_fft and hop_length)

In [None]:
# generate gt for mono detector, using all combinations of n_mfcc and frame_size
def _labelled_mfcc_frames(y, sr, segments, n_mfcc, frame_size, label):
    """Compute MFCC frames with a label row for each time segment of `y`.

    Args:
        y (array): The audio signal of the whole track.
        sr (int): The sample rate of `y`.
        segments (list): [start, end] time stamps in seconds.
        n_mfcc (int): Number of MFCC coefficients per frame.
        frame_size (int): Used as both FFT size and hop length, so frames do
            not overlap.
        label (float): The class label appended as the last row.

    Returns:
        list: One (n_mfcc + 1, n_frames) array per segment that is at least
        `frame_size` samples long; shorter segments are skipped.
    """
    labelled = []
    for start_sec, end_sec in segments:
        y_segment = y[int(start_sec * sr) : int(end_sec * sr)]
        if len(y_segment) < frame_size:
            continue

        # y and sr are passed by keyword: recent librosa releases reject
        # them as positional arguments.
        mel_spec = librosa.feature.melspectrogram(
            y=y_segment, sr=sr, n_fft=frame_size, hop_length=frame_size
        )
        log_mel_spec = librosa.power_to_db(mel_spec)
        mfcc = librosa.feature.mfcc(S=log_mel_spec, n_mfcc=n_mfcc)

        label_row = np.full((1, mfcc.shape[1]), label, dtype=float)
        labelled.append(np.append(mfcc, label_row, axis=0))
    return labelled


for n_mfcc in N_MFCC:
    for frame_size in FRAME_SIZE:
        poly_features_with_label = []
        mono_features_with_label = []

        audio_files = glob.glob(os.path.join(SINGLE_TRACK_AUDIO_DIR, "*.wav"))
        for i, audio_file in enumerate(audio_files, start=1):
            track_name, _ = os.path.splitext(os.path.basename(audio_file))
            print(track_name, i)
            # the GuitarPro file shares its base name with the audio file
            gtp_file = os.path.join(SINGLE_TRACK_GTP_DIR, track_name + ".gp5")

            y, sr = librosa.load(audio_file, sr=None)
            p, m, _ = poly_vs_mono_vs_silence(guitarpro.parse(gtp_file))

            # label 1 marks polyphonic frames, label 0 monophonic frames
            poly_features_with_label.extend(
                _labelled_mfcc_frames(y, sr, p, n_mfcc, frame_size, 1.0)
            )
            mono_features_with_label.extend(
                _labelled_mfcc_frames(y, sr, m, n_mfcc, frame_size, 0.0)
            )

        poly_data = np.concatenate(poly_features_with_label, axis=1)
        mono_data = np.concatenate(mono_features_with_label, axis=1)

        print(poly_data.shape)
        print(mono_data.shape)

        # combine mono and poly and save to file; after the transpose every
        # row is one frame: n_mfcc coefficients followed by the label
        data = np.concatenate((mono_data, poly_data), axis=1)
        data = data.transpose()
        print(data.shape)

        np.save(file=f"/Users/jw/Documents/mono_detector_data/MFCC_{n_mfcc}_FR_{frame_size}.npy", arr=data)

In [11]:
# feature extraction function with resampling
# NOTE(review): no resampling happens inside this function; "with resampling"
# presumably refers to how the audio is loaded before the call - confirm.
def extract_features(note_audio):
    """Compute the 164-dimensional feature vector of one note event.

    The returned 1D feature vector is laid out as:

    * 6 statistics of the F0 track, then 6 of its first-order difference (12)
    * 6 statistics of each of centroid, bandwidth, flatness, rolloff,
      zero-crossing rate and onset strength (36)
    * 6 statistics of the first-order difference of each of those (36)
    * MFCC means, MFCC-difference means, MFCC stds, MFCC-difference stds (80)

    The 6 statistics are mean, std, max, min, skewness and kurtosis.

    NOTE: `sr` is not a parameter. It is looked up in the enclosing
    (notebook/module) scope when the function runs, so a global `sr` matching
    `note_audio` must exist.
    NOTE(review): a later cell defines `extract_features(y, sr, frame_size,
    hop_size)`; once that cell has run it replaces this definition.

    Args:
        note_audio (array): The audio signal of a note event.

    Returns:
        array: The final feature vector for the input note event, of the
        shape (164,)
    """

    def get_all_stats(a):
        """Given a 1D time series, compute the 6 statistics.

        Args:
            a (array): The input time series, of the shape (n,)

        Returns:
            array: mean, std, max, min, skewness and kurtosis, of the
            shape (6,)
        """
        assert a.ndim == 1
        stats = np.array(
            [
                np.mean(a),
                np.std(a),
                np.max(a),
                np.min(a),
                scipy.stats.skew(a, nan_policy="raise"),
                scipy.stats.kurtosis(a, nan_policy="raise"),
            ]
        )
        assert stats.shape == (6,)
        return stats

    def extract_mfccs(note_audio):
        """Given the audio signal of a note event, compute the MFCC features.

        The MFCC features include the mean and std of the MFCCs aggregated
        over the note event, and the mean and std of the 1st order
        differences aggregated over the note event.

        Args:
            note_audio (array): The note event audio signal

        Returns:
            array: The MFCC feature vector, of the shape (80,)
        """
        # y is passed by keyword: recent librosa releases reject it as a
        # positional argument.
        mfccs = librosa.feature.mfcc(y=note_audio, sr=sr)
        mfccs_diff = np.diff(mfccs, n=1)
        assert mfccs.shape[1] == mfccs_diff.shape[1] + 1

        mfccs_mean = np.mean(mfccs, axis=1)
        assert mfccs_mean.shape == (20,)
        mfccs_diff_mean = np.mean(mfccs_diff, axis=1)
        assert mfccs_diff_mean.shape == (20,)

        mfccs_std = np.std(mfccs, axis=1)
        assert mfccs_std.shape == (20,)
        mfccs_diff_std = np.std(mfccs_diff, axis=1)
        assert mfccs_diff_std.shape == (20,)

        mfcc_feature = np.concatenate(
            (mfccs_mean, mfccs_diff_mean, mfccs_std, mfccs_diff_std), axis=0
        )
        assert mfcc_feature.shape == (80,)
        return mfcc_feature

    # get pitch features
    # fill_na=None makes pyin give a best guess for unvoiced frames, so there
    # is no NaN in the estimated F0 and every frame is kept
    pitch, _, _ = librosa.pyin(
        y=note_audio,
        fmin=librosa.note_to_hz("C2"),
        fmax=librosa.note_to_hz("G6"),
        sr=sr,
        fill_na=None,
    )
    pitch_diff = np.diff(pitch, n=1)

    assert pitch.ndim == 1
    assert pitch_diff.ndim == 1
    assert pitch.shape[0] == pitch_diff.shape[0] + 1

    # get spectral/timbral features (one row per descriptor)
    centroid = np.squeeze(librosa.feature.spectral_centroid(y=note_audio, sr=sr))
    bandwidth = np.squeeze(librosa.feature.spectral_bandwidth(y=note_audio, sr=sr))
    flatness = np.squeeze(librosa.feature.spectral_flatness(y=note_audio))
    rolloff = np.squeeze(librosa.feature.spectral_rolloff(y=note_audio, sr=sr))
    zero_crossing = np.squeeze(librosa.feature.zero_crossing_rate(y=note_audio))
    flux = librosa.onset.onset_strength(y=note_audio, sr=sr)
    specs = np.array([centroid, bandwidth, flatness, rolloff, zero_crossing, flux])
    specs_diff = np.diff(specs, n=1)

    assert specs.ndim == 2
    assert specs_diff.ndim == 2
    assert specs.shape[1] == specs_diff.shape[1] + 1

    # aggregate every time series into its 6 statistics, in a fixed order:
    # pitch, pitch diff, each descriptor, each descriptor diff
    parts = [get_all_stats(pitch), get_all_stats(pitch_diff)]
    parts.extend(get_all_stats(spec) for spec in specs)
    parts.extend(get_all_stats(spec_diff) for spec_diff in specs_diff)
    feature = np.concatenate(parts, axis=0)
    assert feature.shape == (84,)

    mfcc_feature = extract_mfccs(note_audio)
    feature = np.concatenate((feature, mfcc_feature), axis=0)
    assert feature.shape == (164,)

    return feature

In [12]:
# feature extraction function without resampling
def extract_features(y, sr, frame_size, hop_size):
    """Compute the frame-level feature matrix of a whole audio signal.

    Per frame the base features are the MFCCs followed by F0, spectral
    centroid, bandwidth, flatness, rolloff, zero-crossing rate and onset
    strength. The first- and second-order deltas of all base features are
    stacked below them. The final assertion requires 81 rows in total, i.e.
    27 base rows (the MFCC rows plus the 7 other descriptors) times 3.

    Args:
        y (array): The audio signal.
        sr (int): The sample rate of `y`.
        frame_size (int): The analysis frame length in samples.
        hop_size (int): The hop length in samples.

    Returns:
        array: The feature matrix, of the shape (81, n_frames)
    """
    # y and sr are passed by keyword throughout: recent librosa releases
    # reject them as positional arguments.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_fft=frame_size, hop_length=hop_size)
    # fill_na=None makes pyin give a best guess for unvoiced frames instead of NaN
    pitch, _, _ = librosa.pyin(
        y=y,
        fmin=librosa.note_to_hz("C2"),
        fmax=librosa.note_to_hz("G6"),
        frame_length=frame_size,
        hop_length=hop_size,
        sr=sr,
        fill_na=None,
    )
    centroid = np.squeeze(
        librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=frame_size, hop_length=hop_size)
    )
    bandwidth = np.squeeze(
        librosa.feature.spectral_bandwidth(y=y, sr=sr, n_fft=frame_size, hop_length=hop_size)
    )
    flatness = np.squeeze(
        librosa.feature.spectral_flatness(y=y, n_fft=frame_size, hop_length=hop_size)
    )
    rolloff = np.squeeze(
        librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=frame_size, hop_length=hop_size)
    )
    zero_crossing = np.squeeze(
        librosa.feature.zero_crossing_rate(y=y, frame_length=frame_size, hop_length=hop_size)
    )
    flux = librosa.onset.onset_strength(y=y, sr=sr, n_fft=frame_size, hop_length=hop_size)

    non_mfccs = np.array([pitch, centroid, bandwidth, flatness, rolloff, zero_crossing, flux])
    features = np.concatenate((mfcc, non_mfccs), axis=0)

    features_delta = librosa.feature.delta(features, order=1)
    features_accel = librosa.feature.delta(features, order=2)

    all_features = np.concatenate((features, features_delta, features_accel), axis=0)
    assert all_features.shape[0] == 81
    return all_features

def get_all_stats(a):
    """Given a 2D matrix, compute and concatenate the 6 statistics of each row.

    The statistics are taken along axis 1 and concatenated statistic by
    statistic: all row means first, then all row stds, maxima, minima,
    skewnesses and kurtoses.

    Args:
        a (array): The input matrix, of the shape (n_rows, n)

    Returns:
        array: An array containing the statistics, of the shape (n_rows * 6,)

    Raises:
        AssertionError: If `a` is not 2D.
        ValueError: If `a` contains NaN (raised by the scipy statistics,
            which are called with nan_policy="raise").
    """
    assert a.ndim == 2
    means = np.mean(a, axis=1)
    stds = np.std(a, axis=1)
    maxima = np.max(a, axis=1)
    minima = np.min(a, axis=1)
    skewness = scipy.stats.skew(a, axis=1, nan_policy="raise")
    kurtosis = scipy.stats.kurtosis(a, axis=1, nan_policy="raise")

    stats = np.concatenate((means, stds, maxima, minima, skewness, kurtosis), axis=0)
    assert stats.shape[0] == a.shape[0] * 6
    return stats

In [6]:
# best parameter combination from note-event separation
# These globals are passed to extract_features and librosa.time_to_frames in
# the data-generation cell below.
frame_size = 1024  # analysis frame length in samples
hop_size = 512  # hop length in samples

In [None]:
# generate data for note events/transition events
# Note-event labels: 0 normal, 1 bend, 2 vibrato (vibrato wins over bend).
# Transition labels: 0 normal, 3 hammer-on, 4 pull-off, 5 slide.
notes_data = []
trans_data = []
j = 0
for j, audio_file in enumerate(glob.glob(os.path.join(SINGLE_TRACK_AUDIO_DIR, "*.wav")), start=1):
    track_name, _ = os.path.splitext(os.path.basename(audio_file))
    print(track_name, j)
    # the annotation file shares its base name with the audio file
    anno_file = os.path.join(SINGLE_TRACK_ANNO_DIR, track_name + ".json")

    y, sr = librosa.load(audio_file, sr=None)
    all_features = extract_features(y, sr, frame_size, hop_size)

    with open(anno_file) as anno:
        annotation = json.load(anno)

    last_index = len(annotation) - 1
    for i, note_event in enumerate(annotation):
        onset = note_event["time"]["start"]
        dur = note_event["time"]["dur"]
        offset = onset + dur
        onset_fr = librosa.time_to_frames(onset, sr=sr, hop_length=hop_size)
        offset_fr = librosa.time_to_frames(offset, sr=sr, hop_length=hop_size)

        # if note duration is shorter than one frame, discard it
        # (its outgoing transition is discarded with it)
        if offset_fr - onset_fr < 1:
            continue

        # aggregate the frames from onset to offset (inclusive) into one vector
        note_feature = all_features[:, onset_fr : offset_fr + 1]
        note_aggregation = get_all_stats(note_feature)

        if note_event["effects"]["vibrato"]:
            label = 2
        elif note_event["effects"]["bend"]:
            label = 1
        else:
            label = 0
        feature_w_label = np.append(note_aggregation, label)
        notes_data.append(feature_w_label)

        # if the note event is the last one in the song, ignore its transitions
        if i == last_index:
            break

        next_note_event = annotation[i + 1]
        # if the next note event doesn't immediately follow the current note event, ignore the transition
        if next_note_event["time"]["start"] - offset > 0.05:
            continue

        # the transition window spans two frames on either side of the offset;
        # the start is clamped at 0 because a negative start index would wrap
        # around to the end of the feature matrix
        tran_onset_fr = max(offset_fr - 2, 0)
        tran_offset_fr = offset_fr + 2
        tran_feature = all_features[:, tran_onset_fr : tran_offset_fr + 1]
        tran_aggregation = get_all_stats(tran_feature)

        if note_event["effects"]["hammer"]:
            if note_event["pitch"] < next_note_event["pitch"]:
                # hammer-on
                tran_label = 3
            elif note_event["pitch"] > next_note_event["pitch"]:
                # pull-off
                tran_label = 4
            else:
                tran_label = 0
        elif note_event["effects"]["slide"]:
            tran_label = 5
        else:
            tran_label = 0
        tran_feature_w_label = np.append(tran_aggregation, tran_label)
        trans_data.append(tran_feature_w_label)

In [None]:
# Drop duplicate rows (feature vector + label) from the note and transition
# data; np.unique with axis=0 also returns the remaining rows sorted.
u_notes_data = np.unique(notes_data, axis=0)
u_trans_data = np.unique(trans_data, axis=0)
print(u_notes_data.shape)
print(u_trans_data.shape)

In [None]:
# Save the de-duplicated note-event data.
# NOTE(review): the file name is hard-coded to "hits"; presumably this must be
# changed by hand when the rating collection is processed - confirm.
np.save("/Users/jw/Documents/extensive_features/hits_data_notes.npy", u_notes_data)
u_notes_data.shape

In [None]:
# Save the de-duplicated transition data.
# NOTE(review): the file name is hard-coded to "hits"; presumably this must be
# changed by hand when the rating collection is processed - confirm.
np.save("/Users/jw/Documents/extensive_features/hits_data_trans.npy", u_trans_data)
u_trans_data.shape

In [None]:
# Count the de-duplicated note-event rows per label; the label is the last
# column (0 normal, 1 bend, 2 vibrato, 3 hammer, 4 pull, 5 slide).
label = u_notes_data[:, -1]
normal, bend, vibrato, hammer, pull, slide = (label == code for code in range(6))
print(f"normal: {np.sum(normal)}, bend: {np.sum(bend)}, vibrato: {np.sum(vibrato)}, hammer: {np.sum(hammer)}, pull: {np.sum(pull)}, slide: {np.sum(slide)}")

In [None]:
# Count the de-duplicated transition rows per label; the label is the last
# column (0 normal, 1 bend, 2 vibrato, 3 hammer, 4 pull, 5 slide).
label = u_trans_data[:, -1]
normal, bend, vibrato, hammer, pull, slide = (label == code for code in range(6))
print(f"normal: {np.sum(normal)}, bend: {np.sum(bend)}, vibrato: {np.sum(vibrato)}, hammer: {np.sum(hammer)}, pull: {np.sum(pull)}, slide: {np.sum(slide)}")

In [None]:
# Convert the accumulated lists of per-event vectors into 2D arrays
# (one row per event, label in the last column).
notes_data = np.array(notes_data)
trans_data = np.array(trans_data)
print(notes_data.shape, trans_data.shape)

In [None]:
# Stack note-event rows on top of transition rows into one data set.
# NOTE(review): the `rating_data` cell builds the same concatenation under a
# different name; presumably only the cell matching the collection selected
# earlier is meant to be run - confirm.
hits_data = np.concatenate((notes_data, trans_data), axis=0)
hits_data.shape

In [None]:
# Stack note-event rows on top of transition rows into one data set.
# NOTE(review): the `hits_data` cell builds the same concatenation under a
# different name; presumably only the cell matching the collection selected
# earlier is meant to be run - confirm.
rating_data = np.concatenate((notes_data, trans_data), axis=0)
rating_data.shape

In [None]:
# De-duplicate the combined rows of the hits collection and save them.
u_hits_data = np.unique(hits_data, axis=0)
np.save("/Users/jw/Documents/extensive_features/hits_data.npy", u_hits_data)
u_hits_data.shape

In [None]:
# De-duplicate the combined rows of the rating collection and save them.
# NOTE(review): this writes under "final_features/" whereas the hits cells
# write under "extensive_features/" - confirm the directory is intended.
u_rating_data = np.unique(rating_data, axis=0)
np.save("/Users/jw/Documents/final_features/rating_data.npy", u_rating_data)
u_rating_data.shape