Basic import statements for the project

In [1]:
#import statements
import pandas as pd
import matplotlib.pyplot as plt
#glob is useful for working with filepaths
import glob
import math
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import re

1. Function for returning a list of filenames for the dance data

In [2]:
def get_file_list():
    #Saving all of the filepaths in data
    data = []
    for folder in glob.glob("../data/*"):
        if (folder[-3:] != '.md'):
            data.append(glob.glob(folder+'/*')[0])
    return data
        
files = get_file_list()

The following cell defines the columns for our data.

In [3]:
data_columns = ['head_x', 'head_y', 'head_z',
               'neck_x', 'neck_y', 'neck_z',
               'spine_x', 'spine_y', 'spine_z',
               'hip_x', 'hip_y', 'hip_z',
               'shoulderl_x', 'shoulderl_y', 'shoulderl_z',
               'shoulderr_x', 'shoulderr_y', 'shoulderr_z',
               'elbowl_x', 'elbowl_y', 'elbowl_z',
               'elbowr_x', 'elbowr_y', 'elbowr_z',
               'wristl_x', 'wristl_y', 'wristl_z',
               'wristr_x', 'wristr_y', 'wristr_z',
               'handl_x', 'handl_y', 'handl_z',
               'handr_x', 'handr_y', 'handr_z',
               'handtipl_x', 'handtipl_y', 'handtipl_z',
               'handtipr_x', 'handtipr_y', 'handtipr_z',
               'hipl_x', 'hipl_y', 'hipl_z',
               'hipr_x', 'hipr_y', 'hipr_z',
               'kneel_x', 'kneel_y', 'kneel_z',
               'kneer_x', 'kneer_y', 'kneer_z',
               'anklel_x', 'anklel_y', 'anklel_z',
               'ankler_x', 'ankler_y', 'ankler_z',
               'footl_x', 'footl_y', 'footl_z',
               'footr_x', 'footr_y', 'footr_z']

In [4]:
def create_df(filename):
    array2d = []
    
    fp = open(filename, 'r')
    line = fp.readline()

    while line:
        frame = line.split()
        for i in range(len(frame)):
            frame[i] = float(frame[i])
        array2d.append(frame)
        line = fp.readline()

    df = pd.DataFrame(array2d, columns = data_columns)

    df["square_handtip_distance"] = (df['handtipl_x'] - df['handtipr_x'])**2 + (df['handtipl_y'] - df['handtipr_y'])**2 + (df['handtipl_z'] - df['handtipr_z'])**2

    non_zero = df[df["square_handtip_distance"] > 0]
    #currently is just an estimation, taking the first frame where the hand distance is less than 0.1
    clap_frame = non_zero[non_zero["square_handtip_distance"] < 0.1].index[0].copy()
    #add columns for time stamp and beat index
    df["time_stamp"] = 0
    df["beat_index"] = 0
    clap_onwards = df.iloc[clap_frame:]
    clap_onwards
    return clap_onwards, clap_frame

In [5]:
#setting up spotify credentials
def set_spotify():
    client_id = 'd0b2731526744c759fcf012a56ec5bd5'
    client_secret = '6e593cabd0e043da9041c5ef5825dec7'

    #Sets up authentication to use the Spotify API
    client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
    #Creates a Spotipy session using the credentials
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    return sp

sp = set_spotify()

In [6]:
#Search for song id and get beats object
#This will be automated in a playlist once that is set up
name = "Billie Eilish"
result = sp.search(name)
artist_uri = result['tracks']['items'][0]['artists'][0]["uri"]
sp_albums = sp.artist_albums(artist_uri, album_type='album')

tracks = sp.album_tracks(sp_albums['items'][0]['uri'])
id = tracks['items'][1]['id']

def get_beats(song_id, filename):
    analysis = sp.audio_analysis(id)
    features = sp.audio_features(id)
    #Starting beat will change depending on song
    eight_counts = int(re.search("\d+.txt", filename).group()[0])
    beat_count = eight_counts * 8 #not sure if this should be 4 or eight, we will know when we can visualize
    beats = analysis['beats'][beat_count:]
    return beats


In [7]:
def add_beats(dance, beats, clap_frame, filename):
    eight_counts = int(re.search("\d+.txt", filename).group()[0])
    beat_count = eight_counts * 8 #not sure if this should be 4 or eight, we will know when we can visualize
    #add time stamps to dataframe
    dance.loc[clap_frame, "time_stamp"] = beats[beat_count]["start"]

    current_beat = 0
    #for each row in the data frame...
    for index, row in dance.iterrows():
        time_stamp = beats[0]["start"] + 0.0666666666*(index-clap_frame)
        dance.loc[index, "time_stamp"] = time_stamp #set time stamp for each frame
        if current_beat < len(beats) - 1:
            if beats[current_beat + 1]["start"] < time_stamp:
                current_beat += 1
        dance.loc[index, "beat_index"] = current_beat #set beat index for each frame

    clap_to_end = dance[dance["time_stamp"] < beats[-1]["start"]].copy() #cut off frames where song ends

    return clap_to_end

In [8]:
def extrapolate_id(dataname):
    id_container = re.search("/\w{22}_", dataname).group()
    track_id = id_container[1:-1]
    return track_id

In [9]:
#parse_song takes in a filename and returns a dataframe of the dance data along with the beat intervals
def parse_song(filename):
    dance, clap_frame = create_df(filename)
    song_id = extrapolate_id(filename)
    beats = get_beats(song_id, filename)
    return add_beats(dance, beats, clap_frame, filename)

In [10]:
#segment_beats takes the result of parse song and returns a list of dataframes of individual beats to add to training set
def segment_beats(dance_data):
    #groups the dance data by their beat index
    groups = dance_data.groupby('beat_index')
    #initialize empty list to populate with song beats
    song_beats = []
    #iterate through each group and append to song_beats
    for name, group in groups:
        song_beats.append(group)
    return song_beats

In [11]:
filename = '../data/BadGuy_BillieEilish1/2Fxmhks0bxGSBdJ92vM42m_date12_20_BadGuy-BillieEilish_2.txt'
dance, clap_frame = create_df(filename)
song_id = extrapolate_id(filename)
beats = get_beats(song_id, filename)
new_dance = add_beats(dance, beats, clap_frame, filename)
curr_frame = clap_frame
start = [name + '_0' for name in data_columns]
mid = [name + '_1/2' for name in data_columns]
end = [name + '_1' for name in data_columns]
cols = start + mid + end
beat_collect = pd.DataFrame(columns = cols)
last_frame = new_dance.index[-1]
times = []

beats_df_start = pd.DataFrame(columns = ["start_time"] + data_columns)
beats_df_mid = pd.DataFrame(columns = ["start_time"] + data_columns)
beats_df_end = pd.DataFrame(columns = ["start_time"] + data_columns)


#last beat is not included because it was excluded from the training set
##Start of Beat
for beat in beats[:-1]:
    beat_data = [beat["start"]]
    if (curr_frame < last_frame):
        while new_dance.loc[curr_frame + 1]["time_stamp"] < beat["start"]:
            curr_frame += 1
        if new_dance.loc[curr_frame + 1]["time_stamp"] > beat["start"]:
            #pinpointing position at beat
            #only for head_x right now, needs to be expanded to all points
            for point in data_columns:
                f1 = new_dance.loc[curr_frame]
                f2 = new_dance.loc[curr_frame + 1]
                beat_pos = f1[point] + (beat["start"] - f1.time_stamp) * ((f2[point] - f1[point]) / (f2.time_stamp - f1.time_stamp))
                #print(beat_pos)
                beat_data.append(beat_pos)
                #times += [beat["start"]]
    #creating a 1 row df for the beat
    beat_line = pd.DataFrame([beat_data], columns = ["start_time"] + data_columns)
    beats_df_start = beats_df_start.append(beat_line)

curr_frame = clap_frame
last_frame = new_dance.index[-1]

#Middle of Beat
for beat in beats[:-1]:
    beat_data = [beat["start"]]
    beat_mid = beat["start"] + beat["duration"] / 2
    if (curr_frame < last_frame):
        while new_dance.loc[curr_frame + 1]["time_stamp"] < beat_mid:
            curr_frame += 1
        if new_dance.loc[curr_frame + 1]["time_stamp"] > beat_mid:
            #pinpointing position at beat
            #only for head_x right now, needs to be expanded to all points
            for point in data_columns:
                f1 = new_dance.loc[curr_frame]
                f2 = new_dance.loc[curr_frame + 1]
                beat_pos = f1[point] + (beat_mid - f1.time_stamp) * ((f2[point] - f1[point]) / (f2.time_stamp - f1.time_stamp))
                beat_data.append(beat_pos)
                #times += [beat["start"]]
    #creating a 1 row df for the beat
    beat_line = pd.DataFrame([beat_data], columns = ["start_time"] + data_columns)
    beats_df_mid = beats_df_mid.append(beat_line)

#End of Beat
curr_frame = clap_frame
last_frame = new_dance.index[-1]
for beat in beats[:-2]:
    beat_data = [beat["start"]]
    beat_end = beat["start"] + beat["duration"]
    if (curr_frame < last_frame):
        while new_dance.loc[curr_frame + 1]["time_stamp"] < beat_end:
            curr_frame += 1
        if new_dance.loc[curr_frame + 1]["time_stamp"] > beat_end:
            #pinpointing position at beat
            #only for head_x right now, needs to be expanded to all points
            for point in data_columns:
                f1 = new_dance.loc[curr_frame]
                f2 = new_dance.loc[curr_frame + 1]
                beat_pos = f1[point] + (beat_end - f1.time_stamp) * ((f2[point] - f1[point]) / (f2.time_stamp - f1.time_stamp))
                beat_data.append(beat_pos)
                #times += [beat["start"]]
    #creating a 1 row df for the beat
    beat_line = pd.DataFrame([beat_data], columns = ["start_time"] + data_columns)
    beats_df_end = beats_df_end.append(beat_line)

beats_df_end = beats_df_end.append(new_dance.loc[last_frame][:66])
beats_df_end.loc[last_frame, "start_time"] = beats[-1]["start"]
beats_df_end

Unnamed: 0,start_time,head_x,head_y,head_z,neck_x,neck_y,neck_z,spine_x,spine_y,spine_z,...,anklel_z,ankler_x,ankler_y,ankler_z,footl_x,footl_y,footl_z,footr_x,footr_y,footr_z
0,7.35531,-1.575800,0.747951,2.575751,-1.614782,0.645403,2.552382,-1.608556,0.341040,2.584303,...,2.627048,-2.042914,-0.624422,2.669389,-1.864066,-0.694018,2.493217,-1.954082,-0.693986,2.579198
0,7.80114,-1.457394,0.772681,2.525010,-1.489446,0.675013,2.475426,-1.487982,0.361517,2.497917,...,2.239192,-1.780508,-0.686804,2.531553,-1.522853,-0.682926,2.197519,-1.694263,-0.752408,2.533886
0,8.24776,-1.296669,0.772820,2.559892,-1.333151,0.689500,2.488695,-1.368505,0.348485,2.493834,...,2.347090,-1.417936,-0.435471,2.489161,-1.539069,-0.735997,2.273459,-1.310347,-0.465069,2.399643
0,8.69396,-1.135087,0.773205,2.663298,-1.162795,0.686096,2.598811,-1.177308,0.341251,2.581594,...,2.378509,-1.480426,-0.679371,2.574255,-1.496567,-0.744893,2.319042,-1.514966,-0.745401,2.604525
0,9.13935,-0.966569,0.803460,2.757054,-1.009721,0.713185,2.688989,-1.004530,0.370113,2.667036,...,2.371597,-1.076487,-0.749806,2.635045,-1.483307,-0.747171,2.310558,-1.007223,-0.765540,2.564175
0,9.58236,-0.809785,0.789584,2.787065,-0.843595,0.695203,2.726663,-0.856358,0.352672,2.701154,...,2.386957,-1.056859,-0.766801,2.630689,-1.387814,-0.719119,2.314335,-0.950673,-0.791055,2.561018
0,10.02693,-0.629781,0.778012,2.780214,-0.654562,0.677952,2.720303,-0.645840,0.338471,2.709722,...,2.618122,-1.199266,-0.679102,2.626200,-0.973611,-0.822351,2.479220,-1.094310,-0.774343,2.594343
0,10.47149,-0.481753,0.822309,2.762600,-0.499160,0.700667,2.730140,-0.471811,0.362508,2.737530,...,2.604807,-1.063479,-0.662012,2.602842,-0.916584,-0.714201,2.523046,-1.009364,-0.741412,2.619027
0,10.91604,-0.297968,0.794263,2.818054,-0.348021,0.676673,2.783202,-0.320861,0.345297,2.776486,...,2.765610,-0.464370,-0.749289,2.685210,-0.773671,-0.780648,2.676680,-0.442924,-0.830855,2.569947
0,11.36058,-0.121732,0.784153,2.896921,-0.169096,0.658824,2.878160,-0.148214,0.348240,2.863534,...,2.659128,-0.266128,-0.716288,2.881797,-0.429365,-0.811050,2.561127,-0.364917,-0.781174,2.679803


In [12]:
beats = []
for file in files:
    print(file)
    song = parse_song(file)
    song_beats = segment_beats(song)
    beats = beats + song_beats
    
beats

../data/Sorry_JustinBeiber1/09CtPGIpYB4BrO8qb1RGsF_date12_20_Sorry_JustinBieber_2.txt
../data/Pony_Genuwine/6mz1fBdKATx6qP4oP1I65G_date12_21_Pony_Ginuwine_2.txt
../data/Wannabe_SpiceGirls/1Je1IMUlBXcx1Fz0WE7oPT_date12_23_Wannabe_SpiceGirls_0.txt
../data/ConCalma_DaddyYankee/5w9c2J52mkdntKOmRLeM2m_date12_23_ConCalma_DaddyYankee_2.txt
../data/Motivation_Normani/0rIAC4PXANcKmitJfoqmVm_date12_21_Motivation_Normani_4.txt
../data/SideToSide_ArianaGrande/1pKeFVVUOPjFsOABub0OaV_date12_23_SideToSide_ArianaGrande_4.txt
../data/CrazyInLove_Beyonce/5IVuqXILoxVWvWEPm82Jxr_date12_23_CrazyInLove_Beyonce_3.txt
../data/SweatDreams_Weslee/7hySbX6I73xmWGN9HERH3u_date12_21_SweatDreams_Weslee_1.txt
../data/WhyYouAlwaysHating_YG/39hnH8WdPmNT3Q3yzwC9Rg_date12_21_WhyYouAlwaysHating_YG_2.txt
../data/InMyFeelings_Drake/2G7V7zsVDxg1yRsu7Ew9RJ_date12_20_InMyFeelings_Drake_4.txt
../data/BolaRebola_Tropkillaz/1OUPXna2MCgAt3VNmXJBtg_date12_20_BolaRebola_Tropkillaz_2.txt
../data/OneTwoStep_Ciara/7uKcScNXuO3MWw6LowBjW

[       head_x    head_y   head_z    neck_x    neck_y   neck_z   spine_x  \
 562 -0.317271  0.824231  2.96681 -0.269299  0.688692  2.96260 -0.256158   
 563 -0.339813  0.825126  2.96823 -0.291329  0.689269  2.96464 -0.275051   
 564 -0.339813  0.824531  2.96588 -0.291329  0.689244  2.96499 -0.275051   
 565 -0.347022  0.824531  2.96588 -0.300842  0.689244  2.96499 -0.286862   
 566 -0.350690  0.805303  2.89327 -0.308049  0.669969  2.89190 -0.292853   
 567 -0.358138  0.804705  2.89182 -0.318233  0.668066  2.89137 -0.303620   
 568 -0.366099  0.805883  2.89135 -0.327935  0.666910  2.89150 -0.312856   
 
       spine_y  spine_z     hip_x  ...  ankler_z   footl_x   footl_y  footl_z  \
 562  0.402921  3.01076 -0.242708  ...   0.00000  0.000000  0.000000  0.00000   
 563  0.401976  3.01462 -0.258012  ...   0.00000  0.000000  0.000000  0.00000   
 564  0.403358  3.01576 -0.258012  ...   0.00000  0.000000  0.000000  0.00000   
 565  0.403358  3.01576 -0.272244  ...   0.00000  0.000000  0.0000