Basic import statements for the project

In [1]:
#import statements
import pandas as pd
#glob is useful for working with filepaths
import glob
import math
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import re

Function that returns one data file path per recording folder: the first file found in each folder under `../data` (entries ending in `.md` are ignored).

In [2]:
def get_file_list():
    """Collect one data file path for every recording folder under ``../data``.

    Every entry matched by ``../data/*`` whose name does not end in ``.md`` is
    treated as a folder, and the first file inside it (in sorted order) is kept.

    Entries that contain nothing -- empty folders, or plain files, for which the
    inner glob matches nothing -- are skipped instead of raising ``IndexError``.
    Both listings are sorted so the result does not depend on the arbitrary
    order in which ``glob`` returns directory entries.

    Returns:
        list[str]: one file path per non-empty folder, ordered by folder name.
    """
    #Saving all of the filepaths in data
    data = []
    for folder in sorted(glob.glob("../data/*")):
        if folder.endswith('.md'):
            continue
        contents = sorted(glob.glob(folder + '/*'))
        #nothing inside (empty folder or a plain file): nothing to load
        if contents:
            data.append(contents[0])
    return data


files = get_file_list()

The following cell defines the columns for our data.

In [3]:
#Body-part prefixes in column order; the trailing l / r presumably marks the
#left and right side of the body (inferred from the names only).
_body_parts = [
    'head', 'neck', 'spine', 'hip',
    'shoulderl', 'shoulderr',
    'elbowl', 'elbowr',
    'wristl', 'wristr',
    'handl', 'handr',
    'handtipl', 'handtipr',
    'hipl', 'hipr',
    'kneel', 'kneer',
    'anklel', 'ankler',
    'footl', 'footr',
]

#Three coordinate columns (x, y, z) per body part: 22 parts -> 66 columns
data_columns = [part + '_' + axis
                for part in _body_parts
                for axis in ('x', 'y', 'z')]

In [14]:
def create_df(filename, columns=None, clap_threshold=0.1):
    """Load one recording into a DataFrame and trim it to start at the clap frame.

    Each line of the file is split on whitespace and parsed as one row of floats.
    The "clap" is estimated as the first frame whose squared distance between the
    two hand tips is greater than 0 and below ``clap_threshold``. Frames with a
    distance of exactly 0 are ignored (presumably frames where the hands were not
    tracked -- confirm against the capture format).

    Args:
        filename: path of the whitespace-separated data file.
        columns: column names for the parsed values. Defaults to the module-level
            ``data_columns``. Must contain the ``handtipl_*`` / ``handtipr_*``
            x, y, z columns.
        clap_threshold: upper bound on the *squared* hand-tip distance that counts
            as a clap. Defaults to 0.1, the value previously hard-coded.

    Returns:
        tuple: ``(clap_onwards, clap_frame)``. ``clap_onwards`` is an independent
        copy of the frames from the clap frame to the end, with the extra columns
        ``square_handtip_distance``, ``time_stamp`` (float zeros) and
        ``beat_index`` (int zeros). ``clap_frame`` is the int index of the clap.

    Raises:
        IndexError: if no frame satisfies the clap condition.
    """
    if columns is None:
        columns = data_columns

    #the context manager closes the file even when a value fails to parse
    with open(filename, 'r') as fp:
        array2d = [[float(value) for value in line.split()] for line in fp]

    df = pd.DataFrame(array2d, columns=columns)

    df["square_handtip_distance"] = (
        (df['handtipl_x'] - df['handtipr_x'])**2
        + (df['handtipl_y'] - df['handtipr_y'])**2
        + (df['handtipl_z'] - df['handtipr_z'])**2
    )

    #currently is just an estimation, taking the first frame where the
    #squared hand distance is non-zero and below the threshold
    distance = df["square_handtip_distance"]
    candidates = df.index[(distance > 0) & (distance < clap_threshold)]
    if len(candidates) == 0:
        raise IndexError(
            "no clap frame found in %r: no frame has a squared hand-tip "
            "distance in (0, %s)" % (filename, clap_threshold)
        )
    clap_frame = int(candidates[0])

    #add columns for time stamp and beat index; time_stamp starts out as float
    #because fractional seconds are written into it later
    df["time_stamp"] = 0.0
    df["beat_index"] = 0

    #copy so later writes go to an independent frame rather than to a slice of df
    clap_onwards = df.iloc[clap_frame:].copy()
    return clap_onwards, clap_frame

In [15]:
#setting up spotify credentials
def set_spotify():
    """Create a Spotipy client authenticated with the client-credentials flow.

    The client id and secret are deliberately not stored in the notebook.
    ``SpotifyClientCredentials`` is constructed without arguments, in which case
    spotipy reads them from the ``SPOTIPY_CLIENT_ID`` and
    ``SPOTIPY_CLIENT_SECRET`` environment variables. Set both before running
    this cell; spotipy reports an error if either one is missing.

    NOTE: the id/secret pair that used to be hard-coded here was stored in plain
    text and should be treated as compromised -- rotate it in the Spotify
    developer dashboard.

    Returns:
        spotipy.Spotify: the client session used for all Spotify API calls.
    """
    #Sets up authentication to use the Spotify API (credentials come from the environment)
    client_credentials_manager = SpotifyClientCredentials()
    #Creates a Spotipy session using the credentials
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    return sp

sp = set_spotify()

In [16]:
#Look up a sample track id through the Spotify search endpoint
#This will be automated in a playlist once that is set up
name = "Billie Eilish"
result = sp.search(name)

#The first artist credited on the top search hit is used as the artist
top_hit = result['tracks']['items'][0]
artist_uri = top_hit['artists'][0]["uri"]
sp_albums = sp.artist_albums(artist_uri, album_type='album')

#Take the track at position 1 of the first album returned for that artist
first_album_uri = sp_albums['items'][0]['uri']
tracks = sp.album_tracks(first_album_uri)
#NOTE(review): `id` shadows the builtin of the same name; the name is left
#unchanged so that existing references to it keep working
id = tracks['items'][1]['id']

def get_beats(song_id, start_beat=16):
    """Fetch the beat list of a track from Spotify's audio analysis.

    Uses the module-level Spotipy client ``sp``.

    Args:
        song_id: Spotify id of the track to analyse.
        start_beat: number of leading beats to drop from the analysis. The right
            value depends on the song; the default of 16 is the offset that was
            previously hard-coded.

    Returns:
        list: the entries of ``analysis['beats']`` from ``start_beat`` onwards.
        Other code in this notebook reads each entry's ``"start"`` value.
    """
    #analyse the track that was asked for (not the module-level sample `id`)
    analysis = sp.audio_analysis(song_id)
    #Starting beat will change depending on song
    beats = analysis['beats'][start_beat:]
    return beats

In [17]:
def add_beats(dance, beats, clap_frame, frame_interval=0.0666666666):
    """Stamp every frame with a song time and the index of the beat it falls in.

    The frame labelled ``clap_frame`` is aligned with the start of ``beats[0]``;
    every other frame is offset from it by ``frame_interval`` seconds per index
    step. A frame belongs to beat ``k`` once its time stamp is strictly greater
    than ``beats[k]["start"]``; the index never goes past the last beat.

    The input frame is not modified: the columns are computed once and attached
    to a copy, instead of being written cell by cell into ``dance``.

    Args:
        dance: DataFrame of frames, indexed by frame number.
        beats: non-empty sequence of dicts with a ``"start"`` time. Assumed to
            be in ascending order, since the beat search only moves forward.
        clap_frame: index label of the frame that coincides with ``beats[0]``.
        frame_interval: seconds between consecutive frame numbers. The default
            is the value previously hard-coded (roughly 1/15 s, presumably a
            15 fps capture -- confirm).

    Returns:
        pandas.DataFrame: a new frame holding the rows of ``dance`` whose time
        stamp is before the start of the last beat, with ``time_stamp`` and
        ``beat_index`` filled in.

    Raises:
        IndexError: if ``beats`` is empty.
    """
    first_beat_start = beats[0]["start"]
    last_beat = len(beats) - 1

    time_stamps = []
    beat_indices = []
    current_beat = 0
    #for each frame in the data frame...
    for index in dance.index:
        time_stamp = first_beat_start + frame_interval*(index-clap_frame)
        #a while loop (not a single step) so the beat index catches up when
        #several beats start between two consecutive frames, e.g. across a gap
        while current_beat < last_beat and beats[current_beat + 1]["start"] < time_stamp:
            current_beat += 1
        time_stamps.append(time_stamp)
        beat_indices.append(current_beat)

    #whole-column assignment on a copy: leaves the caller's frame untouched and
    #gives time_stamp a float dtype regardless of how the column was initialised
    stamped = dance.copy()
    stamped["time_stamp"] = time_stamps
    stamped["beat_index"] = beat_indices

    clap_to_end = stamped[stamped["time_stamp"] < beats[-1]["start"]].copy() #cut off frames where song ends

    return clap_to_end

In [18]:
def extrapolate_id(dataname):
    """Extract the track id embedded at the start of a data file's name.

    The id is the run of exactly 22 word characters that begins a path component
    and is immediately followed by an underscore, e.g.
    ``.../2Fxmhks0bxGSBdJ92vM42m_date12_20_....txt``. Both ``/`` and ``\\`` are
    accepted as separators, and a bare file name without any directory works too.

    Args:
        dataname: path or file name of a data file.

    Returns:
        str: the 22-character id.

    Raises:
        ValueError: if ``dataname`` contains no such id.
    """
    #raw string so that \w is a regex class, not an invalid string escape
    match = re.search(r"(?:^|[\\/])(\w{22})_", dataname)
    if match is None:
        raise ValueError("no 22-character track id found in %r" % (dataname,))
    track_id = match.group(1)
    return track_id

In [19]:
#parse_song: load one recording and line its frames up with the beats of its song
def parse_song(filename):
    """Build the beat-annotated frame table for a single recording file.

    The frames and the clap frame come from create_df, the track id is read from
    the file name by extrapolate_id, the beats are fetched with get_beats, and
    add_beats combines them into the returned DataFrame.
    """
    frames, clap_frame = create_df(filename)
    track_beats = get_beats(extrapolate_id(filename))
    return add_beats(frames, track_beats, clap_frame)

parse_song('../data/BadGuy_BillieEilish1/2Fxmhks0bxGSBdJ92vM42m_date12_20_BadGuy-BillieEilish_2.txt')

Unnamed: 0,head_x,head_y,head_z,neck_x,neck_y,neck_z,spine_x,spine_y,spine_z,hip_x,...,ankler_z,footl_x,footl_y,footl_z,footr_x,footr_y,footr_z,square_handtip_distance,time_stamp,beat_index
1765,-1.706370,0.725889,2.67762,-1.696840,0.645058,2.72175,-1.697920,0.329761,2.73161,-1.691500,...,0.00000,0.000000,0.000000,0.00000,0.000000,0.000000,0.00000,0.042358,7.355310,0
1766,-1.688630,0.721881,2.61354,-1.675750,0.629170,2.64882,-1.684840,0.319918,2.65890,-1.683570,...,0.00000,0.000000,0.000000,0.00000,0.000000,0.000000,0.00000,0.028198,7.421977,0
1767,-1.646220,0.754192,2.63468,-1.669780,0.631666,2.60286,-1.672500,0.330371,2.63339,-1.666650,...,0.00000,-1.817250,0.000000,0.00000,0.000000,0.000000,0.00000,0.053040,7.488643,0
1768,-1.626740,0.739920,2.60829,-1.656090,0.631869,2.59735,-1.654860,0.332012,2.62712,-1.644420,...,2.54352,-1.904590,-0.650530,2.31067,-1.825510,0.000000,0.00000,0.038153,7.555310,0
1769,-1.597490,0.751685,2.60128,-1.640210,0.643806,2.58054,-1.636340,0.338552,2.61456,-1.624250,...,2.66048,-1.899150,-0.686339,2.45198,-1.917440,-0.717382,2.46040,0.100056,7.621977,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4509,0.022003,0.776354,3.39874,0.002209,0.635540,3.36165,-0.045091,0.333672,3.34583,-0.086369,...,3.34297,-0.419441,-0.767751,3.22244,0.031548,-0.813612,3.26280,0.368100,190.288643,402
4510,0.021558,0.777603,3.39816,0.005981,0.635981,3.35987,-0.042606,0.334130,3.34460,-0.085853,...,3.34278,-0.415628,-0.747796,3.18992,0.031262,-0.812825,3.26372,0.320911,190.355310,402
4511,0.019085,0.779148,3.39694,0.015466,0.636476,3.35739,-0.035856,0.334654,3.34254,-0.084272,...,3.34259,-0.400058,-0.768252,3.20401,0.031117,-0.812188,3.26444,0.391479,190.421976,402
4512,0.017885,0.782717,3.39075,0.017859,0.637946,3.34522,-0.033450,0.335980,3.33348,-0.083168,...,3.34259,-0.387427,-0.781709,3.20700,0.031049,-0.811688,3.26500,0.425627,190.488643,402
