Basic import statements for the project

In [1]:
#import statements
import pandas as pd
import matplotlib.pyplot as plt
#glob is useful for working with filepaths
import glob
import math
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import re

1. Function for returning a list of filenames for the dance data

In [2]:
def get_file_list():
    #Saving all of the filepaths in data
    data = []
    for folder in glob.glob("../data/*"):
        if (folder[-3:] != '.md'):
            data.append(glob.glob(folder+'/*')[0])
    return data
        
files = get_file_list()

The following cell defines the columns for our data.

In [3]:
data_columns = ['head_x', 'head_y', 'head_z',
               'neck_x', 'neck_y', 'neck_z',
               'spine_x', 'spine_y', 'spine_z',
               'hip_x', 'hip_y', 'hip_z',
               'shoulderl_x', 'shoulderl_y', 'shoulderl_z',
               'shoulderr_x', 'shoulderr_y', 'shoulderr_z',
               'elbowl_x', 'elbowl_y', 'elbowl_z',
               'elbowr_x', 'elbowr_y', 'elbowr_z',
               'wristl_x', 'wristl_y', 'wristl_z',
               'wristr_x', 'wristr_y', 'wristr_z',
               'handl_x', 'handl_y', 'handl_z',
               'handr_x', 'handr_y', 'handr_z',
               'handtipl_x', 'handtipl_y', 'handtipl_z',
               'handtipr_x', 'handtipr_y', 'handtipr_z',
               'hipl_x', 'hipl_y', 'hipl_z',
               'hipr_x', 'hipr_y', 'hipr_z',
               'kneel_x', 'kneel_y', 'kneel_z',
               'kneer_x', 'kneer_y', 'kneer_z',
               'anklel_x', 'anklel_y', 'anklel_z',
               'ankler_x', 'ankler_y', 'ankler_z',
               'footl_x', 'footl_y', 'footl_z',
               'footr_x', 'footr_y', 'footr_z']

In [4]:
def create_df(filename):
    array2d = []
    
    fp = open(filename, 'r')
    line = fp.readline()

    while line:
        frame = line.split()
        for i in range(len(frame)):
            frame[i] = float(frame[i])
        array2d.append(frame)
        line = fp.readline()

    df = pd.DataFrame(array2d, columns = data_columns)

    df["square_handtip_distance"] = (df['handtipl_x'] - df['handtipr_x'])**2 + (df['handtipl_y'] - df['handtipr_y'])**2 + (df['handtipl_z'] - df['handtipr_z'])**2

    non_zero = df[df["square_handtip_distance"] > 0]
    #currently is just an estimation, taking the first frame where the hand distance is less than 0.1
    clap_frame = non_zero[non_zero["square_handtip_distance"] < 0.1].index[0].copy()
    #add columns for time stamp and beat index
    df["time_stamp"] = 0
    df["beat_index"] = 0
    clap_onwards = df.iloc[clap_frame:]
    clap_onwards
    return clap_onwards, clap_frame

In [5]:
#setting up spotify credentials
def set_spotify():
    client_id = 'd0b2731526744c759fcf012a56ec5bd5'
    client_secret = '6e593cabd0e043da9041c5ef5825dec7'

    #Sets up authentication to use the Spotify API
    client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
    #Creates a Spotipy session using the credentials
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    return sp

sp = set_spotify()

In [6]:
#Search for song id and get beats object
#This will be automated in a playlist once that is set up
name = "Billie Eilish"
result = sp.search(name)
artist_uri = result['tracks']['items'][0]['artists'][0]["uri"]
sp_albums = sp.artist_albums(artist_uri, album_type='album')

tracks = sp.album_tracks(sp_albums['items'][0]['uri'])
id = tracks['items'][1]['id']

def get_beats(song_id, filename):
    analysis = sp.audio_analysis(id)
    features = sp.audio_features(id)
    #Starting beat will change depending on song
    eight_counts = int(re.search("\d+.txt", filename).group()[0])
    beat_count = eight_counts * 8 #not sure if this should be 4 or eight, we will know when we can visualize
    beats = analysis['beats'][beat_count:]
    return beats


In [7]:
def add_beats(dance, beats, clap_frame, filename):
    eight_counts = int(re.search("\d+.txt", filename).group()[0])
    beat_count = eight_counts * 8 #not sure if this should be 4 or eight, we will know when we can visualize
    #add time stamps to dataframe
    dance.loc[clap_frame, "time_stamp"] = beats[beat_count]["start"]

    current_beat = 0
    #for each row in the data frame...
    for index, row in dance.iterrows():
        time_stamp = beats[0]["start"] + 0.0666666666*(index-clap_frame)
        dance.loc[index, "time_stamp"] = time_stamp #set time stamp for each frame
        if current_beat < len(beats) - 1:
            if beats[current_beat + 1]["start"] < time_stamp:
                current_beat += 1
        dance.loc[index, "beat_index"] = current_beat #set beat index for each frame

    clap_to_end = dance[dance["time_stamp"] < beats[-1]["start"]].copy() #cut off frames where song ends

    return clap_to_end

In [8]:
def extrapolate_id(dataname):
    id_container = re.search("/\w{22}_", dataname).group()
    track_id = id_container[1:-1]
    return track_id

In [9]:
#parse_song takes in a filename and returns a dataframe of the dance data along with the beat intervals
def parse_song(filename):
    dance, clap_frame = create_df(filename)
    song_id = extrapolate_id(filename)
    beats = get_beats(song_id, filename)
    return add_beats(dance, beats, clap_frame, filename)

In [10]:
#segment_beats takes the result of parse song and returns a list of dataframes of individual beats to add to training set
def segment_beats(dance_data):
    #groups the dance data by their beat index
    groups = dance_data.groupby('beat_index')
    #initialize empty list to populate with song beats
    song_beats = []
    #iterate through each group and append to song_beats
    for name, group in groups:
        song_beats.append(group)
    return song_beats

In [13]:
def standardize_beats(filename):
    dance, clap_frame = create_df(filename)
    song_id = extrapolate_id(filename)
    beats = get_beats(song_id, filename)
    new_dance = add_beats(dance, beats, clap_frame, filename)
    curr_frame = clap_frame
    beat_collect = pd.DataFrame(columns = cols)
    last_frame = new_dance.index[-1]
    times = []

    beats_df_start = pd.DataFrame(columns = ["start_time"] + data_columns)
    beats_df_mid = pd.DataFrame(columns = ["start_time"] + data_columns)
    beats_df_end = pd.DataFrame(columns = ["start_time"] + data_columns)


    #last beat is not included because it was excluded from the training set
    ##Start of Beat
    for beat in beats[:-1]:
        beat_data = [beat["start"]]
        if (curr_frame < last_frame):
            while new_dance.loc[curr_frame + 1]["time_stamp"] < beat["start"]:
                curr_frame += 1
            if new_dance.loc[curr_frame + 1]["time_stamp"] > beat["start"]:
                #pinpointing position at beat
                #only for head_x right now, needs to be expanded to all points
                for point in data_columns:
                    f1 = new_dance.loc[curr_frame]
                    f2 = new_dance.loc[curr_frame + 1]
                    beat_pos = f1[point] + (beat["start"] - f1.time_stamp) * ((f2[point] - f1[point]) / (f2.time_stamp - f1.time_stamp))
                    #print(beat_pos)
                    beat_data.append(beat_pos)
                    #times += [beat["start"]]
        #creating a 1 row df for the beat
        beat_line = pd.DataFrame([beat_data], columns = ["start_time"] + data_columns)
        beats_df_start = beats_df_start.append(beat_line)

    curr_frame = clap_frame
    last_frame = new_dance.index[-1]

    #Middle of Beat
    for beat in beats[:-1]:
        beat_data = [beat["start"]]
        beat_mid = beat["start"] + beat["duration"] / 2
        if (curr_frame < last_frame):
            while new_dance.loc[curr_frame + 1]["time_stamp"] < beat_mid:
                curr_frame += 1
            if new_dance.loc[curr_frame + 1]["time_stamp"] > beat_mid:
                #pinpointing position at beat
                #only for head_x right now, needs to be expanded to all points
                for point in data_columns:
                    f1 = new_dance.loc[curr_frame]
                    f2 = new_dance.loc[curr_frame + 1]
                    beat_pos = f1[point] + (beat_mid - f1.time_stamp) * ((f2[point] - f1[point]) / (f2.time_stamp - f1.time_stamp))
                    beat_data.append(beat_pos)
                    #times += [beat["start"]]
        #creating a 1 row df for the beat
        beat_line = pd.DataFrame([beat_data], columns = ["start_time"] + data_columns)
        beats_df_mid = beats_df_mid.append(beat_line)

    #End of Beat
    curr_frame = clap_frame
    last_frame = new_dance.index[-1]
    for beat in beats[:-2]:
        beat_data = [beat["start"]]
        beat_end = beat["start"] + beat["duration"]
        if (curr_frame < last_frame):
            while new_dance.loc[curr_frame + 1]["time_stamp"] < beat_end:
                curr_frame += 1
            if new_dance.loc[curr_frame + 1]["time_stamp"] > beat_end:
                #pinpointing position at beat
                #only for head_x right now, needs to be expanded to all points
                for point in data_columns:
                    f1 = new_dance.loc[curr_frame]
                    f2 = new_dance.loc[curr_frame + 1]
                    beat_pos = f1[point] + (beat_end - f1.time_stamp) * ((f2[point] - f1[point]) / (f2.time_stamp - f1.time_stamp))
                    beat_data.append(beat_pos)
                    #times += [beat["start"]]
        #creating a 1 row df for the beat
        beat_line = pd.DataFrame([beat_data], columns = ["start_time"] + data_columns)
        beats_df_end = beats_df_end.append(beat_line)

    beats_df_end = beats_df_end.append(new_dance.loc[last_frame][:66])
    beats_df_end.loc[last_frame, "start_time"] = beats[-2]["start"] #setting this manually since the data doesn't go this far

    mid_and_end = beats_df_mid.merge(beats_df_end, on="start_time", how="outer", suffixes=["", "_1"])
    standard_beats = beats_df_start.merge(mid_and_end, on="start_time", how="outer", suffixes=["_0", "_1/2"])
    return standard_beats

In [12]:
standardize_beats('../data/Wannabe_SpiceGirls/1Je1IMUlBXcx1Fz0WE7oPT_date12_23_Wannabe_SpiceGirls_0.txt')

Unnamed: 0,start_time,head_x_0,head_y_0,head_z_0,neck_x_0,neck_y_0,neck_z_0,spine_x_0,spine_y_0,spine_z_0,...,anklel_z_1,ankler_x_1,ankler_y_1,ankler_z_1,footl_x_1,footl_y_1,footl_z_1,footr_x_1,footr_y_1,footr_z_1
0,0.25371,0.298888,0.515556,3.005120,0.333421,0.394791,3.016600,0.316209,0.112909,3.037500,...,2.893405,0.388149,-0.781291,3.033900,0.162484,-0.794852,2.874600,0.419327,-0.827117,2.939772
1,0.69642,0.217616,0.506575,2.917944,0.215638,0.381377,2.938404,0.200090,0.118136,2.950729,...,2.914150,0.294944,-0.749018,3.023535,0.116232,-0.800627,2.793922,0.284744,-0.796933,2.955659
2,1.14002,0.158906,0.522393,2.949293,0.152941,0.390391,2.966208,0.141455,0.117844,2.979710,...,2.884442,0.213888,-0.717715,2.875268,0.113803,-0.774678,2.758125,0.168863,-0.756908,2.790280
3,1.58448,0.090785,0.528761,2.990522,0.074306,0.395229,2.996881,0.074811,0.122559,3.009419,...,3.033419,0.140924,-0.738353,2.863295,-0.149679,-0.783685,2.919149,0.107979,-0.782945,2.737020
4,2.02737,0.008130,0.526957,3.024919,-0.008236,0.392236,3.026049,-0.013990,0.122717,3.037196,...,3.021936,0.140114,-0.749420,2.878170,-0.215027,-0.774393,2.891750,0.141366,-0.784862,2.737782
5,2.47103,-0.049394,0.529387,3.059833,-0.072972,0.394704,3.059540,-0.076174,0.124975,3.074576,...,3.045566,0.123164,-0.757166,2.895228,-0.198401,-0.792950,2.909838,0.135301,-0.796172,2.761290
6,2.91624,-0.098059,0.530590,3.089409,-0.117075,0.400350,3.088624,-0.113307,0.128910,3.106715,...,3.078740,0.098498,-0.747925,2.933657,-0.209506,-0.797074,2.940026,0.110537,-0.791425,2.793946
7,3.36063,-0.105881,0.530565,3.115349,-0.120466,0.397022,3.116873,-0.118490,0.125782,3.135661,...,3.110026,0.122918,-0.753223,3.039247,-0.221664,-0.787325,2.969753,0.122641,-0.794792,2.899930
8,3.80502,-0.070317,0.532616,3.165053,-0.075803,0.392015,3.162130,-0.080566,0.119922,3.173519,...,3.122320,0.140508,-0.749298,3.167569,-0.224998,-0.778990,2.989416,0.145262,-0.791981,3.012180
9,4.24821,-0.001554,0.526490,3.216314,-0.003036,0.388521,3.209383,-0.000634,0.115098,3.218984,...,3.132448,0.144585,-0.755937,3.220620,-0.212280,-0.757135,2.999040,0.148536,-0.797431,3.085610


In [19]:
#defining columns for beat structure
start = [name + '_0' for name in data_columns]
mid = [name + '_1/2' for name in data_columns]
end = [name + '_1' for name in data_columns]
#cols should be altered as we do more feature engineering, perhaps adding more snapshots or other data (velocity and such)
cols = ["start_time"] + start + mid + end
training_df_of_beats = pd.DataFrame(columns=cols)


for file in files:
    print(file)
    beats = standardize_beats(file)
    training_df_of_beats = pd.concat([training_df_of_beats, beats])
    
training_df_of_beats

../data/Sorry_JustinBeiber1/09CtPGIpYB4BrO8qb1RGsF_date12_20_Sorry_JustinBieber_2.txt
../data/Pony_Genuwine/6mz1fBdKATx6qP4oP1I65G_date12_21_Pony_Ginuwine_2.txt
../data/Wannabe_SpiceGirls/1Je1IMUlBXcx1Fz0WE7oPT_date12_23_Wannabe_SpiceGirls_0.txt
../data/ConCalma_DaddyYankee/5w9c2J52mkdntKOmRLeM2m_date12_23_ConCalma_DaddyYankee_2.txt
../data/Motivation_Normani/0rIAC4PXANcKmitJfoqmVm_date12_21_Motivation_Normani_4.txt
../data/SideToSide_ArianaGrande/1pKeFVVUOPjFsOABub0OaV_date12_23_SideToSide_ArianaGrande_4.txt
../data/CrazyInLove_Beyonce/5IVuqXILoxVWvWEPm82Jxr_date12_23_CrazyInLove_Beyonce_3.txt
../data/SweatDreams_Weslee/7hySbX6I73xmWGN9HERH3u_date12_21_SweatDreams_Weslee_1.txt
../data/WhyYouAlwaysHating_YG/39hnH8WdPmNT3Q3yzwC9Rg_date12_21_WhyYouAlwaysHating_YG_2.txt
../data/InMyFeelings_Drake/2G7V7zsVDxg1yRsu7Ew9RJ_date12_20_InMyFeelings_Drake_4.txt
../data/BolaRebola_Tropkillaz/1OUPXna2MCgAt3VNmXJBtg_date12_20_BolaRebola_Tropkillaz_2.txt
../data/OneTwoStep_Ciara/7uKcScNXuO3MWw6LowBjW

Unnamed: 0,start_time,head_x_0,head_y_0,head_z_0,neck_x_0,neck_y_0,neck_z_0,spine_x_0,spine_y_0,spine_z_0,...,anklel_z_1,ankler_x_1,ankler_y_1,ankler_z_1,footl_x_1,footl_y_1,footl_z_1,footr_x_1,footr_y_1,footr_z_1
0,7.35531,-0.317271,0.824231,2.966810,-0.269299,0.688692,2.962600,-0.256158,0.402921,3.010760,...,3.065290,0.160912,-0.678097,2.910086,-0.392931,-0.784001,2.998208,0.105814,-0.770568,2.846308
1,7.80114,-0.370772,0.805886,2.891061,-0.333496,0.665798,2.892634,-0.318227,0.388664,2.947918,...,3.009662,0.067826,-0.673040,2.915956,-0.379181,-0.764584,2.977127,0.071432,-0.770278,2.880910
2,8.24776,-0.379849,0.817991,2.926162,-0.352104,0.671197,2.933172,-0.334387,0.392530,2.989316,...,3.038960,-0.066552,-0.662193,2.987742,-0.397890,-0.768093,2.953152,-0.010641,-0.750893,2.876400
3,8.69396,-0.336438,0.822493,2.931561,-0.343405,0.671301,2.937150,-0.330480,0.393348,2.994771,...,3.044521,-0.065808,-0.660959,3.000678,-0.399082,-0.771959,2.965804,-0.031103,-0.753714,2.911680
4,9.13935,-0.236707,0.823503,2.930138,-0.243479,0.668382,2.922269,-0.248332,0.392926,2.980808,...,3.039835,-0.025088,-0.653283,2.960769,-0.398946,-0.772529,2.964350,0.001331,-0.725017,2.862299
5,9.58236,-0.098322,0.829536,2.928052,-0.119394,0.673807,2.916871,-0.140110,0.391464,2.960215,...,3.032277,-0.046704,-0.670490,2.939809,-0.398424,-0.772226,2.962151,-0.014526,-0.743669,2.832096
6,10.02693,-0.041641,0.834000,2.921929,-0.063919,0.677723,2.906419,-0.086504,0.394788,2.951636,...,2.991778,-0.021252,-0.675985,2.940664,-0.379600,-0.765532,2.947964,-0.013117,-0.752913,2.827393
7,10.47149,-0.002641,0.836248,2.904750,-0.027018,0.679108,2.892269,-0.056368,0.400812,2.941363,...,2.943002,-0.011114,-0.679036,2.942615,-0.301858,-0.738810,2.855531,-0.007556,-0.758438,2.824480
8,10.91604,0.008869,0.837771,2.896550,-0.013550,0.678794,2.884310,-0.042406,0.402456,2.931780,...,2.902920,-0.009186,-0.679731,2.943313,-0.266467,-0.733522,2.817074,-0.005608,-0.760231,2.823542
9,11.36058,0.002707,0.838901,2.890267,-0.022500,0.680414,2.874326,-0.047498,0.402300,2.910079,...,2.889997,-0.008837,-0.679683,2.943477,-0.261692,-0.737245,2.800877,-0.004821,-0.760259,2.824080


Unnamed: 0,start_time,head_x_0,head_y_0,head_z_0,neck_x_0,neck_y_0,neck_z_0,spine_x_0,spine_y_0,spine_z_0,...,anklel_z_1,ankler_x_1,ankler_y_1,ankler_z_1,footl_x_1,footl_y_1,footl_z_1,footr_x_1,footr_y_1,footr_z_1
