Purpose: 

- Creates frame specification file: part+"/"+part+'_frame_order.txt'

- User must change constants below

- assumes a .times file is used for the original video's score alignment. This means time needs to be converted from "AT" to seconds

- use in conjunction with .csv file specifying final times (or stft frame number) of each part. This can be found in virtual ensemble/Mozart/notebooks/timing

In [212]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import pandas as pd
import music21
from scipy.interpolate import interp1d
import cv2


# Constants from user

In [213]:
part = "horn_in_e_2"
START_VIDEO_FPS = 30
END_VIDEO_FPS = 30
path_to_times = "~/Desktop/Projects/virtual_ensemble/Mozart_Serenade/orchestra_part_times/mozart_serenade_eflat."
quarter_per_measure = 4

In [214]:
SECONDS_PER_AT_FRAME = 256/8000

video_time = 5 min 55 sec 6 ms

In [215]:
video_secs = 5*60+55+.06

In [216]:
video_secs

355.06

In [217]:
MAX_FRAME_NUM = int(video_secs*33)
MAX_FRAME_NUM

11716

Derived constants

In [218]:
timesFile = path_to_times+part+".times"
START_VIDEO_SPF = 1/START_VIDEO_FPS  
END_VIDEO_SPF = 1/END_VIDEO_FPS

measure evaluation based on time signature (quarters per measure)

In [219]:
#use -1 notation on pickup
def eval_measure_fun(measure_str, quarter_per_measure = 4, pickup = False):
    mixed_lst = measure_str.split('+')
    measures = eval(mixed_lst[0])
    beats = eval(mixed_lst[1])
    measure_len = quarter_per_measure * .25 #assume quarter is .25
    
    return measures*measure_len + beats
    

In [220]:
#THIS IPLEMENTATION USES ATM, COULD ALSO USE MIDI
'''
Read in .times file as dataframe
'''
times_df = pd.read_csv(timesFile, 
                          header=None,
                          delim_whitespace=True,
                          names=['measure', 'at', 'marked'])

#strip
times_df = times_df.iloc[4:-1] 

#remove marked
times_df = times_df[['measure', 'at']]

In [221]:

'''
use mapping to cast measure
'''

evaluatedMeasure=list(map(eval_measure_fun, times_df['measure']))

times_df['eval_measure']=evaluatedMeasure #start at 0 for vid frame calculation        



In [222]:
#video
times_df['seconds']= times_df['at'].astype(np.float)*SECONDS_PER_AT_FRAME

In [223]:
#video
times_df['vid_frames']= (times_df['seconds']*START_VIDEO_FPS).astype(np.int)

In [224]:
times_df

Unnamed: 0,measure,at,eval_measure,seconds,vid_frames
4,1+1/8,72.000,1.125,2.304,69
5,1+1/4,87.000,1.250,2.784,83
6,1+3/8,101.000,1.375,3.232,96
7,1+1/2,114.000,1.500,3.648,109
8,1+5/8,127.000,1.625,4.064,121
...,...,...,...,...,...
291,88+1/2,9998.000,88.500,319.936,9598
292,89+0/1,10060.000,89.000,321.920,9657
293,89+1/2,10117.000,89.500,323.744,9712
294,90+0/1,10179.000,90.000,325.728,9771


# Import new times

For now do conversion here, really should be in seconds

In [225]:
# Constants for reading .times or .ideal files

SECONDS_PER_AT_FRAME = 256/8000

fft_size=1024


hop_size=int(fft_size/4)
sr_ensemble=44100


fft_lim =513 #largest bin size for cutoff

#X seconds/sample * 512 samples/STFT frame

def seconds_to_stft_frames(seconds):
    samples = seconds * sr_ensemble
    return  math.floor((samples)/hop_size)


def stft_frames_to_seconds(stft_frames):
    samples = fft_size + (stft_frames-1)*hop_size
    return samples/sr_ensemble
    
    


half_step = 1/12 #multiply this by change to get 

In [226]:
perf_STFT_FRAME_path = "./grid_stretch.csv"

In [227]:
perf_STFT_FRAME_df = pd.read_csv(perf_STFT_FRAME_path)



In [228]:
perf_STFT_FRAME_df

Unnamed: 0,oboe_1,oboe_2,clarinet_1,clarinet_2,bassoon_1,bassoon_2,horn_in_e_1,horn_in_e_2,eval_measure
0,22.245967,22.325539,23.187034,17.806747,22.224177,24.661869,22.275996,16.676778,1.000
1,109.126259,109.235900,110.003125,109.570675,108.937609,110.638391,110.532493,109.480074,1.125
2,195.958582,196.064291,196.677645,201.594476,195.582084,196.422828,198.735589,202.556530,1.250
3,282.718056,282.755883,283.035166,290.157027,282.137407,281.760811,281.408054,289.415825,1.375
4,369.476195,369.398412,368.984487,369.692397,368.754768,366.476242,370.202615,371.111344,1.500
...,...,...,...,...,...,...,...,...,...
850,61519.339263,61521.666874,61518.299594,61524.808244,61527.751193,61515.774980,61521.546595,61525.071840,88.875
851,61597.284029,61601.746089,61596.846546,61606.796629,61611.261959,61590.738088,61600.083376,61608.435953,89.000
852,61916.841386,61917.612115,61907.104689,61904.635693,61904.663881,61920.390434,61918.786944,61913.804808,89.500
853,62207.497545,62256.114959,62239.398854,62251.967325,62243.909298,62263.807303,62234.917513,62243.221807,90.000


In [229]:
perf_STFT_FRAME_df =perf_STFT_FRAME_df[['eval_measure', part]]
perf_STFT_FRAME_df['target_STFT_FRAME'] = perf_STFT_FRAME_df[part]
perf_STFT_FRAME_df['target_time']= list(map(stft_frames_to_seconds,perf_STFT_FRAME_df['target_STFT_FRAME']))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  perf_STFT_FRAME_df['target_STFT_FRAME'] = perf_STFT_FRAME_df[part]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  perf_STFT_FRAME_df['target_time']= list(map(stft_frames_to_seconds,perf_STFT_FRAME_df['target_STFT_FRAME']))


In [230]:
perf_STFT_FRAME_df

Unnamed: 0,eval_measure,horn_in_e_2,target_STFT_FRAME,target_time
0,1.000,16.676778,16.676778,0.114223
1,1.125,109.480074,109.480074,0.652946
2,1.250,202.556530,202.556530,1.193253
3,1.375,289.415825,289.415825,1.697471
4,1.500,371.111344,371.111344,2.171712
...,...,...,...,...
850,88.875,61525.071840,61525.071840,357.169759
851,89.000,61608.435953,61608.435953,357.653687
852,89.500,61913.804808,61913.804808,359.426350
853,90.000,62243.221807,62243.221807,361.338612


# Join video information with specified locations

In [231]:
joined = (perf_STFT_FRAME_df.set_index('eval_measure').join(times_df.set_index('eval_measure')))
joined_frames = joined.dropna()[['target_time', 'vid_frames']]

In [232]:
joined_frames

Unnamed: 0_level_0,target_time,vid_frames
eval_measure,Unnamed: 1_level_1,Unnamed: 2_level_1
1.125,0.652946,69.0
1.250,1.193253,83.0
1.375,1.697471,96.0
1.500,2.171712,109.0
1.625,2.647344,121.0
...,...,...
88.500,355.695373,9598.0
89.000,357.653687,9657.0
89.500,359.426350,9712.0
90.000,361.338612,9771.0


In [233]:
new_row = pd.DataFrame({'target_time':0, 'vid_frames':0},
                                                            index =[0])

In [234]:
time_to_frames_df = pd.concat([new_row, joined_frames])#.reset_index(drop = True)

# Final specification dataframe

In [235]:
time_to_frames_df.iloc[0:40]

Unnamed: 0,target_time,vid_frames
0.0,0.0,0.0
1.125,0.652946,69.0
1.25,1.193253,83.0
1.375,1.697471,96.0
1.5,2.171712,109.0
1.625,2.647344,121.0
1.75,3.115931,134.0
1.875,3.661708,148.0
2.0,4.206216,163.0
3.0,8.252153,266.0


# Perform interpolation

In [236]:
def get_idx(target_time_lst, frame_lst, spf ):
    frame_idx_lst = []
    wait_frames = int((target_time_lst[1] - target_time_lst[0])*(1/spf))
    frame_idx_lst+= [frame_lst[1]]* wait_frames#assume start at 1+0/1
    print(len(frame_idx_lst))
    for i in range(2, len(target_time_lst)): #subtractive, 

        #check that frames are accumulating as expected (similar to clock correction)
        target_frame_pos = int(target_time_lst[i-1]*(1/spf))
        diff_len = target_frame_pos-len(frame_idx_lst)
        print(diff_len)
        if diff_len >0:
            rep_frame = frame_idx_lst[-1:]
            frame_idx_lst +=diff_len*rep_frame #repeat frame until time is reached
            
        #Interpolation 
        time_diff = target_time_lst[i] - target_time_lst[i-1]
        prev_frame = frame_lst[i-1]
        cur_frame = frame_lst[i]

        num_frames = int(time_diff/spf)
        new_idx = np.linspace(prev_frame, cur_frame, num=num_frames, endpoint=False)
        
        #print("time diff is ", time_diff)
        #print("previous time", target_time_lst[i-1], 
        #      "should occur at frame", target_time_lst[i-1]*(1/spf))
        #print("currently at frame", len(frame_idx_lst))
        #print("numer of frames", num_frames)
        #print('prev_frame is ', prev_frame)
        #print("prev time is ", target_time_lst[i-1])
        #print("new indices are", new_idx)
        frame_idx_lst+=list(new_idx)
        
    frame_idx_lst+=frame_lst[-1:]#add last frame
    frame_idx_lst= [int(round(item )) for item in frame_idx_lst]
    return frame_idx_lst

In [237]:
idx_lst = get_idx(list(
    time_to_frames_df['target_time']), list(time_to_frames_df['vid_frames']), 
                 END_VIDEO_SPF)

19
0
0
0
1
0
0
0
1
0
1
1
0
1
1
1
1
0
0
0
1
0
0
0
1
1
0
1
0
1
0
1
0
1
0
0
1
1
1
0
0
1
1
0
0
0
1
0
0
1
1
0
0
1
0
1
0
0
1
1
0
0
1
0
1
1
0
1
1
0
1
0
1
1
0
1
0
0
1
0
1
0
1
0
1
1
1
0
1
0
1
0
1
1
0
1
0
0
1
0
1
0
1
1
0
1
0
1
0
1
0
1
0
0
1
0
1
0
1
0
1
1
0
1
0
0
1
1
0
1
0
0
0
1
1
1
0
1
0
1
0
0
1
0
1
0
0
1
1
0
1
0
1
1
0
1
0
0
1
0
1
0
1
1
1
0
1
0
1
1
1
1
0
1
1
0
1
0
1
0
1
0
1
0
1
1
0
1
1
0
1
1
1
1
0
1
0
1
0
0
1
0
0
1
1
0
1
0
1
0
0
1
0
1
0
0
1
0
0
1
1
0
0
1
0
0
1
1
0
0
0
0
0
0
1
0
0
1
0
0
1
1
0
0
0
1
0
1
1
1
0
0
1
1
0
1
1
0
1
0
0
0
1
0
0
0
1
0
0
1
0
0
1
1
0
1
0
1
1
0
0
0
0
1
0
1
1
1
0
1
0
1


Index of ffmpeg starts at 1 so:

In [238]:
idx_lst = [el + 1 for el in idx_lst]

In [239]:
idx_lst

[70,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 70,
 71,
 72,
 73,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 109,
 110,
 111,
 112,
 113,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 163,
 164,
 165,
 166,
 167,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 173,
 174,
 175,
 176,
 177,
 178,
 178,
 179,
 180,
 181,
 182,
 183,
 184,
 184,
 185,
 186,
 187,
 188,
 189,
 190,
 190,
 191,
 192,
 193,
 194,
 195,
 195,
 196,
 197,
 198,
 199,
 200,
 201,
 201,
 202,
 203,
 204,
 205,
 20

# write frame order to be read by shell script


In [240]:
convert_to_name=lambda x: part+"_"+str(x).zfill(5)+".jpg"

In [241]:
frame_list=list(map(convert_to_name, idx_lst))

In [244]:
with open("part_data/"+part+"/"+part+'_frame_order.txt', mode='wt', encoding='utf-8') as myfile:
    myfile.write('\n'.join(frame_list))

In [245]:
part

'horn_in_e_2'