In [338]:
import numpy as np
import pandas as pd
from os.path import join as joinpath

In [339]:
# Root analysis folder. NOTE: machine-specific absolute path; each file is
# looked up under <path>/<song>/<transcriber>/.
path = "/Users/polinap/Yandex.Disk.localized/RESEARCH_RU/VOCAL_NOTES_PROJECT/RUSSIAN/ANALYSIS/"

# Song identifier: used both as a sub-folder name and as the file-name prefix.
song = "Da_po_zoriushke_1"

# Transcriber initials: one sub-folder (and one file-name component) each.
transcriber_list = [
    "PP",
    "OV",
]

# File-name endings (without ".csv") of the pitch-valued files.
pitches_fileext_list = [
    "notes",
    "segments",
    "changed_pitches",
]

# File-name endings (without ".csv") of the cents-valued files.
cents_fileext_list = [
    "notes_cents_tc351.84",
    "segments_cents_tc351.84",
    "changed_cents",
]

# Separator placed between song, transcriber and ending in a file name.
divider = "__"

In [340]:
def read_from_csv(notespath):
    """Load a notes CSV as a numeric ``TIME`` / ``VALUE`` / ``DURATION`` frame.

    The file may or may not begin with a header row (recognised by ``TIME``
    in its first cell); the header, if present, is discarded. Columns beyond
    the third are dropped. The result always has a fresh 0-based index.

    All three columns are returned as numbers regardless of whether the file
    had a header. (Because the file is read with ``header=None``, a header
    row makes pandas load every column as strings, so each column has to be
    converted explicitly.) ``VALUE`` is returned as integers when every entry
    is a whole number, otherwise as floats; ``TIME`` and ``DURATION`` are
    always floats.

    Parameters
    ----------
    notespath
        Passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Columns ``['TIME', 'VALUE', 'DURATION']``.

    Raises
    ------
    ValueError
        If the file has fewer than three columns, or a cell in one of the
        three columns cannot be parsed as a number.
    """
    notes = pd.read_csv(notespath, header=None)  # header row is handled below

    if notes.shape[1] < 3:
        raise ValueError(
            "expected at least 3 columns (TIME, VALUE, DURATION), "
            "found {} in {!r}".format(notes.shape[1], notespath)
        )

    # keep only the first three columns; copy so later assignments never
    # write into a view of the frame returned by read_csv
    notes = notes.iloc[:, :3].copy()

    # remove the header row if there is one
    if not notes.empty and str(notes.iloc[0, 0]).strip().upper() == "TIME":
        notes = notes.iloc[1:]
    notes = notes.reset_index(drop=True)

    notes.columns = ['TIME', 'VALUE', 'DURATION']

    # with a header row present every cell was read as a string
    for column in notes.columns:
        notes[column] = notes[column].astype(float)

    if (notes['VALUE'] % 1 == 0).all():        # all integers
        notes['VALUE'] = notes['VALUE'].astype(int)

    return notes

In [344]:
def _strip_maxmin_markers(notes):
    """Return ``notes`` without a leading max/min marker pair, if it has one.

    A marker pair is recognised by its fixed signature: the first two rows
    have TIME 0.001 and 0.002 and DURATION 0.001. TIME/DURATION may arrive
    as strings, so they are parsed before comparing.
    """
    if len(notes) < 2:
        return notes

    head = notes.iloc[:2]
    times = pd.to_numeric(head['TIME'], errors='coerce').to_numpy(dtype=float)
    durations = pd.to_numeric(head['DURATION'], errors='coerce').to_numpy(dtype=float)

    # NaN (unparseable cell) never compares close, so such rows are kept
    is_marker_pair = (
        np.allclose(times, [0.001, 0.002], rtol=0, atol=1e-9)
        and np.allclose(durations, [0.001, 0.001], rtol=0, atol=1e-9)
    )
    if not is_marker_pair:
        return notes
    return notes.iloc[2:].reset_index(drop=True)


def add_maxmin_to_files(notespath_list):
    """Prepend shared max/min marker rows to every file, overwriting in place.

    Every file is read with ``read_from_csv``; the largest and smallest
    ``VALUE`` across *all* files are determined (NaNs ignored); then two
    rows are put at the top of each file::

        TIME=0.001, VALUE=<overall max>, DURATION=0.001
        TIME=0.002, VALUE=<overall min>, DURATION=0.001

    Each file is rewritten at its original path with a header row and no
    index column. ``VALUE`` is written as integers when every entry of that
    file (markers included) is a whole number, otherwise as floats.

    The operation is idempotent: a marker pair left at the top of a file by
    an earlier run is removed before the extremes are computed, so running
    the function again does not stack duplicate marker rows and stale
    markers do not influence the new extremes.

    Parameters
    ----------
    notespath_list : iterable of paths
        CSV files to process together. An empty iterable is a no-op.

    Raises
    ------
    ValueError
        If the files contain no numeric ``VALUE`` at all.
    """
    notespath_list = list(notespath_list)
    if not notespath_list:
        return  # nothing to do

    notes_list = [
        _strip_maxmin_markers(read_from_csv(notespath))
        for notespath in notespath_list
    ]

    # combine the values of all transcriptions, find max and min
    all_values = np.concatenate(
        [notes['VALUE'].to_numpy(dtype=float) for notes in notes_list]
    )
    if all_values.size == 0 or np.isnan(all_values).all():
        raise ValueError("no numeric VALUE entries found in the given files")

    mymax = np.nanmax(all_values)
    mymin = np.nanmin(all_values)

    markers = pd.DataFrame({
        'TIME': [0.001, 0.002],
        'VALUE': [mymax, mymin],
        'DURATION': [0.001, 0.001],
    })

    # add max and min values at the start of each transcription and save
    for notes, notespath in zip(notes_list, notespath_list):
        marked = pd.concat([markers, notes], ignore_index=True)

        values = marked['VALUE'].astype(float)
        if (values % 1 == 0).all():
            marked['VALUE'] = values.astype(int)
        else:
            marked['VALUE'] = values

        marked.to_csv(notespath, index=False)

In [346]:
# adjust pitches files
# Collect <path>/<song>/<transcriber>/<song>__<transcriber>__<ending>.csv for
# every transcriber / pitch-file ending, then process them as one group so
# they all receive the same max/min values.
notespath_list = []
for transcriber in transcriber_list:
    for fileext in pitches_fileext_list:
        filename = divider.join([song, transcriber, fileext]) + '.csv'
        notespath = joinpath(path, song, transcriber, filename)
        notespath_list += [notespath]

add_maxmin_to_files(notespath_list)

          TIME    VALUE     DURATION
0        0.001  361.985        0.001
1        0.002  236.258        0.001
2  0.685714286  314.455  0.690793651
3  1.442539683  237.279  0.177777778
4  1.620317460  236.325  0.391111111
          TIME    VALUE     DURATION
0        0.001  361.985        0.001
1        0.002  236.258        0.001
2  0.685714286  314.455  0.690793651
3  1.442539683  237.279  0.177777778
4   1.62031746  236.325  0.391111111
          TIME    VALUE     DURATION
0        0.001  361.985        0.001
1        0.002  236.258        0.001
2  2.011428571  341.268  0.421587302
3  2.438095238  298.403  0.126984127
4  3.239183673  341.747  0.269206349
          TIME    VALUE     DURATION
0        0.001  361.985        0.001
1        0.002  236.258        0.001
2  0.757551020  314.404  0.635646259
3  1.469387755  237.391  0.191564626
4  1.660952381  236.258  0.398367347
          TIME    VALUE     DURATION
0        0.001  361.985        0.001
1        0.002  236.258        0.001
2

In [347]:
# adjust cents files
# Collect <path>/<song>/<transcriber>/<song>__<transcriber>__<ending>.csv for
# every transcriber / cents-file ending, then process them as one group so
# they all receive the same max/min values.
notespath_list = []
for transcriber in transcriber_list:
    for fileext in cents_fileext_list:
        filename = divider.join([song, transcriber, fileext]) + '.csv'
        notespath = joinpath(path, song, transcriber, filename)
        notespath_list += [notespath]

add_maxmin_to_files(notespath_list)

          TIME  VALUE     DURATION
0        0.001     49        0.001
1        0.002   -690        0.001
2  0.685714286   -195  0.690793651
3  1.442539683   -682  0.177777778
4  1.620317460   -689  0.391111111
          TIME  VALUE     DURATION
0        0.001     49        0.001
1        0.002   -690        0.001
2  0.685714286   -194  0.690793651
3  1.442539683   -682  0.177777778
4   1.62031746   -689  0.391111111
          TIME  VALUE     DURATION
0        0.001     49        0.001
1        0.002   -690        0.001
2  2.016507937    -53  0.421587302
3  2.443174603   -285  0.126984127
4  3.245714286    -50  0.269206349
          TIME  VALUE     DURATION
0        0.001     49        0.001
1        0.002   -690        0.001
2  0.757551020   -195  0.635646259
3  1.469387755   -681  0.191564626
4  1.660952381   -690  0.398367347
          TIME  VALUE     DURATION
0        0.001     49        0.001
1        0.002   -690        0.001
2  0.748843537   -195  0.635646259
3  1.460680272   -68