In [47]:
import keras
import pandas as pd
import os
from collections import defaultdict


# Absolute, normalized location of the dataset root (resolved relative to the current working directory).
DATA_PATH = os.path.normpath(os.path.abspath('../Data'))

# Category identifiers '1'..'5'; each one is also the name of a sub-folder of DATA_PATH.
CATS = list(map(str, range(1, 6)))

# Destination of the merged feature table written at the end of the notebook.
RESULTING_CSV = os.path.join(DATA_PATH, 'feature_csv.csv')

# Feature names; each one is the stem of a per-category '<feature>.csv' file.
FEATURES = [
    'spec_bandwidth',
    'spec_centroids',
    'spec_rolloff',
    'zero_crossing',
]

# category -> feature name -> DataFrame loaded from that feature's CSV
data = defaultdict(dict)

In [48]:
SIZE = 43  # number of columns generated (and values kept) per feature


def create_columns(prefix, size=SIZE):
    """Build the column names of one feature as a single string.

    Args:
        prefix: Text placed before the underscore in every name.
        size: How many names to generate (indices 0 .. size - 1).

    Returns:
        One string of the form '<prefix>_0 <prefix>_1 ...', names separated
        by single spaces. Empty string when ``size`` is 0.
    """
    names = []
    for position in range(size):
        names.append(f'{prefix}_{position}')
    return ' '.join(names)


# Note: each of these is ONE space-separated string, not a list of names.
bandwidths_columns, centroids_columns, rolloff_columns, crossing_columns = (
    create_columns(prefix) for prefix in ('band', 'cent', 'roll', 'cros')
)

# Full header of the merged table: identifiers first, then every feature's names in order.
columns = ['file', 'cat']
for joined_names in (bandwidths_columns, centroids_columns, rolloff_columns, crossing_columns):
    columns.extend(joined_names.split())

In [49]:
def _rename_map(joined_names):
    """Map the labels '0', '1', ... to the names held in a space-separated string.

    The ``*_columns`` variables are single space-joined strings, so they have
    to be split before being indexed; indexing the string itself returns one
    character ('b', 'a', 'n', 'd', '_', ...) instead of a column name.
    """
    return {str(position): name for position, name in enumerate(joined_names.split())}


# feature name -> {source column label -> prefixed column name}
# (presumably the raw feature CSVs label their value columns '0', '1', ... — verify against the files)
rename_cols_dict = {
    'spec_bandwidth': _rename_map(bandwidths_columns),
    'spec_centroids': _rename_map(centroids_columns),
    'spec_rolloff': _rename_map(rolloff_columns),
    'zero_crossing': _rename_map(crossing_columns),
}

# Empty placeholder with the final column layout, indexed by file name.
# It is reassigned further down once the rows have been collected.
fn_df = pd.DataFrame(columns=columns).set_index('file')

In [50]:
index = 0  # position the next new row will get in ``for_fn_df``
indexes = dict()  # filename -> position of that file's row in ``for_fn_df``

# One list per file, laid out like ``columns``:
# [file, cat, <SIZE values of FEATURES[0]>, <SIZE values of FEATURES[1]>, ...]
for_fn_df = []

for cat in CATS:
    for feature_pos, feature in enumerate(FEATURES):
        path = os.path.join(DATA_PATH, cat, 'csv', feature + '.csv')
        # Load the feature table and rename the original frame's columns.
        df = pd.read_csv(path, index_col='file').rename(columns=rename_cols_dict[feature])
        data[cat][feature] = df  # author's note: this is probably no longer needed

        # First cell of this feature's slot: two identifier cells (file, cat)
        # followed by SIZE cells for every preceding feature.
        start = 2 + feature_pos * SIZE

        for file, series in df.iterrows():
            if file not in indexes:
                # File seen for the first time: add a row pre-filled with NaN so
                # that a feature missing for this file leaves a gap instead of
                # shifting the following features into the wrong columns.
                for_fn_df.append([file, cat] + [float('nan')] * (SIZE * len(FEATURES)))
                indexes[file] = index
                index += 1

            # Keep at most SIZE values and write them into this feature's slot
            # of the file's row (located through ``indexes``).
            values = series.tolist()[:SIZE]
            row = for_fn_df[indexes[file]]
            row[start:start + len(values)] = values


In [51]:
# Turn the collected per-file rows into the final table and use the file name as its index.
fn_df = pd.DataFrame(for_fn_df, columns=columns).set_index('file')

In [57]:
# Preview up to 10 random rows. The fixed seed keeps the preview identical across
# re-runs, and the min() guard avoids a ValueError when the table has fewer than 10 rows.
fn_df.sample(n=min(10, len(fn_df)), random_state=0)

Unnamed: 0_level_0,cat,band_0,band_1,band_2,band_3,band_4,band_5,band_6,band_7,band_8,...,cros_33,cros_34,cros_35,cros_36,cros_37,cros_38,cros_39,cros_40,cros_41,cros_42
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C:\Users\mvp31\Desktop\University\Semester_7\ML\Music\Data\3\03-01-03-02-02-02-17_A17.wav,3,4610.918396,3594.30282,4234.233654,3710.203689,4615.573996,3941.307958,3956.397947,4195.888449,3990.946575,...,0.064578,0.092711,0.168798,0.003836,0.092072,0.033248,0.033248,0.0,0.0,0.042199
C:\Users\mvp31\Desktop\University\Semester_7\ML\Music\Data\5\03-01-04-01-01-02-08_A8.wav,5,4239.901164,4940.922204,4073.699178,5218.792518,4870.381264,3307.474751,3165.580868,3324.588836,4863.323168,...,0.044944,0.099875,0.053059,0.018727,0.044944,0.03995,0.089888,0.018102,0.065543,0.016854
C:\Users\mvp31\Desktop\University\Semester_7\ML\Music\Data\3\03-01-03-02-02-02-19_A19.wav,3,5533.844326,3546.474838,3027.931034,3331.953997,2159.348151,3811.60933,3491.386622,3653.632025,3519.133194,...,0.069212,0.076372,0.054893,0.020286,0.02864,0.03401,0.02148,0.057279,0.063246,0.102625
C:\Users\mvp31\Desktop\University\Semester_7\ML\Music\Data\2\03-01-02-01-01-01-19_A19.wav,2,4647.098609,5403.48706,3385.707437,4872.590348,3809.337221,3369.36129,5269.097721,4420.589845,2925.568329,...,0.077852,0.0,0.068456,0.12349,0.07651,0.055705,0.028859,0.097987,0.112752,0.124832
C:\Users\mvp31\Desktop\University\Semester_7\ML\Music\Data\2\03-01-01-01-02-02-08_A8.wav,2,6591.72154,3302.604016,2843.802797,2790.421315,2399.402326,2460.309807,4772.411212,4027.163318,2635.281912,...,0.026485,0.035791,0.007158,0.0,0.045812,0.088762,0.03937,0.032212,0.073729,0.042233
C:\Users\mvp31\Desktop\University\Semester_7\ML\Music\Data\2\03-01-01-01-02-02-22_A22.wav,2,3975.565982,3036.991459,3256.68842,3950.156321,6083.8339,3724.748767,4235.629721,4578.207647,4528.837071,...,0.096113,0.100353,0.037456,0.008481,0.042403,0.04947,0.00424,0.036042,0.050883,0.037456
C:\Users\mvp31\Desktop\University\Semester_7\ML\Music\Data\3\03-01-03-01-01-02-16_A16.wav,3,4399.140039,4433.311816,3535.597225,2833.707101,3778.964807,2684.503708,3466.447605,4162.825308,4116.283749,...,0.068456,0.084564,0.092617,0.05906,0.091275,0.083893,0.087248,0.05906,0.099329,0.167785
C:\Users\mvp31\Desktop\University\Semester_7\ML\Music\Data\2\03-01-02-02-01-02-12_A12.wav,2,7480.602991,5427.334201,4436.037934,2742.437455,4043.162492,5112.548949,3311.682425,4827.343777,4689.37523,...,0.134541,0.119017,0.089263,0.095731,0.10414,0.095731,0.097671,0.087322,0.058215,0.075679
C:\Users\mvp31\Desktop\University\Semester_7\ML\Music\Data\1\03-01-05-01-01-02-05_A5.wav,1,5008.500508,4297.975686,2759.465833,2261.458304,3946.776176,4141.018884,3346.373891,2780.167036,3036.301293,...,0.025575,0.085678,0.069693,0.001279,0.214834,0.098465,0.079923,0.05243,0.124041,0.234015
C:\Users\mvp31\Desktop\University\Semester_7\ML\Music\Data\1\03-01-05-02-02-01-03_A3.wav,1,4200.925871,2988.808049,3245.723052,3566.158878,2661.657599,4353.719042,2697.051366,2597.925985,4948.254094,...,0.23409,0.308799,0.300498,0.177089,0.140564,0.115661,0.127283,0.152186,0.130603,0.101273


In [58]:
# Persist the merged feature table (index included) to RESULTING_CSV.
fn_df.to_csv(path_or_buf=RESULTING_CSV)
