In [None]:
# Copyright (C) 2024 Takaaki Shiotani

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

In [1]:
import sys

import pandas as pd

sys.path.append("../src")

from datahandler import DataHandler
from presentation import FeatureHandler

In [2]:
dh = DataHandler()

# Feature extractors applied to every presentation, in this order. Each one
# returns a mapping that is merged into the row with `|`, so on a duplicate
# key the later extractor's value wins.
# NOTE(review): the meaning of the 40 / 100 arguments passed to
# calc_formant_rates is not visible from this notebook — confirm against
# FeatureHandler.
feature_steps = (
    FeatureHandler.calc_articulation_rate,
    lambda pres: FeatureHandler.calc_formant_rates(pres, 40, 100),
    FeatureHandler.calc_pitch_features,
    FeatureHandler.calc_volume_variation,
    FeatureHandler.calc_silence,
    FeatureHandler.calc_detailed_speeds,
)

# One dict of features per (speaker name, presentation index) pair.
data_list = []
for name in dh.names:
    for number in dh.name2indexes[name]:
        presentation = dh.load_presentation(name, number)
        # The wav path is attached before any extractor runs.
        presentation.wav_path = dh.get_wav_path(name, number)

        row = {}
        for step in feature_steps:
            row = row | step(presentation)

        # Speaker-level metadata, added after the computed features.
        row.update(
            {
                "is_proficient": dh.name2ispro[name],
                "name_label": dh.name2label[name],
                "sex": dh.name2sex[name],
                "number": number,
            }
        )
        data_list.append(row)



In [3]:
# Build the feature table: one row per dict in data_list, with the dict keys
# becoming the columns.
processed_df = pd.DataFrame(data=data_list)

In [6]:
# Preview the first five rows as a sanity check before saving.
processed_df.head(n=5)

Unnamed: 0,AR,F1R_100,F2R_100,F1F2R_100,AVGP,STDP,Q1P,Q9P,VOL_VAR_LOCAL,VOL_VAR_GLOBAL,SILENCE_TIME_RATIO,SHORT_SILENCE_RATE,AVG_SPEED,STD_SPEED,AVG_SILENCE_DURATION_SENTENCE,STD_SILENCE_DURATION_SENTENCE,is_proficient,name_label,sex,number
0,6.883358,0.116969,0.06279,0.08988,5.095194,0.20865,127.87206,217.553125,0.065949,0.02559,0.157231,0.636364,7.289429,1.951765,2.211579,1.793125,False,1,male,1
1,7.044336,0.159771,0.050643,0.105207,4.926658,0.197851,105.071233,179.796849,0.066636,0.030079,0.17724,0.666667,7.261873,2.165066,1.147895,0.833212,False,1,male,2
2,7.25438,0.118012,0.045405,0.081708,4.914579,0.168032,106.907846,167.756392,0.069972,0.018125,0.169691,0.6125,7.898941,2.101763,1.421818,0.783869,False,1,male,3
3,6.377191,0.125451,0.043434,0.084443,4.906951,0.173037,106.292102,168.728195,0.070552,0.024298,0.226649,0.619565,6.917462,2.297324,2.105,2.369378,False,1,male,4
4,7.204087,0.177217,0.024878,0.101047,5.21757,0.216665,141.065749,248.463582,0.018212,0.009055,0.110521,0.591837,7.447845,1.696973,1.754167,2.254394,False,2,female,1


In [7]:
# Write the feature table next to the rest of the project's data.
# NOTE(review): DATA_DIR is combined with `/`, so it is presumably a
# pathlib.Path — confirm in DataHandler.
data_dir = DataHandler.DATA_DIR
output_path = data_dir / "processed_data.csv"
# index=False keeps the positional row index out of the CSV.
processed_df.to_csv(output_path, index=False)