In [2]:
!pip install librosa

Collecting librosa
  Using cached librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting audioread>=2.1.9 (from librosa)
  Using cached audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Using cached soundfile-0.13.1-py2.py3-none-macosx_11_0_arm64.whl.metadata (16 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl.metadata (5.6 kB)
Collecting lazy_loader>=0.1 (from librosa)
  Downloading lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)
Collecting msgpack>=1.0 (from librosa)
  Downloading msgpack-1.1.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (8.4 kB)
Using cached librosa-0.11.0-py3-none-any.whl (260 kB)
Using cached audioread-3.0.1-py3-none-any.whl (23 kB)
Downloading lazy_loader-0.4-py3-none-any.whl (12 kB)
Downloading msgpack-1.1.0-cp312-cp312-macosx_11_0_arm64.whl (82 kB)
Downlo

### extract amplitude and frequency

In [2]:
import librosa as lb
import numpy as np
import pandas as pd
import zipfile as zf

In [10]:
# Load the recording; sr=None asks librosa to keep the file's native sampling
# rate instead of resampling. `y` is the waveform, `sr` the sampling rate.
y,sr =lb.load("audio.wav",sr=None)

In [15]:
# STFT parameters.
# A larger FFT window gives finer frequency resolution but coarser time
# resolution. A larger hop between windows produces fewer frames (coarser time
# resolution) and less overlap between consecutive windows.
fft_window_size = 1024
window_step_size = 512

# Complex STFT matrix: rows are frequency bins, columns are time frames, and
# every value carries both amplitude and phase.
D = lb.stft(
    y,
    n_fft=fft_window_size,
    hop_length=window_step_size,
)


In [None]:
# Magnitude of every complex STFT value: how much energy a frequency
# contributes in a frame (like amplitude, but in the frequency domain).
magnitude = np.abs(D)
# Maps the row indices of the STFT matrix to real frequencies.
frequencies = lb.fft_frequencies(sr=sr,n_fft=fft_window_size)


In [None]:
# Row index of the strongest frequency bin in every time frame.
dominant_freq_ind = np.argmax(magnitude, axis=0)

# Translate those bin indices into real frequencies.
dominant_freqs = frequencies[dominant_freq_ind]

# Magnitude of the dominant bin, one value per frame: pair each frame (column)
# index with the row index of its dominant bin and read all values at once.
frame_indices = np.arange(magnitude.shape[1])
dominant_mags = list(magnitude[dominant_freq_ind, frame_indices])

# Convert the frame indices into timestamps.
time = lb.frames_to_time(frame_indices, sr=sr, hop_length=window_step_size)



In [38]:
# One row per STFT frame: timestamp, magnitude of the dominant bin, and the
# frequency of that bin.
audio_df = pd.DataFrame(
    {
        "Common time (s)": time,
        "amplitude": dominant_mags,
        "frequency": dominant_freqs,
    }
)

In [25]:
# Show the assembled audio feature table as the cell output.
audio_df

Unnamed: 0,Common Time (s),amplitude,frequency
0,0.000000,0.003599,0.00
1,0.010667,0.007781,0.00
2,0.021333,0.007627,0.00
3,0.032000,0.007721,0.00
4,0.042667,0.007866,0.00
...,...,...,...
96536,1029.717333,13.428600,281.25
96537,1029.728000,5.286401,187.50
96538,1029.738667,4.206217,187.50
96539,1029.749333,4.553483,750.00


In [37]:
# Write the audio features to CSV; index=False leaves out the row index column.
audio_df.to_csv('audio.csv', index=False)


In [50]:
import zipfile

# Archive to extend and the CSV file that goes into it.
zip_path = "sens_data.zip"
file_to_add = "audio.csv"

# Mode "a" appends to the archive; the entry is stored under the name
# "audio.csv".
# NOTE(review): re-running this cell presumably appends a second entry with
# the same name to the archive -- confirm before re-executing.
with zipfile.ZipFile(zip_path, mode="a") as archive:
    archive.write(file_to_add, arcname="audio.csv")


### add activity

In [14]:
# Load the two joined tables that already contain the audio columns; judging by
# the file names, one is the plain join and the other the interpolated variant.
joined_df = pd.read_csv("joined_data_with_audio.csv")
joined_interpol_df = pd.read_csv("joined_interpol_data_with_audio.csv")

In [15]:
# Boundaries on the "Common time (s)" axis at which the activity changes.
times = [75.13,126.05,187.13,264.95,340.38,390.75,466.28,508.69,585.55,640.91,730.51,793.47,834.07,939.69]

def add_activities(joined_df):
    """Label every row of ``joined_df`` with the activity performed at its time.

    The boundaries in the module-level ``times`` list split the time axis into
    ``len(times) + 1`` half-open segments ``[start, end)``. The first segment
    starts at 0 (inclusive) and the last one has no upper bound. A row whose
    time lies exactly on a boundary therefore belongs to the segment that
    starts there, so rows at 0 or exactly on a boundary are labelled instead
    of being left without an activity.

    Args:
        joined_df: DataFrame with a numeric "Common time (s)" column.

    Returns:
        The same DataFrame object, modified in place: the "Activity" column is
        created or updated. Rows with a negative or missing time are not
        touched (they stay NaN when the column is newly created).

    Raises:
        ValueError: if ``times`` no longer matches the number of activities.
    """
    # Activity of each consecutive segment, in chronological order.
    segment_activities = (
        "rest", "walk", "phone", "stairs", "rest",
        "phone", "socialize", "walk", "study", "walk",
        "stairs", "walk", "phone", "study", "socialize",
    )
    if len(segment_activities) != len(times) + 1:
        raise ValueError(
            f"expected {len(segment_activities) - 1} boundaries in `times`, got {len(times)}"
        )

    timestamps = joined_df["Common time (s)"]
    segment_starts = [0, *times]
    segment_ends = [*times, None]  # None marks the open-ended final segment

    for start, end, activity in zip(segment_starts, segment_ends, segment_activities):
        in_segment = timestamps >= start
        if end is not None:
            in_segment = in_segment & (timestamps < end)
        joined_df.loc[in_segment, "Activity"] = activity
    return joined_df

# add_activities writes the "Activity" column into the frame it receives and
# returns that same frame, so each name is rebound to the object it already held.
joined_df=add_activities(joined_df)
joined_interpol_df=add_activities(joined_interpol_df)

In [16]:
# Count the missing values in every column of the interpolated frame.
joined_interpol_df.isna().sum()

Distance (cm)                         0
Illuminance (lx)                      0
amplitude                             0
frequency                             0
Latitude (°)                          0
Longitude (°)                         0
Height (m)                        16103
Velocity (m/s)                   416297
Direction (°)                    416297
Horizontal Accuracy (m)               0
Vertical Accuracy (m)                 0
Magnetic field x (µT)                 0
Magnetic field y (µT)                 0
Magnetic field z (µT)                 0
Acceleration x (m/s^2)                0
Acceleration y (m/s^2)                0
Acceleration z (m/s^2)                0
Gyroscope x (rad/s)                   0
Gyroscope y (rad/s)                   0
Gyroscope z (rad/s)                   0
Pressure (hPa)                        0
Linear Acceleration x (m/s^2)         0
Linear Acceleration y (m/s^2)         0
Linear Acceleration z (m/s^2)         0
Common time (s)                       0


In [17]:
# Write both activity-labelled frames to CSV; index=False leaves out the row
# index column.
joined_interpol_df.to_csv("data_int_without_questionnaire.csv", index=False)
joined_df.to_csv("data_without_questionnaire.csv",index = False)

Unnamed: 0,Acceleration x (m/s^2),Acceleration y (m/s^2),Acceleration z (m/s^2),Gyroscope x (rad/s),Gyroscope y (rad/s),Gyroscope z (rad/s),Illuminance (lx),Linear Acceleration x (m/s^2),Linear Acceleration y (m/s^2),Linear Acceleration z (m/s^2),...,Vertical Accuracy (m),Magnetic field x (µT),Magnetic field y (µT),Magnetic field z (µT),Pressure (hPa),Distance (cm),Common time (s),Activity,amplitude,frequency
0,-1.889025,6.510732,7.223082,-0.087699,-0.106835,0.023549,126.148750,0.064004,0.030721,0.083742,...,2.046588,19.837200,-74.188202,80.715195,1005.813538,5.0,0.000000,rest,0.003599,0.00
1,-1.889025,6.510732,7.223082,-0.087699,-0.106835,0.023549,126.148750,0.064004,0.030721,0.083742,...,2.046588,19.837200,-74.188202,80.715195,1005.813538,5.0,0.002486,rest,0.003599,0.00
2,-1.889025,6.510732,7.223082,-0.087699,-0.106835,0.023549,126.148750,0.064004,0.030721,0.083742,...,2.046588,19.837200,-74.188202,80.715195,1005.813538,5.0,0.004973,rest,0.003599,0.00
3,-1.889025,6.510732,7.223082,-0.087699,-0.106835,0.023549,126.148750,0.064004,0.030721,0.083742,...,2.046588,19.837200,-74.188202,80.715195,1005.813538,5.0,0.007459,rest,0.007781,0.00
4,-1.898602,6.535572,7.266178,-0.087699,-0.106835,0.023549,126.148750,0.064004,0.030721,0.083742,...,2.046588,19.837200,-74.188202,80.715195,1005.813538,5.0,0.009945,rest,0.007781,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
416292,-2.198625,6.418107,6.226870,-0.111448,0.190454,0.032608,141.077499,-0.104240,0.097189,-0.756839,...,1.756249,19.422400,-74.151596,71.638397,1005.731506,5.0,1035.026800,rest,3.354277,281.25
416293,-2.200945,6.432322,6.248417,-0.157030,0.154565,0.037838,141.077499,-0.104240,0.097189,-0.756839,...,1.756249,19.422400,-74.151596,71.638397,1005.731506,5.0,1035.029286,rest,3.354277,281.25
416294,-2.139219,6.442198,6.319121,-0.202961,0.109733,0.042734,141.077499,-0.104240,0.097189,-0.756839,...,1.756249,19.422400,-74.151596,71.638397,1005.731506,5.0,1035.031772,rest,3.354277,281.25
416295,-2.074127,6.452823,6.454319,-0.251324,0.056673,0.043600,141.077499,-0.104240,0.097189,-0.756839,...,1.756249,18.421999,-74.639595,70.650200,1005.731506,5.0,1035.034258,rest,3.354277,281.25


True