In [2]:
!pip install librosa

Collecting librosa
  Using cached librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting audioread>=2.1.9 (from librosa)
  Using cached audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Using cached soundfile-0.13.1-py2.py3-none-macosx_11_0_arm64.whl.metadata (16 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl.metadata (5.6 kB)
Collecting lazy_loader>=0.1 (from librosa)
  Downloading lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)
Collecting msgpack>=1.0 (from librosa)
  Downloading msgpack-1.1.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (8.4 kB)
Using cached librosa-0.11.0-py3-none-any.whl (260 kB)
Using cached audioread-3.0.1-py3-none-any.whl (23 kB)
Downloading lazy_loader-0.4-py3-none-any.whl (12 kB)
Downloading msgpack-1.1.0-cp312-cp312-macosx_11_0_arm64.whl (82 kB)
Downlo

### Extract amplitude and frequency

In [33]:
import librosa as lb
import numpy as np
import pandas as pd
import zipfile as zf

In [34]:
# Load the recording as a waveform `y` plus its sampling rate `sr`.
# sr=None keeps the file's native sampling rate instead of resampling.
y,sr =lb.load("audio.wav",sr=None)

In [35]:
# FFT window length in samples: a larger window gives finer frequency
# resolution but coarser time resolution.
fft_window_size = 1024
# Hop between consecutive windows in samples: a larger hop gives fewer frames
# (coarser time resolution) and less overlap between neighbouring windows.
window_step_size = 512
# STFT matrix with frequency bins as rows and time frames as columns; each
# complex entry carries the magnitude and phase of that bin in that frame.
D = lb.stft(y, n_fft=fft_window_size, hop_length=window_step_size)


In [36]:
# Magnitude of every complex STFT coefficient, i.e. how strongly each
# frequency bin contributes in each frame (the phase is discarded).
magnitude = np.abs(D)
# Frequency of each FFT bin, so a row index of the STFT matrix can be mapped
# to a real frequency.
frequencies = lb.fft_frequencies(sr=sr,n_fft=fft_window_size)


In [37]:
# Row index (frequency bin) of the strongest component in every time frame.
dominant_freq_ind = np.argmax(magnitude, axis=0)
# Translate the bin indices into real frequencies.
dominant_freqs = frequencies[dominant_freq_ind]

# Magnitude at the dominant bin of each frame, read with a single vectorised
# lookup instead of a Python loop over every frame. .tolist() keeps
# `dominant_mags` a plain list of floats, one entry per frame, as before.
frame_indices = np.arange(magnitude.shape[1])
dominant_mags = magnitude[dominant_freq_ind, frame_indices].tolist()

# Convert frame numbers into timestamps, using the same hop length as the STFT.
time = lb.frames_to_time(frame_indices, sr=sr, hop_length=window_step_size)



In [38]:
# One row per STFT frame: its timestamp, the peak magnitude in that frame and
# the frequency at which the peak occurs.
audio_df = pd.DataFrame({"Common time (s)":time,"amplitude":dominant_mags,"frequency":dominant_freqs})

In [39]:
# Show the frame; pandas elides the middle rows of a long frame in the output.
audio_df

Unnamed: 0,Common time (s),amplitude,frequency
0,0.000000,0.003599,0.00
1,0.010667,0.007781,0.00
2,0.021333,0.007627,0.00
3,0.032000,0.007721,0.00
4,0.042667,0.007866,0.00
...,...,...,...
96536,1029.717333,13.428600,281.25
96537,1029.728000,5.286401,187.50
96538,1029.738667,4.206217,187.50
96539,1029.749333,4.553483,750.00


In [37]:
# Write the audio features to CSV; index=False leaves out the row-number column.
audio_df.to_csv('audio.csv', index=False)


In [50]:
import zipfile

# Archive to extend and the file to add to it.
zip_path = "sens_data.zip"
file_to_add = "audio.csv"

# Append mode ('a') keeps the members that are already in the archive.
# ZipFile.write() never replaces a member, so re-running this cell would store
# a second entry with the same name. Skip the write when it is already there.
with zipfile.ZipFile(zip_path, 'a') as zipf:
    if file_to_add in zipf.namelist():
        print(f"{file_to_add} is already in {zip_path}; rebuild the archive to refresh it.")
    else:
        zipf.write(file_to_add, arcname=file_to_add)


### Add activity labels

In [24]:
# Load the two previously exported sensor tables.
# NOTE(review): "int" presumably stands for the interpolated variant - confirm.
joined_df = pd.read_csv("corrected_data.csv")
joined_interpol_df = pd.read_csv("corrected_int_data.csv")

In [25]:
# Segment boundaries on the "Common time (s)" axis, in ascending order.
times = [75.13,126.05,187.13,264.95,340.38,390.75,466.28,508.69,585.55,640.91,730.51,793.47,834.07,939.69]

# Activity of each segment: entry 0 covers [0, times[0]), entry i covers
# [times[i-1], times[i]) and the last entry covers everything from times[-1] on.
ACTIVITY_LABELS = [
    "rest", "walk", "phone", "stairs", "rest", "phone", "socialize", "walk",
    "study", "walk", "stairs", "walk", "phone", "study", "socialize",
]


def add_activities(joined_df, boundaries=None, labels=None):
    """Label every row of ``joined_df`` with the activity of its time segment.

    Segments are half-open intervals ``[start, end)``, so a row whose time is
    exactly 0 or exactly on a boundary is labelled too (it belongs to the
    segment that starts there). Rows with a negative or missing time are left
    untouched.

    Parameters
    ----------
    joined_df : pandas.DataFrame
        Frame with a "Common time (s)" column. It is modified in place: the
        "Activity" column is created if it does not exist yet.
    boundaries : sequence of float, optional
        Strictly increasing segment boundaries. Defaults to the module-level
        ``times``.
    labels : sequence of str, optional
        One label per segment, i.e. ``len(boundaries) + 1`` entries. Defaults
        to ``ACTIVITY_LABELS``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, for convenient reassignment.

    Raises
    ------
    ValueError
        If the number of labels does not match the number of segments, or the
        boundaries are not strictly increasing.
    """
    edges = np.asarray(times if boundaries is None else boundaries, dtype=float)
    names = np.asarray(ACTIVITY_LABELS if labels is None else labels, dtype=object)
    if names.size != edges.size + 1:
        raise ValueError(
            f"expected {edges.size + 1} labels for {edges.size} boundaries, got {names.size}"
        )
    if np.any(np.diff(edges) <= 0):
        raise ValueError("boundaries must be strictly increasing")

    timestamps = joined_df["Common time (s)"].to_numpy(dtype=float)
    # NaN compares False here, so rows without a timestamp stay unlabelled.
    labelled = timestamps >= 0
    # side="right" assigns a timestamp equal to a boundary to the segment that
    # starts at that boundary.
    segment = np.searchsorted(edges, timestamps[labelled], side="right")

    if "Activity" not in joined_df.columns:
        # Object dtype so that string labels and missing values can coexist.
        joined_df["Activity"] = pd.Series(np.nan, index=joined_df.index, dtype=object)
    joined_df.loc[labelled, "Activity"] = names[segment]
    return joined_df

# Label both frames. add_activities writes the "Activity" column into the
# frame it is given and returns that same frame.
joined_df=add_activities(joined_df)
joined_interpol_df=add_activities(joined_interpol_df)

In [26]:
# Number of missing values in each column of the frame.
joined_interpol_df.isna().sum()

Distance (cm)                         0
Illuminance (lx)                      0
amplitude                             0
frequency                             0
Latitude (°)                          0
Longitude (°)                         0
Height (m)                        16103
Velocity (m/s)                   416297
Direction (°)                    416297
Horizontal Accuracy (m)               0
Vertical Accuracy (m)                 0
Magnetic field x (µT)                 0
Magnetic field y (µT)                 0
Magnetic field z (µT)                 0
Acceleration x (m/s^2)                0
Acceleration y (m/s^2)                0
Acceleration z (m/s^2)                0
Gyroscope x (rad/s)                   0
Gyroscope y (rad/s)                   0
Gyroscope z (rad/s)                   0
Pressure (hPa)                        0
Linear Acceleration x (m/s^2)         0
Linear Acceleration y (m/s^2)         0
Linear Acceleration z (m/s^2)         0
Common time (s)                       0


In [27]:
# Save both frames to CSV without the row-index column.
joined_interpol_df.to_csv("data_int_without_questionnaire.csv", index=False)
joined_df.to_csv("data_without_questionnaire.csv",index = False)

In [28]:
# Number of missing values in each column of the frame.
joined_df.isna().sum()

Distance (cm)                         0
Illuminance (lx)                      0
amplitude                             0
frequency                             0
Latitude (°)                          0
Longitude (°)                         0
Height (m)                        16103
Velocity (m/s)                   416297
Direction (°)                    416297
Horizontal Accuracy (m)               0
Vertical Accuracy (m)                 0
Magnetic field x (µT)                 0
Magnetic field y (µT)                 0
Magnetic field z (µT)                 0
Acceleration x (m/s^2)                0
Acceleration y (m/s^2)                0
Acceleration z (m/s^2)                0
Gyroscope x (rad/s)                   0
Gyroscope y (rad/s)                   0
Gyroscope z (rad/s)                   0
Pressure (hPa)                        0
Linear Acceleration x (m/s^2)         0
Linear Acceleration y (m/s^2)         0
Linear Acceleration z (m/s^2)         0
Common time (s)                       0


In [40]:
# Load the accelerometer export, then show the audio frame again for reference.
accel = pd.read_csv("Accelerometer.csv")
audio_df

Unnamed: 0,Common time (s),amplitude,frequency
0,0.000000,0.003599,0.00
1,0.010667,0.007781,0.00
2,0.021333,0.007627,0.00
3,0.032000,0.007721,0.00
4,0.042667,0.007866,0.00
...,...,...,...
96536,1029.717333,13.428600,281.25
96537,1029.728000,5.286401,187.50
96538,1029.738667,4.206217,187.50
96539,1029.749333,4.553483,750.00


In [42]:
# Last timestamp covered by each source. Series.max() is vectorised, whereas
# the builtin max() iterates the column element by element. It also skips
# missing values instead of giving an order-dependent result.
max_time_audio = audio_df["Common time (s)"].max()
max_time_accel = accel["Time (s)"].max()

In [43]:
# Last timestamp of the accelerometer recording.
print(max_time_accel)


1035.036735


In [44]:
# Last timestamp of the audio features.
print(max_time_audio)

1029.76


In [46]:
# Last timestamp in the frame, computed with the vectorised Series.max()
# instead of the builtin max(), which iterates the column in Python.
print(joined_interpol_df["Common time (s)"].max())

1035.0367447945218


In [50]:
# Leave the frame as the cell's last expression so the notebook renders it as
# a table; print() falls back to a plain-text dump that wraps wide frames.
joined_interpol_df.tail(5)


        Distance (cm)  Illuminance (lx)  amplitude  frequency  Latitude (°)  \
416292            5.0        141.077499   3.354277     281.25     52.334468   
416293            5.0        141.077499   3.354277     281.25     52.334468   
416294            5.0        141.077499   3.354277     281.25     52.334468   
416295            5.0        141.077499   3.354277     281.25     52.334468   
416296            5.0        141.077499   3.354277     281.25     52.334468   

        Longitude (°)  Height (m)  Velocity (m/s)  Direction (°)  \
416292       4.866596    2.872287             NaN            NaN   
416293       4.866596    2.872287             NaN            NaN   
416294       4.866596    2.872287             NaN            NaN   
416295       4.866596    2.872287             NaN            NaN   
416296       4.866596    2.872287             NaN            NaN   

        Horizontal Accuracy (m)  ...  Acceleration z (m/s^2)  \
416292                   14.901  ...                