<a href="https://colab.research.google.com/github/Saif-M-Dhrubo/hierarchical-attention-HAR/blob/master/MEx_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

###CUDA GPU Checking

In [1]:
!nvidia-smi

Mon Apr  6 04:53:56 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.64.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   51C    P0    34W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

## Library Import

In [2]:
import os
import requests
import zipfile
import glob
import platform

import time
import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

  import pandas.util.testing as tm


##MEx Dataset

###Routine for Fetching

In [0]:
def get_dataset(url:str, data_directory:str, file_name:str):

    print(datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    print('GETTING DATASET ...')

    if not os.path.exists(data_directory):
        os.makedirs(data_directory)

    if not os.path.exists(file_name):
        response = requests.get(url, stream=True)
        data_file = open(file_name, 'wb')

        for chunk in response.iter_content(chunk_size=1024):
            data_file.write(chunk)

        data_file.close()
        print('\n---DATASET DOWNLOAD COMPLETE---')

###Data Download

In [0]:
DATA_URL = 'https://data.mendeley.com/datasets/p89fwbzmkd/3/files/227baf25-f0c0-4025-9c9b-9bda2743fbed/MEx.zip?dl=1'
DATA_DIRECTORY = '/'
DATA_FILE = 'MEx.zip'

In [5]:
get_dataset(DATA_URL, DATA_DIRECTORY, DATA_FILE)

2020-04-06 04:53:57
GETTING DATASET ...

---DATASET DOWNLOAD COMPLETE---


In [6]:
!unzip MEx.zip -d MEx_Data/

Archive:  MEx.zip
   creating: MEx_Data/act/
   creating: MEx_Data/act/01/
  inflating: MEx_Data/act/01/01_act_1.csv  
  inflating: MEx_Data/act/01/07_act_1.csv  
  inflating: MEx_Data/act/01/06_act_1.csv  
  inflating: MEx_Data/act/01/05_act_1.csv  
  inflating: MEx_Data/act/01/04_act_1.csv  
  inflating: MEx_Data/act/01/04_act_2.csv  
  inflating: MEx_Data/act/01/03_act_1.csv  
  inflating: MEx_Data/act/01/02_act_1.csv  
   creating: MEx_Data/act/02/
  inflating: MEx_Data/act/02/03_act_1.csv  
  inflating: MEx_Data/act/02/02_act_1.csv  
  inflating: MEx_Data/act/02/01_act_1.csv  
  inflating: MEx_Data/act/02/07_act_1.csv  
  inflating: MEx_Data/act/02/06_act_1.csv  
  inflating: MEx_Data/act/02/05_act_1.csv  
  inflating: MEx_Data/act/02/04_act_1.csv  
  inflating: MEx_Data/act/02/04_act_2.csv  
   creating: MEx_Data/act/03/
  inflating: MEx_Data/act/03/01_act_1.csv  
  inflating: MEx_Data/act/03/07_act_1.csv  
  inflating: MEx_Data/act/03/06_act_1.csv  
  inflating: MEx_Data/act/03/

In [0]:
DATA_DIR = 'MEx_Data'
THIGH_ACCEL = 'act'
WRIST_ACCEL = 'acw'

SUBJECT_LIST = ['%02d' % x for x in range(1, 31)]

In [0]:
def quantize_time(timestamp, quantization_level=2):
    m_sec = str(timestamp.microsecond)
    q_msec = m_sec[:quantization_level]

    timestamp = timestamp.replace(microsecond=(int(q_msec) * (10 ** (6 - quantization_level))))

    return timestamp

In [0]:
def get_activity(file_name:str):
    activivity = int(file_name.strip().split('_')[0])

    return activivity

In [11]:
complete_df = pd.DataFrame()

for subj in SUBJECT_LIST:
    accel_t_dir = os.path.join(DATA_DIR, THIGH_ACCEL, subj)
    accel_w_dir = os.path.join(DATA_DIR, WRIST_ACCEL, subj)

    accel_t_files = sorted(os.listdir(accel_t_dir))
    accel_w_files = sorted(os.listdir(accel_w_dir))

    for i in range(len(accel_t_files)):
        accel_t = os.path.join(accel_t_dir, accel_t_files[i])
        accel_w = os.path.join(accel_w_dir, accel_w_files[i])

        df_t = pd.read_csv(accel_t, header=None, names=['timestamp', 'act_x', 'act_y', 'act_z'])
        df_w = pd.read_csv(accel_w, header=None, names=['timestamp', 'acw_x', 'acw_y', 'acw_z'])

        df_t['timestamp'] = pd.to_datetime(df_t['timestamp'])
        df_w['timestamp'] = pd.to_datetime(df_w['timestamp'])

        df_t['timestamp'] = df_t['timestamp'].apply(quantize_time)
        df_w['timestamp'] = df_w['timestamp'].apply(quantize_time)

        df_t = df_t.groupby(['timestamp'], as_index=False)['act_x', 'act_y', 'act_z'].mean()
        df_w = df_w.groupby(['timestamp'], as_index=False)['acw_x', 'acw_y', 'acw_z'].mean()

        merged = pd.merge(df_t, df_w, how='outer', on=['timestamp', 'timestamp'], sort=True)
        merged['subject_id'] = int(subj)
        merged['activity'] = get_activity(accel_t_files[i])
        
        complete_df = pd.concat([complete_df, merged], ignore_index=True)



In [12]:
complete_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1229840 entries, 0 to 1229839
Data columns (total 9 columns):
 #   Column      Non-Null Count    Dtype         
---  ------      --------------    -----         
 0   timestamp   1229840 non-null  datetime64[ns]
 1   act_x       1227493 non-null  float64       
 2   act_y       1227493 non-null  float64       
 3   act_z       1227493 non-null  float64       
 4   acw_x       1189207 non-null  float64       
 5   acw_y       1189207 non-null  float64       
 6   acw_z       1189207 non-null  float64       
 7   subject_id  1229840 non-null  int64         
 8   activity    1229840 non-null  int64         
dtypes: datetime64[ns](1), float64(6), int64(2)
memory usage: 84.4 MB


In [13]:
complete_df.isna().sum()

timestamp         0
act_x          2347
act_y          2347
act_z          2347
acw_x         40633
acw_y         40633
acw_z         40633
subject_id        0
activity          0
dtype: int64

In [14]:
complete_df.sample(20)

Unnamed: 0,timestamp,act_x,act_y,act_z,acw_x,acw_y,acw_z,subject_id,activity
146740,2018-06-06 11:39:06.480,-0.3125,-0.765625,-0.484375,-0.640625,-0.625,0.1875,4,5
20670,2018-11-08 11:46:34.290,-0.078125,-0.1875,-0.96875,0.351562,-0.171875,0.835938,1,4
276838,2018-06-13 16:24:36.640,-0.078125,-0.984375,0.03125,-0.34375,0.953125,0.0,7,5
877850,2019-03-07 12:17:27.910,0.578125,0.71875,-0.078125,0.96875,0.03125,-0.203125,22,3
780188,2019-02-20 12:48:54.420,-0.65625,0.390625,0.578125,-0.625,0.046875,-0.828125,20,1
51946,2019-02-20 14:23:14.100,-0.421875,0.828125,-0.28125,0.453125,0.15625,-0.921875,2,2
714393,2019-02-14 11:49:48.370,-0.578125,0.75,-0.171875,-0.984375,-0.1875,-0.234375,18,3
489290,2018-10-11 15:58:57.770,-0.96875,-0.28125,0.09375,-0.28125,0.984375,-0.171875,12,6
91412,2018-06-06 09:16:09.450,-0.5,0.796875,-0.234375,-1.015625,-0.03125,-0.078125,3,2
505322,2018-11-08 12:15:50.770,0.75,-0.59375,-0.070312,0.382812,0.125,-0.9375,13,2
