In [1]:
"""
Note: When running this cell it will prompt you to add a file named kaggle.json in order to proceed and download
the dataset directly from kaggle. Just make a kaggle account, go to account settings, then click "Create New Token".
It will download a file called kaggle.json which you will add to the prompt below. The dataset will then install.
"""
!pip install -q kaggle

# Upload the kaggle.json file
from google.colab import files
files.upload()  # This will prompt you to upload your kaggle.json file

# Move kaggle.json to the correct directory
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json  # Set permissions to secure the API key

# Download the SisFall dataset using the Kaggle API
!kaggle datasets download -d nvnikhil0001/sis-fall-original-dataset -p /content;

# Unzip the dataset into a folder named 'sis-fall-original-dataset' in the Colab environment
!unzip /content/sis-fall-original-dataset.zip -d /content/sis-fall-original-dataset;



Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/nvnikhil0001/sis-fall-original-dataset
License(s): unknown
Downloading sis-fall-original-dataset.zip to /content
 99% 225M/227M [00:03<00:00, 80.3MB/s]
100% 227M/227M [00:03<00:00, 68.2MB/s]
Archive:  /content/sis-fall-original-dataset.zip
  inflating: /content/sis-fall-original-dataset/SisFall_dataset/Readme.txt  
  inflating: /content/sis-fall-original-dataset/SisFall_dataset/SA01/D01_SA01_R01.txt  
  inflating: /content/sis-fall-original-dataset/SisFall_dataset/SA01/D02_SA01_R01.txt  
  inflating: /content/sis-fall-original-dataset/SisFall_dataset/SA01/D03_SA01_R01.txt  
  inflating: /content/sis-fall-original-dataset/SisFall_dataset/SA01/D04_SA01_R01.txt  
  inflating: /content/sis-fall-original-dataset/SisFall_dataset/SA01/D05_SA01_R01.txt  
  inflating: /content/sis-fall-original-dataset/SisFall_dataset/SA01/D05_SA01_R02.txt  
  inflating: /content/sis-fall-original-dataset/SisFall_dataset/SA01/D05_SA0

In [2]:
#Contents of dataset
!ls /content/sis-fall-original-dataset/SisFall_dataset/

Readme.txt  SA04  SA08	SA12  SA16  SA20  SE01	SE05  SE09  SE13
SA01	    SA05  SA09	SA13  SA17  SA21  SE02	SE06  SE10  SE14
SA02	    SA06  SA10	SA14  SA18  SA22  SE03	SE07  SE11  SE15
SA03	    SA07  SA11	SA15  SA19  SA23  SE04	SE08  SE12  Supplementary.pdf


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.patches as patches
import os

In [4]:
#Path to dataset
path = '/content/sis-fall-original-dataset/SisFall_dataset/'

l = os.listdir(path)  #Get the list of all directories and files in the dataset
l.sort()


f = []  #List for paths starting with 'F'
d = []  #List for paths starting with 'D'


for sub_fol in l[1:39]:  #Range to exclude .txt and .pdf file
    subfolder_path = os.path.join(path, sub_fol)  #Build the full path for the subfolder

    #Check if the entry is a directory
    if os.path.isdir(subfolder_path):
        for sub_sub_fol in os.listdir(subfolder_path):  #List all files in the subfolder
            path_ = os.path.join(subfolder_path, sub_sub_fol)  #Construct the full path
            if sub_sub_fol.startswith('F'):  #Check if the file starts with 'F'
                f.append(path_)  #Add to F list
            elif sub_sub_fol.startswith('D'):  #Check if the file starts with 'D'
                d.append(path_)  #Add to D list



In [5]:
l

['Readme.txt',
 'SA01',
 'SA02',
 'SA03',
 'SA04',
 'SA05',
 'SA06',
 'SA07',
 'SA08',
 'SA09',
 'SA10',
 'SA11',
 'SA12',
 'SA13',
 'SA14',
 'SA15',
 'SA16',
 'SA17',
 'SA18',
 'SA19',
 'SA20',
 'SA21',
 'SA22',
 'SA23',
 'SE01',
 'SE02',
 'SE03',
 'SE04',
 'SE05',
 'SE06',
 'SE07',
 'SE08',
 'SE09',
 'SE10',
 'SE11',
 'SE12',
 'SE13',
 'SE14',
 'SE15',
 'Supplementary.pdf']

In [6]:
print(len(f))
print(len(d))

1798
2707


In [None]:
#This takes substantial time to run
df = pd.DataFrame(columns=['Column1', 'Column2', 'Column3', 'Column4', 'Column5', 'Column6', 'Column7', 'Column8', 'Column9', 'Column10'])
for p in range(len(f)):
    with open(f[p],'r') as file:
        r = file.read()
    r = r.replace(' ','')
    r = r.split(';\n')
    r = [i.split(',') for i in r]
    to_drop_ind = []
    for i in range(len(r)):
        if(r[i] == ['']):
            to_drop_ind.append(i)
    c = 0
    for i in to_drop_ind:
        r.pop(i-c)
        c+=1
    r = [[int(i) for i in j] for j in r]
    for i in range(len(r)):
        r[i].append(1)
    import pandas as pd
    new_df = pd.DataFrame(r, columns=df.columns)
    df = pd.concat([df, new_df], ignore_index=True)
    if(p%10==0):
        print(f"Epoch : {p}")

Epoch : 0
Epoch : 10
Epoch : 20
Epoch : 30
Epoch : 40
Epoch : 50
Epoch : 60
Epoch : 70
Epoch : 80
Epoch : 90
Epoch : 100
Epoch : 110
Epoch : 120
Epoch : 130
Epoch : 140
Epoch : 150
Epoch : 160
Epoch : 170
Epoch : 180
Epoch : 190
Epoch : 200
Epoch : 210
Epoch : 220
Epoch : 230
Epoch : 240


KeyboardInterrupt: 

In [8]:
"""
This is an optimized code that cleans data and adds it to a list before putting it
into a dataframe. Previous code added new dataframe for every file in dataset.
This one only puts list into one dataframe at the end.
"""
import pandas as pd

# List to collect all rows of data
f_data = []

# Iterate through each file path in 'f'
for p, path in enumerate(f):
    with open(path, 'r') as file:
        content = file.read()

    # Clean and process the content
    content = content.replace(' ', '').split(';\n')

    # Split each line by comma and filter out empty lines
    rows = [line.split(',') for line in content if line]

    # Convert to integers and append 1 to each row
    rows = [[int(value) for value in row] + [1] for row in rows if row != ['']]

    # Extend the main data list with the processed rows
    f_data.extend(rows)

    # Print progress every 10 iterations
    if p % 10 == 0:
        print(f"Epoch: {p}")

# Create a DataFrame once at the end
df_optimized_f = pd.DataFrame(f_data, columns=(['Acceleration in X axis for ADXL345',
                           'Acceleration in Y axis for ADXL345',
                           'Acceleration in Z axis for ADXL345',
                           'Rotation in X axis for ITG3200',
                           'Rotation in Y axis for ITG3200',
                           'Rotation in Z axis for ITG3200',
                           'Acceleration in X axis for MMA8451Q',
                           'Acceleration in Y axis for MMA8451Q',
                           'Acceleration in Z axis for MMA8451Q',
                           'column10']))



Epoch: 0
Epoch: 10
Epoch: 20
Epoch: 30
Epoch: 40
Epoch: 50
Epoch: 60
Epoch: 70
Epoch: 80
Epoch: 90
Epoch: 100
Epoch: 110
Epoch: 120
Epoch: 130
Epoch: 140
Epoch: 150
Epoch: 160
Epoch: 170
Epoch: 180
Epoch: 190
Epoch: 200
Epoch: 210
Epoch: 220
Epoch: 230
Epoch: 240
Epoch: 250
Epoch: 260
Epoch: 270
Epoch: 280
Epoch: 290
Epoch: 300
Epoch: 310
Epoch: 320
Epoch: 330
Epoch: 340
Epoch: 350
Epoch: 360
Epoch: 370
Epoch: 380
Epoch: 390
Epoch: 400
Epoch: 410
Epoch: 420
Epoch: 430
Epoch: 440
Epoch: 450
Epoch: 460
Epoch: 470
Epoch: 480
Epoch: 490
Epoch: 500
Epoch: 510
Epoch: 520
Epoch: 530
Epoch: 540
Epoch: 550
Epoch: 560
Epoch: 570
Epoch: 580
Epoch: 590
Epoch: 600
Epoch: 610
Epoch: 620
Epoch: 630
Epoch: 640
Epoch: 650
Epoch: 660
Epoch: 670
Epoch: 680
Epoch: 690
Epoch: 700
Epoch: 710
Epoch: 720
Epoch: 730
Epoch: 740
Epoch: 750
Epoch: 760
Epoch: 770
Epoch: 780
Epoch: 790
Epoch: 800
Epoch: 810
Epoch: 820
Epoch: 830
Epoch: 840
Epoch: 850
Epoch: 860
Epoch: 870
Epoch: 880
Epoch: 890
Epoch: 900
Epoch: 910

In [None]:
df.shape

[]


In [None]:
#This takes substantial time to run
for p in range(len(d)):
    with open(d[p],'r') as file:
        r = file.read()
    r = r.replace(' ','')
    r = r.split(';\n')
    r = [i.split(',') for i in r]
    to_drop_ind = []
    for i in range(len(r)):
        if(r[i] == ['']):
            to_drop_ind.append(i)
    c = 0
    for i in to_drop_ind:
        r.pop(i-c)
        c+=1
    r = [[int(i) for i in j] for j in r]
    for i in range(len(r)):
        r[i].append(0)
    import pandas as pd
    new_df = pd.DataFrame(r, columns=df.columns)
    df = pd.concat([df, new_df], ignore_index=True)
    if(p%10==0):
        print(f"Epoch : {p}")
    if(p==1798):
        break

In [10]:
"""
This is an optimized code that cleans data and adds it to a list before putting it
into a dataframe. Previous code added new dataframe for every file in dataset.
This one only puts list into one dataframe at the end.
"""
import pandas as pd

# List to collect all rows of data
d_data = []

# Iterate through each file path in 'd'
for p, path in enumerate(d):
    with open(path, 'r') as file:
        content = file.read()

    # Clean and process the content
    content = content.replace(' ', '').split(';\n')

    # Split each line by comma and filter out empty lines
    rows = [line.split(',') for line in content if line]

    # Convert to integers and append 1 to each row
    rows = [[int(value) for value in row] + [1] for row in rows if row != ['']]

    # Extend the main data list with the processed rows
    d_data.extend(rows)

    # Print progress every 10 iterations
    if p % 10 == 0:
        print(f"Epoch: {p}")

# Create a DataFrame once at the end
df_optimized_d = pd.DataFrame(d_data, columns=(['Acceleration in X axis for ADXL345',
                           'Acceleration in Y axis for ADXL345',
                           'Acceleration in Z axis for ADXL345',
                           'Rotation in X axis for ITG3200',
                           'Rotation in Y axis for ITG3200',
                           'Rotation in Z axis for ITG3200',
                           'Acceleration in X axis for MMA8451Q',
                           'Acceleration in Y axis for MMA8451Q',
                           'Acceleration in Z axis for MMA8451Q',
                           'column10']))



Epoch: 0
Epoch: 10
Epoch: 20
Epoch: 30
Epoch: 40
Epoch: 50
Epoch: 60
Epoch: 70
Epoch: 80
Epoch: 90
Epoch: 100
Epoch: 110
Epoch: 120
Epoch: 130
Epoch: 140
Epoch: 150
Epoch: 160
Epoch: 170
Epoch: 180
Epoch: 190
Epoch: 200
Epoch: 210
Epoch: 220
Epoch: 230
Epoch: 240
Epoch: 250
Epoch: 260
Epoch: 270
Epoch: 280
Epoch: 290
Epoch: 300
Epoch: 310
Epoch: 320
Epoch: 330
Epoch: 340
Epoch: 350
Epoch: 360
Epoch: 370
Epoch: 380
Epoch: 390
Epoch: 400
Epoch: 410
Epoch: 420
Epoch: 430
Epoch: 440
Epoch: 450
Epoch: 460
Epoch: 470
Epoch: 480
Epoch: 490
Epoch: 500
Epoch: 510
Epoch: 520
Epoch: 530
Epoch: 540
Epoch: 550
Epoch: 560
Epoch: 570
Epoch: 580
Epoch: 590
Epoch: 600
Epoch: 610
Epoch: 620
Epoch: 630
Epoch: 640
Epoch: 650
Epoch: 660
Epoch: 670
Epoch: 680
Epoch: 690
Epoch: 700
Epoch: 710
Epoch: 720
Epoch: 730
Epoch: 740
Epoch: 750
Epoch: 760
Epoch: 770
Epoch: 780
Epoch: 790
Epoch: 800
Epoch: 810
Epoch: 820
Epoch: 830
Epoch: 840
Epoch: 850
Epoch: 860
Epoch: 870
Epoch: 880
Epoch: 890
Epoch: 900
Epoch: 910

In [23]:
df.shape()

NameError: name 'df' is not defined

In [12]:
df_optimized_d.head()

Unnamed: 0,Acceleration in X axis for ADXL345,Acceleration in Y axis for ADXL345,Acceleration in Z axis for ADXL345,Rotation in X axis for ITG3200,Rotation in Y axis for ITG3200,Rotation in Z axis for ITG3200,Acceleration in X axis for MMA8451Q,Acceleration in Y axis for MMA8451Q,Acceleration in Z axis for MMA8451Q,column10
0,7,-255,-13,-1,-3,18,14,-996,70,1
1,6,-255,-11,-34,-9,14,13,-1002,82,1
2,2,-259,-7,-54,-11,12,13,-1007,96,1
3,3,-260,-7,-63,-12,11,8,-1011,89,1
4,0,-260,-7,-63,-12,12,0,-1015,88,1
