In [1]:
import warnings
import threading
warnings.filterwarnings('ignore')
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
from scipy import stats
from sklearn.ensemble import RandomForestClassifier

In [2]:
'''
This cell is used to run the model on a single User's Glass and phone data. This is done to make sure 
what contents are stored in the data frame. 
'''

# Define paths to folders containing training data
glass_folder_path = r'C:/Users/sudha/Desktop/Semester 2/MLS/Project 2/New folder/Training Data/User001/Glass'
htc_folder_path = r'C:/Users/sudha/Desktop/Semester 2/MLS/Project 2/New folder/Training Data/User001/HTC - front'

# Define function to read gyro data from a given file
def read_gyro_data(file_path):
    df = pd.read_csv(file_path, usecols=[0, 1, 2, 3])
    df.columns = ['timestamp', 'X', 'Y', 'Z']
    df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S:%f')
    df.sort_values('timestamp', inplace=True)
    return df

# Read gyro data from both files
glass_df = read_gyro_data(os.path.join(glass_folder_path, 'gyroData.csv'))
htc_df = read_gyro_data(os.path.join(htc_folder_path, 'gyroDataM.csv'))

# Concatenate the two dataframes
combined_df = pd.concat([glass_df, htc_df])

# Create separate dataframe for timestamps
timestamps_df = combined_df[['timestamp']].copy()

print('Combined Dataframe:')
print(combined_df)

print('\nTimestamps Dataframe:')
print(timestamps_df)


Combined Dataframe:
                   timestamp         X         Y         Z
0    2017-03-31 13:42:19.011  0.010653  0.002131 -0.023436
1    2017-03-31 13:42:19.211 -0.005326  0.001065 -0.051133
2    2017-03-31 13:42:19.409 -0.022371  0.020240 -0.007457
3    2017-03-31 13:42:19.601  0.053263 -0.024501 -0.099070
4    2017-03-31 13:42:19.832  0.242880  0.023436  0.450607
...                      ...       ...       ...       ...
5545 2017-03-31 13:45:02.703 -0.040000  0.080000  0.000000
5546 2017-03-31 13:45:02.706 -0.030000  0.090000  0.000000
5547 2017-03-31 13:45:02.732 -0.040000  0.050000  0.000000
5548 2017-03-31 13:45:02.763 -0.050000  0.090000 -0.010000
5549 2017-03-31 13:45:02.767 -0.140000 -0.210000  0.030000

[6533 rows x 4 columns]

Timestamps Dataframe:
                   timestamp
0    2017-03-31 13:42:19.011
1    2017-03-31 13:42:19.211
2    2017-03-31 13:42:19.409
3    2017-03-31 13:42:19.601
4    2017-03-31 13:42:19.832
...                      ...
5545 2017-03-31 13:45

In [4]:
'''
This cell deals with the feature extraction of the data frame created above. This cell is specifically made
to print out how the extracted features look and how are they stored.
'''
# Extract features from columns X, Y, and Z
x_features = combined_df[['X']].describe()
y_features = combined_df[['Y']].describe()
z_features = combined_df[['Z']].describe()

x_skew = combined_df['X'].skew()
y_skew = combined_df['Y'].skew()
z_skew = combined_df['Z'].skew()


# Print the extracted features
print('X Features:')
print(x_features)
print('X Skew:', x_skew)

print('Y Features:')
print(y_features)
print('Y Skew:', y_skew)

print('Z Features:')
print(z_features)
print('Z Skew:', z_skew)


X Features:
                 X
count  6533.000000
mean      0.029137
std       1.103641
min      -7.911719
25%      -0.470000
50%       0.060720
75%       0.770000
max       4.150000
X Skew: -0.5723376389831188
Y Features:
                 Y
count  6533.000000
mean     -0.009780
std       1.225926
min      -6.150000
25%      -0.680000
50%       0.020240
75%       0.660000
max       6.293582
Y Skew: -0.03649729527872387
Z Features:
                 Z
count  6533.000000
mean     -0.056905
std       0.913052
min      -5.303952
25%      -0.500000
50%       0.030000
75%       0.540000
max       4.040000
Z Skew: -0.5868365828180566


In [5]:
'''
Training Phase

'''
# Define parent folder path
parent_folder_path = r'C:\Users\sudha\Desktop\Semester 2\MLS\Project 2\New folder\Data of Head and Torso movement\data\Training Data\\'

# Define function to read gyro data from a given file
def read_gyro_data(file_path):
    df = pd.read_csv(file_path, usecols=[0, 1, 2, 3])
    df.columns = ['timestamp', 'X', 'Y', 'Z']
    df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y-%m-%d %H:%M:%S:%f')
    df.sort_values('timestamp', inplace=True)
    return df   


# Define column and index labels
col_labels = pd.MultiIndex.from_tuples([('count', 'X'), ('count', 'Y'), ('count', 'Z'),
                                        ('mean', 'X'), ('mean', 'Y'), ('mean', 'Z'),
                                        ('std', 'X'), ('std', 'Y'), ('std', 'Z'),
                                        ('min', 'X'), ('min', 'Y'), ('min', 'Z'),
                                        ('25%', 'X'), ('25%', 'Y'), ('25%', 'Z'),
                                        ('50%', 'X'), ('50%', 'Y'), ('50%', 'Z'),
                                        ('75%', 'X'), ('75%', 'Y'), ('75%', 'Z'),
                                        ('max', 'X'), ('max', 'Y'), ('max', 'Z')])



# Create empty dataframe with column and index labels
empty_df = pd.DataFrame(columns=col_labels).assign(Label=[])



for i in range(1, 18):
    user_folder_path = os.path.join(parent_folder_path, f'User{i:03}')
    glass_file_path = os.path.join(user_folder_path, 'Glass/gyroData.csv')
    htc_file_path = os.path.join(user_folder_path, 'HTC - front/gyroDataM.csv')
    glass_df = read_gyro_data(glass_file_path)
    htc_df = read_gyro_data(htc_file_path)
    #glass_features = glass_df[['X', 'Y', 'Z']].describe()
    glass_features = glass_df[['X', 'Y', 'Z']].describe().stack().to_frame().transpose()
    glass_features = glass_features .assign(Label=[f'User{i:03}_glass'])
    empty_df = pd.concat([empty_df, glass_features])
    
    htc_features = htc_df[['X', 'Y', 'Z']].describe().stack().to_frame().transpose()
    htc_features = htc_features .assign(Label=[f'User{i:03}_htc'])
    empty_df = pd.concat([empty_df, htc_features])

copy_df = empty_df
    
X_train = copy_df.drop(columns=["Label"])
y_train = empty_df["Label"]

print(len(y_train))

# Train random forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)
    


34


In [6]:
'''
Testing Phase

'''

glass_folder_path_test = r'C:\Users\sudha\Desktop\Semester 2\MLS\Project 2\New folder\Data of Head and Torso movement\data\Testing Data\\'
#htc_folder_path_test = r'C:\Users\sudha\Desktop\Semester 2\MLS\Project 2\New folder\Data of Head and Torso movement\data\Testing Data\\\'

col_labels = pd.MultiIndex.from_tuples([('count', 'X'), ('count', 'Y'), ('count', 'Z'),
                                        ('mean', 'X'), ('mean', 'Y'), ('mean', 'Z'),
                                        ('std', 'X'), ('std', 'Y'), ('std', 'Z'),
                                        ('min', 'X'), ('min', 'Y'), ('min', 'Z'),
                                        ('25%', 'X'), ('25%', 'Y'), ('25%', 'Z'),
                                        ('50%', 'X'), ('50%', 'Y'), ('50%', 'Z'),
                                        ('75%', 'X'), ('75%', 'Y'), ('75%', 'Z'),
                                        ('max', 'X'), ('max', 'Y'), ('max', 'Z')])



# Create empty dataframe with column and index labels
test_df = pd.DataFrame(columns=col_labels).assign(Label=[])
user_folder_path = os.path.join(parent_folder_path, f'User002')
glass_file_path = os.path.join(user_folder_path, 'HTC - front/gyroDataM.csv')

test_glass_df = read_gyro_data(glass_file_path)

#glass_features = glass_df[['X', 'Y', 'Z']].describe()
test_glass_features = test_glass_df[['X', 'Y', 'Z']].describe().stack().to_frame().transpose()
test_glass_features = test_glass_features .assign(Label=[f'User002_htc'])
test_df = pd.concat([test_df, test_glass_features])    

In [7]:
'''
Outcome Prediction

'''

copy_test = test_df
x_test = copy_test.drop(columns=["Label"])
y_test = test_df["Label"]

y_pred = rf_classifier.predict(x_test)
print("The predicted user is:", y_pred)

accuracy = accuracy_score(y_test, y_pred)
print(f'Test accuracy: {accuracy}')

The predicted user is: ['User002_htc']
Test accuracy: 1.0


In [8]:
authorized_users = ["User001_glass", "User001_htc", "User002_glass", "User002_htc", "User005_glass", "User005_htc", "User015_glass", "User015_htc"]

if y_pred in authorized_users:
    print("ALLOWED")
else:
    print("DENIED")

ALLOWED
