In [49]:
#import packages
import pandas as pd
import numpy as np
import folium

#ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [31]:
# Step 1: Load the data
data = pd.read_csv('/Users/mehakrafiq/Library/CloudStorage/OneDrive-NationalUniversityofSciences&Technology/Rapidev/SmartFile.csv')

# Step 2: Sort the data by timestamp
data.sort_values('TimeStamp', inplace=True)

# Step 3: Create a map centered around the first coordinate
m = folium.Map(location=[data.iloc[0]['Latitude'], data.iloc[0]['Longitude']], zoom_start=20)

# Step 4: Plot the trajectory as a line on the map
for i in range(1, len(data)):
    folium.PolyLine([(data.iloc[i-1]['Latitude'], data.iloc[i-1]['Longitude']), 
                     (data.iloc[i]['Latitude'], data.iloc[i]['Longitude'])], color='blue').add_to(m)
    
# Save the map to an HTML file
map_filepath = 'walk_pattern_map.html'
m.save(map_filepath)

map_filepath


'walk_pattern_map.html'

In [32]:
from geopy.distance import geodesic

# Step 1: Handle missing values by removing rows with NaN values in Latitude and Longitude columns
data.dropna(subset=['Latitude', 'Longitude'], inplace=True)

# Step 2: Remove rows with same Latitude and Longitude as the previous row
data = data.loc[(data['Latitude'].shift() != data['Latitude']) | (data['Longitude'].shift() != data['Longitude'])]

# Step 3: Remove random jumps by setting a threshold for the maximum allowable distance between consecutive points (e.g., 100 meters)
max_distance_m = 100
for i in range(1, len(data)):
    coord1 = (data.iloc[i-1]['Latitude'], data.iloc[i-1]['Longitude'])
    coord2 = (data.iloc[i]['Latitude'], data.iloc[i]['Longitude'])
    if geodesic(coord1, coord2).meters > max_distance_m:
        data.iloc[i, data.columns.get_loc('Latitude')] = data.iloc[i-1]['Latitude']
        data.iloc[i, data.columns.get_loc('Longitude')] = data.iloc[i-1]['Longitude']

# Display the first few rows of the preprocessed data
data.head()


Unnamed: 0,Soldier_ID,TimeStamp,HeartRate,Latitude,Longitude,Accelerometer_X,Accelerometer_Y,Accelerometer_Z,Gyro_X,Gyro_Y,Gyro_Z,User_Activity
0,0,2023-09-08 16:23:07.200,-10,33.64616,72.99664,-1.01695,2.4,9.21237,0.51646,-0.85545,0.00549,
1,0,2023-09-08 16:23:07.779,-10,33.64617,72.99666,-1.07198,2.73739,8.9683,0.51204,-0.85745,0.00845,
4,0,2023-09-08 16:23:10.922,-10,33.64616,72.99666,-0.06939,1.24666,9.56411,0.56297,-0.82643,0.00436,
10,0,2023-09-08 16:23:16.897,-10,33.64615,72.99666,-2.98864,0.38285,9.97568,0.44209,-0.89592,-0.03627,
13,0,2023-09-08 16:23:19.875,-10,33.64615,72.99665,1.78505,-1.78744,11.81815,-0.33094,0.94339,-0.00965,


In [33]:
# Create a map centered around the first coordinate of the preprocessed data
m = folium.Map(location=[data.iloc[0]['Latitude'], data.iloc[0]['Longitude']], zoom_start=20)

# Plot the trajectory as a line on the map using the preprocessed data
for i in range(1, len(data)):
    folium.PolyLine([(data.iloc[i-1]['Latitude'], data.iloc[i-1]['Longitude']), 
                     (data.iloc[i]['Latitude'], data.iloc[i]['Longitude'])], color='blue').add_to(m)
    
# Save the map to an HTML file
map_filepath = 'walk_pattern_map_preprocessed.html'
m.save(map_filepath)

map_filepath

'walk_pattern_map_preprocessed.html'

In [34]:
# Apply a moving average filter with a window size of 5 to the Latitude and Longitude columns to smooth the data
window_size = 5
data['Latitude_smooth'] = data['Latitude'].rolling(window=window_size, center=True).mean()
data['Longitude_smooth'] = data['Longitude'].rolling(window=window_size, center=True).mean()

# Remove the rows with NaN values generated due to smoothing
data.dropna(subset=['Latitude_smooth', 'Longitude_smooth'], inplace=True)

# Create a map centered around the first coordinate of the smoothed data
m = folium.Map(location=[data.iloc[0]['Latitude_smooth'], data.iloc[0]['Longitude_smooth']], zoom_start=20)

# Plot the trajectory as a line on the map using the smoothed data
for i in range(1, len(data)):
    folium.PolyLine([(data.iloc[i-1]['Latitude_smooth'], data.iloc[i-1]['Longitude_smooth']), 
                     (data.iloc[i]['Latitude_smooth'], data.iloc[i]['Longitude_smooth'])], color='blue').add_to(m)
    
# Save the map to an HTML file
map_filepath_smooth = 'walk_pattern_map_smooth.html'
m.save(map_filepath_smooth)

map_filepath_smooth


'walk_pattern_map_smooth.html'

In [35]:
# Convert the TimeStamp column to datetime data type
data['TimeStamp'] = pd.to_datetime(data['TimeStamp'])

# Set the TimeStamp column as the index
data.set_index('TimeStamp', inplace=True)

# Resample the data to obtain a single location per minute by taking the mean of the Latitude and Longitude values within each minute
data_resampled = data[['Latitude', 'Longitude']].resample('1T').mean().dropna()

# Create a map centered around the first coordinate of the resampled data
m = folium.Map(location=[data_resampled.iloc[0]['Latitude'], data_resampled.iloc[0]['Longitude']], zoom_start=20)

# Plot the trajectory as a line on the map using the resampled data
for i in range(1, len(data_resampled)):
    folium.PolyLine([(data_resampled.iloc[i-1]['Latitude'], data_resampled.iloc[i-1]['Longitude']), 
                     (data_resampled.iloc[i]['Latitude'], data_resampled.iloc[i]['Longitude'])], color='blue').add_to(m)
    
# Save the map to an HTML file
map_filepath_resampled = 'walk_pattern_map_resampled.html'
m.save(map_filepath_resampled)

map_filepath_resampled


'walk_pattern_map_resampled.html'

In [36]:
# Resample the data to obtain a single location every 2 minutes by taking the mean of the Latitude and Longitude values within each 2-minute interval
data_resampled_2min = data[['Latitude', 'Longitude']].resample('2T').mean().dropna()

# Create a map centered around the first coordinate of the 2-minute resampled data
m = folium.Map(location=[data_resampled_2min.iloc[0]['Latitude'], data_resampled_2min.iloc[0]['Longitude']], zoom_start=20)

# Plot the trajectory as a line on the map using the 2-minute resampled data
for i in range(1, len(data_resampled_2min)):
    folium.PolyLine([(data_resampled_2min.iloc[i-1]['Latitude'], data_resampled_2min.iloc[i-1]['Longitude']), 
                     (data_resampled_2min.iloc[i]['Latitude'], data_resampled_2min.iloc[i]['Longitude'])], color='blue').add_to(m)
    
# Save the map to an HTML file
map_filepath_resampled_2min = 'walk_pattern_map_resampled_2min.html'
m.save(map_filepath_resampled_2min)

map_filepath_resampled_2min


'walk_pattern_map_resampled_2min.html'

In [37]:
# Resample the data to obtain a single location every 5 minutes by taking the mean of the Latitude and Longitude values within each 5-minute interval
data_resampled_5min = data[['Latitude', 'Longitude']].resample('5T').mean().dropna()

# Create a map centered around the first coordinate of the 5-minute resampled data
m = folium.Map(location=[data_resampled_5min.iloc[0]['Latitude'], data_resampled_5min.iloc[0]['Longitude']], zoom_start=20)

# Plot the trajectory as a line on the map using the 5-minute resampled data
for i in range(1, len(data_resampled_5min)):
    folium.PolyLine([(data_resampled_5min.iloc[i-1]['Latitude'], data_resampled_5min.iloc[i-1]['Longitude']), 
                     (data_resampled_5min.iloc[i]['Latitude'], data_resampled_5min.iloc[i]['Longitude'])], color='blue').add_to(m)
    
# Save the map to an HTML file
map_filepath_resampled_5min = 'walk_pattern_map_resampled_5min.html'
m.save(map_filepath_resampled_5min)

map_filepath_resampled_5min


'walk_pattern_map_resampled_5min.html'

In [38]:
# Apply a moving average filter with a window size of 3 to the 2-minute resampled Latitude and Longitude columns to smooth the data
window_size = 3
data_resampled_2min['Latitude_smooth'] = data_resampled_2min['Latitude'].rolling(window=window_size, center=True).mean()
data_resampled_2min['Longitude_smooth'] = data_resampled_2min['Longitude'].rolling(window=window_size, center=True).mean()

# Remove the rows with NaN values generated due to smoothing
data_resampled_2min.dropna(subset=['Latitude_smooth', 'Longitude_smooth'], inplace=True)

# Create a map centered around the first coordinate of the smoothed and resampled data
m = folium.Map(location=[data_resampled_2min.iloc[0]['Latitude_smooth'], data_resampled_2min.iloc[0]['Longitude_smooth']], zoom_start=20)

# Plot the trajectory as a line on the map using the smoothed and resampled data
for i in range(1, len(data_resampled_2min)):
    folium.PolyLine([(data_resampled_2min.iloc[i-1]['Latitude_smooth'], data_resampled_2min.iloc[i-1]['Longitude_smooth']), 
                     (data_resampled_2min.iloc[i]['Latitude_smooth'], data_resampled_2min.iloc[i]['Longitude_smooth'])], color='blue').add_to(m)
    
# Save the map to an HTML file
map_filepath_resampled_smooth_2min = 'walk_pattern_map_resampled_smooth_2min.html'
m.save(map_filepath_resampled_smooth_2min)

map_filepath_resampled_smooth_2min


'walk_pattern_map_resampled_smooth_2min.html'

In [39]:
# Apply a moving average filter with a window size of 3 to the original Latitude and Longitude columns to smooth the data
window_size = 5
data['Latitude_smooth'] = data['Latitude'].rolling(window=window_size, center=True).mean()
data['Longitude_smooth'] = data['Longitude'].rolling(window=window_size, center=True).mean()

# Remove the rows with NaN values generated due to smoothing
data.dropna(subset=['Latitude_smooth', 'Longitude_smooth'], inplace=True)

# Resample the smoothed data to obtain a single location every 1 minute
data_resampled_smooth_1min = data[['Latitude_smooth', 'Longitude_smooth']].resample('1T').mean().dropna()

# Create a map centered around the first coordinate of the 1-minute resampled and smoothed data
m = folium.Map(location=[data_resampled_smooth_1min.iloc[0]['Latitude_smooth'], data_resampled_smooth_1min.iloc[0]['Longitude_smooth']], zoom_start=20)

# Plot the trajectory as a line on the map using the 1-minute resampled and smoothed data
for i in range(1, len(data_resampled_smooth_1min)):
    folium.PolyLine([(data_resampled_smooth_1min.iloc[i-1]['Latitude_smooth'], data_resampled_smooth_1min.iloc[i-1]['Longitude_smooth']), 
                     (data_resampled_smooth_1min.iloc[i]['Latitude_smooth'], data_resampled_smooth_1min.iloc[i]['Longitude_smooth'])], color='blue').add_to(m)
    
# Save the map to an HTML file
map_filepath_resampled_smooth_1min = 'walk_pattern_map_resampled_smooth_1min.html'
m.save(map_filepath_resampled_smooth_1min)

map_filepath_resampled_smooth_1min


'walk_pattern_map_resampled_smooth_1min.html'

In [46]:
# Get the total length of the data and the time range it covers
data_length = len(data)
data_time_range = data.index[-1] - data.index[0]

data_length, data_time_range


(443, Timedelta('4 days 19:37:35.226000'))

In [42]:
from scipy.signal import savgol_filter

# Apply Savitzky-Golay smoothing with a window length of 5 and polynomial order of 2
data['Latitude_smooth'] = savgol_filter(data['Latitude'], window_length=5, polyorder=2)
data['Longitude_smooth'] = savgol_filter(data['Longitude'], window_length=5, polyorder=2)

# Resample the smoothed data to obtain a single location every 2 minutes
data_resampled_smooth_2min = data[['Latitude_smooth', 'Longitude_smooth']].resample('2T').mean().dropna()

# Create a map centered around the first coordinate of the 2-minute resampled and smoothed data
m = folium.Map(location=[data_resampled_smooth_2min.iloc[0]['Latitude_smooth'], data_resampled_smooth_2min.iloc[0]['Longitude_smooth']], zoom_start=20)

# Plot the trajectory as a line on the map using the 2-minute resampled and smoothed data
for i in range(1, len(data_resampled_smooth_2min)):
    folium.PolyLine([(data_resampled_smooth_2min.iloc[i-1]['Latitude_smooth'], data_resampled_smooth_2min.iloc[i-1]['Longitude_smooth']), 
                     (data_resampled_smooth_2min.iloc[i]['Latitude_smooth'], data_resampled_smooth_2min.iloc[i]['Longitude_smooth'])], color='blue').add_to(m)
    
# Save the map to an HTML file
map_filepath_savgol_2min = 'walk_pattern_map_savgol_resampled_2min.html'
m.save(map_filepath_savgol_2min)

map_filepath_savgol_2min


'walk_pattern_map_savgol_resampled_2min.html'

In [44]:
# Apply Savitzky-Golay smoothing with a window length of 5 and polynomial order of 2
data['Latitude_smooth'] = savgol_filter(data['Latitude'], window_length=5, polyorder=2)
data['Longitude_smooth'] = savgol_filter(data['Longitude'], window_length=5, polyorder=2)

# Resample the smoothed data to obtain a single location every 30 seconds
data_resampled_smooth_30s = data[['Latitude_smooth', 'Longitude_smooth']].resample('30S').mean().dropna()

# Create a map centered around the first coordinate of the 30-second resampled and smoothed data
m = folium.Map(location=[data_resampled_smooth_30s.iloc[0]['Latitude_smooth'], data_resampled_smooth_30s.iloc[0]['Longitude_smooth']], zoom_start=16)

# Plot the trajectory as a line on the map using the 30-second resampled and smoothed data
# Using different colors for paths with a minimum of 30 seconds difference at the same place
previous_coord = None
for i in range(1, len(data_resampled_smooth_30s)):
    current_coord = (data_resampled_smooth_30s.iloc[i]['Latitude_smooth'], data_resampled_smooth_30s.iloc[i]['Longitude_smooth'])
    color = 'blue' if current_coord != previous_coord else 'red'
    
    # Highlight the areas where the person has been stationary for at least 30 seconds
    if color == 'red':
        folium.CircleMarker(location=current_coord, radius=3, color='yellow', fill=True, fill_color='yellow').add_to(m)
    
    folium.PolyLine([(data_resampled_smooth_30s.iloc[i-1]['Latitude_smooth'], data_resampled_smooth_30s.iloc[i-1]['Longitude_smooth']), 
                     current_coord], color=color).add_to(m)
    previous_coord = current_coord

# Save the map to an HTML file
map_filepath_savgol_30s_stationary = 'walk_pattern_map_savgol_resampled_30s_stationary.html'
m.save(map_filepath_savgol_30s_stationary)

map_filepath_savgol_30s_stationary


'walk_pattern_map_savgol_resampled_30s_stationary.html'

In [45]:
from sklearn.cluster import DBSCAN

# Step 1: Identify key points using DBSCAN clustering to find areas of high density (e.g., stops) and low density (e.g., significant changes in direction)
coords = data[['Latitude', 'Longitude']].values
clustering = DBSCAN(eps=0.0001, min_samples=3).fit(coords)
data['Cluster'] = clustering.labels_

# Step 2: Apply a less aggressive Savitzky-Golay smoothing while retaining the identified key points
window_length = 5
data['Latitude_smooth'] = savgol_filter(data['Latitude'], window_length, polyorder=2)
data['Longitude_smooth'] = savgol_filter(data['Longitude'], window_length, polyorder=2)

# Retain the original coordinates for the identified key points
key_points_mask = data['Cluster'] != -1
data.loc[key_points_mask, 'Latitude_smooth'] = data.loc[key_points_mask, 'Latitude']
data.loc[key_points_mask, 'Longitude_smooth'] = data.loc[key_points_mask, 'Longitude']

# Step 3: Resample the data at a 30-second interval
data_resampled_smooth_30s = data[['Latitude_smooth', 'Longitude_smooth']].resample('30S').mean().dropna()

# Step 4: Create a map and plot the smoothed, resampled path, highlighting areas where the person was stationary for at least 30 seconds
m = folium.Map(location=[data_resampled_smooth_30s.iloc[0]['Latitude_smooth'], data_resampled_smooth_30s.iloc[0]['Longitude_smooth']], zoom_start=16)

previous_coord = None
for i in range(1, len(data_resampled_smooth_30s)):
    current_coord = (data_resampled_smooth_30s.iloc[i]['Latitude_smooth'], data_resampled_smooth_30s.iloc[i]['Longitude_smooth'])
    color = 'blue' if current_coord != previous_coord else 'red'
    
    # Highlight the areas where the person has been stationary for at least 30 seconds
    if color == 'red':
        folium.CircleMarker(location=current_coord, radius=3, color='yellow', fill=True, fill_color='yellow').add_to(m)
    
    folium.PolyLine([(data_resampled_smooth_30s.iloc[i-1]['Latitude_smooth'], data_resampled_smooth_30s.iloc[i-1]['Longitude_smooth']), 
                     current_coord], color=color).add_to(m)
    previous_coord = current_coord

# Save the map to an HTML file
map_filepath_dbscan_savgol_30s = 'walk_pattern_map_dbscan_savgol_resampled_30s.html'
m.save(map_filepath_dbscan_savgol_30s)

map_filepath_dbscan_savgol_30s


'walk_pattern_map_dbscan_savgol_resampled_30s.html'

In [47]:
# Get the total length of the data and the time range it covers for the "Live Sol" soldier
live_sol_data = data[data['Soldier_ID'] == 'Live Sol']
live_sol_data_length = len(live_sol_data)
live_sol_data_time_range = live_sol_data.index[-1] - live_sol_data.index[0]

live_sol_data_length, live_sol_data_time_range


(287, Timedelta('0 days 00:38:49.530000'))

In [51]:
# Step 1: Identify key points using DBSCAN clustering to find areas of high density (e.g., stops) and low density (e.g., significant changes in direction)
coords = live_sol_data[['Latitude', 'Longitude']].values
clustering = DBSCAN(eps=0.0001, min_samples=3).fit(coords)
live_sol_data['Cluster'] = clustering.labels_

# Step 2: Apply a slightly more aggressive Savitzky-Golay smoothing while retaining the identified key points
window_length = 7
live_sol_data['Latitude_smooth'] = savgol_filter(live_sol_data['Latitude'], window_length, polyorder=2)
live_sol_data['Longitude_smooth'] = savgol_filter(live_sol_data['Longitude'], window_length, polyorder=2)

# Retain the original coordinates for the identified key points
key_points_mask = live_sol_data['Cluster'] != -1
live_sol_data.loc[key_points_mask, 'Latitude_smooth'] = live_sol_data.loc[key_points_mask, 'Latitude']
live_sol_data.loc[key_points_mask, 'Longitude_smooth'] = live_sol_data.loc[key_points_mask, 'Longitude']

# Step 3: Resample the data at a 30-second interval
data_resampled_smooth_30s = live_sol_data[['Latitude_smooth', 'Longitude_smooth']].resample('30S').mean().dropna()

# Step 4: Create a map and plot the smoothed, resampled path, highlighting areas where the person was stationary for at least 1 minute
m = folium.Map(location=[data_resampled_smooth_30s.iloc[0]['Latitude_smooth'], data_resampled_smooth_30s.iloc[0]['Longitude_smooth']], zoom_start=20)

previous_coord = None
for i in range(1, len(data_resampled_smooth_30s)):
    current_coord = (data_resampled_smooth_30s.iloc[i]['Latitude_smooth'], data_resampled_smooth_30s.iloc[i]['Longitude_smooth'])
    color = 'blue' if current_coord != previous_coord else 'red'
    
    # Highlight the areas where the person has been stationary for at least 1 minute
    if color == 'red':
        folium.CircleMarker(location=current_coord, radius=3, color='yellow', fill=True, fill_color='yellow').add_to(m)
    
    folium.PolyLine([(data_resampled_smooth_30s.iloc[i-1]['Latitude_smooth'], data_resampled_smooth_30s.iloc[i-1]['Longitude_smooth']), 
                     current_coord], color=color).add_to(m)
    previous_coord = current_coord

# Save the map to an HTML file
map_filepath_dbscan_savgol_30s_live_sol = 'walk_pattern_map_dbscan_savgol_resampled_30s_live_sol.html'
m.save(map_filepath_dbscan_savgol_30s_live_sol)

map_filepath_dbscan_savgol_30s_live_sol


'walk_pattern_map_dbscan_savgol_resampled_30s_live_sol.html'