In [6]:
# Re-import necessary libraries
import pandas as pd

# Locations of the two input CSV files (absolute paths on the author's machine)
known_route_path = r"C:\Users\Sandesh\Desktop\DecodeX\Known route.csv"
ongoing_data_path = r"C:\Users\Sandesh\Desktop\DecodeX\On going data.csv"

# Known route: read the raw table, then replace its separate 'Date' and 'Time'
# text columns with one parsed "datetime" column appended at the end.
# errors="coerce" turns values that cannot be parsed into NaT instead of raising;
# such rows are reported by the missing-value counts printed below.
# NOTE(review): no explicit format/dayfirst is passed to to_datetime, so the
# day/month order is inferred by pandas -- confirm it matches the CSV convention.
known_route_df = pd.read_csv(known_route_path)
known_route_df = known_route_df.assign(
    datetime=pd.to_datetime(
        known_route_df["Date"] + " " + known_route_df["Time"], errors="coerce"
    )
).drop(columns=["Date", "Time"])

# Ongoing data: same treatment as the known-route table
ongoing_data_df = pd.read_csv(ongoing_data_path)
ongoing_data_df = ongoing_data_df.assign(
    datetime=pd.to_datetime(
        ongoing_data_df["Date"] + " " + ongoing_data_df["Time"], errors="coerce"
    )
).drop(columns=["Date", "Time"])

# Per-column count of missing entries in each table
missing_values_known = known_route_df.isna().sum()
missing_values_ongoing = ongoing_data_df.isna().sum()

# Show the first rows of each table, followed by the missing-value counts
for label, content in (
    ("Known Route DataFrame:\n", known_route_df.head()),
    ("Ongoing Data DataFrame:\n", ongoing_data_df.head()),
    ("Missing Values in Known Route DataFrame:\n", missing_values_known),
    ("Missing Values in Ongoing Data DataFrame:\n", missing_values_ongoing),
):
    print(label, content, "\n")

Known Route DataFrame:
                Route_ID   latitude  longitude timestamp            datetime
0  0_Austin_Los Angeles  30.267115 -97.743072   27:31.3 2024-09-07 04:27:31
1  0_Austin_Los Angeles  30.274387 -97.761541   37:35.8 2024-09-07 04:37:36
2  0_Austin_Los Angeles  30.264452 -97.786738   47:40.4 2024-09-07 04:47:40
3  0_Austin_Los Angeles  30.236759 -97.840425   57:45.0 2024-09-07 04:57:45
4  0_Austin_Los Angeles  30.232651 -97.876297   07:49.5 2024-09-07 05:07:50 

Ongoing Data DataFrame:
   Pallet_ID   latitude  longitude timestamp            datetime
0     Y0623  41.795567 -87.575332   00:00.0 2024-05-09 08:00:00
1    S21000  40.584618 -73.845570   10:54.1 2024-05-09 08:10:54
2     A8624  41.996865 -87.829199   21:07.6 2024-05-09 08:21:08
3    I21001  40.576574 -74.307722   32:42.3 2024-05-09 08:32:42
4    Q11002  40.551580 -74.206959   43:36.4 2024-05-09 08:43:36 

Missing Values in Known Route DataFrame:
 Route_ID     0
latitude     0
longitude    0
timestamp    0
datet

In [8]:
# Install geopy into the environment of the running kernel. The explicit %pip
# magic does not depend on IPython's automagic setting, and the version is
# pinned to the release this notebook was run with so re-runs are reproducible.
%pip install geopy==2.4.1

Collecting geopy
  Obtaining dependency information for geopy from https://files.pythonhosted.org/packages/e5/15/cf2a69ade4b194aa524ac75112d5caac37414b20a3a03e6865dfe0bd1539/geopy-2.4.1-py3-none-any.whl.metadata
  Downloading geopy-2.4.1-py3-none-any.whl.metadata (6.8 kB)
Collecting geographiclib<3,>=1.52 (from geopy)
  Obtaining dependency information for geographiclib<3,>=1.52 from https://files.pythonhosted.org/packages/9f/5a/a26132406f1f40cf51ea349a5f11b0a46cec02a2031ff82e391c2537247a/geographiclib-2.0-py3-none-any.whl.metadata
  Downloading geographiclib-2.0-py3-none-any.whl.metadata (1.4 kB)
Downloading geopy-2.4.1-py3-none-any.whl (125 kB)
   ---------------------------------------- 0.0/125.4 kB ? eta -:--:--
   ---------------------------------------- 0.0/125.4 kB ? eta -:--:--
   ---------------------------------------- 0.0/125.4 kB ? eta -:--:--
   ---------------------------------------- 0.0/125.4 kB ? eta -:--:--
   ---------------------------------------- 0.0/125.4 kB ? et

In [10]:
from geopy.distance import geodesic

def check_route_adherence(pallet_lat, pallet_lon, known_routes, threshold_km=10):
    """Classify a pallet position as "On-Route" or "Off-Route".

    The position counts as on-route when at least one point in ``known_routes``
    lies within ``threshold_km`` kilometres (geodesic distance) of it. Every row
    of ``known_routes`` is considered; no filtering by route identifier is done.

    Parameters
    ----------
    pallet_lat, pallet_lon : float
        Pallet coordinates, passed to ``geodesic`` as a ``(lat, lon)`` pair.
    known_routes : pandas.DataFrame
        Table of reference points with "latitude" and "longitude" columns.
    threshold_km : float, default 10
        Maximum allowed distance, in kilometres, to the nearest reference point.

    Returns
    -------
    str
        "On-Route" if any reference point is within the threshold (inclusive),
        otherwise "Off-Route" (also returned when the table has no rows).
    """
    # A table without rows has nothing to match against.
    if len(known_routes) == 0:
        return "Off-Route"

    # The pallet position is the same for every comparison, so build it once.
    pallet_point = (pallet_lat, pallet_lon)

    # Walk the two coordinate columns directly rather than using iterrows(),
    # which constructs a full Series object for every reference point.
    route_points = zip(
        known_routes["latitude"].tolist(),
        known_routes["longitude"].tolist(),
    )

    # any() stops at the first reference point inside the allowed deviation.
    within_threshold = any(
        geodesic(route_point, pallet_point).km <= threshold_km
        for route_point in route_points
    )
    return "On-Route" if within_threshold else "Off-Route"

# Evaluate each ongoing observation against the full set of known route points.
# axis=1 hands every row to the lambda, which forwards that row's coordinates.
route_status = ongoing_data_df.apply(
    lambda observation: check_route_adherence(
        observation["latitude"], observation["longitude"], known_route_df
    ),
    axis=1,
)

# Store the per-row verdict ("On-Route" / "Off-Route") alongside the original data
ongoing_data_df["Route_Status"] = route_status

# Show the annotated table
ongoing_data_df

Unnamed: 0,Pallet_ID,latitude,longitude,timestamp,datetime,Route_Status
0,Y0623,41.795567,-87.575332,00:00.0,2024-05-09 08:00:00,On-Route
1,S21000,40.584618,-73.845570,10:54.1,2024-05-09 08:10:54,Off-Route
2,A8624,41.996865,-87.829199,21:07.6,2024-05-09 08:21:08,On-Route
3,I21001,40.576574,-74.307722,32:42.3,2024-05-09 08:32:42,Off-Route
4,Q11002,40.551580,-74.206959,43:36.4,2024-05-09 08:43:36,Off-Route
...,...,...,...,...,...,...
846,Y51051,47.452115,-121.644416,47:06.2,2024-07-09 09:47:06,On-Route
847,F31510,45.513573,-121.813578,04:01.6,2024-07-09 10:04:02,Off-Route
848,K71511,45.616479,-122.019548,14:12.9,2024-07-09 10:14:13,On-Route
849,A91512,45.464929,-121.884768,24:24.3,2024-07-09 10:24:24,Off-Route
