In [22]:

from concurrent.futures import ThreadPoolExecutor

In [100]:
import numpy as np 
import pandas as pd
import random
from datetime import datetime, timedelta

# Number of customers
num_customers = 100

# Define the start date of the simulation (3 months from today)
start_date = datetime.today()

# Function to generate a random departure time within a given time range (start_hour, start_minute) to (end_hour, end_minute)
def generate_random_departure_time(start_hour, start_minute, end_hour, end_minute):
    random_hour = np.random.randint(start_hour, end_hour + 1)
    if random_hour == end_hour:
        random_minute = np.random.randint(start_minute, 60)
    else:
        random_minute = np.random.randint(0, 60)
    return random_hour, random_minute

# Function to generate random latitudes and longitudes (for home and office)
def gen_rand_lat_long():
    lat = np.random.uniform(12.79, 13.02)  # Bengaluru coordinates range (example)
    lon = np.random.uniform(77.54, 77.75)  # Bengaluru coordinates range (example)
    return lat, lon

# Function to generate the trip records
def trip_rec():
    trip_data = []

    for customer_id in range(1, num_customers + 1):
        # Generate home and office locations only once per customer
        home_lat, home_lon = gen_rand_lat_long()
        office_lat, office_lon = gen_rand_lat_long()

        for day in range(0, 90):  # Simulate 90 days (3 months approx, including weekends)
            date = start_date + timedelta(days=day)

            # Simulate home-to-office trip (morning commute)
            departure_hour, departure_minute = generate_random_departure_time(6, 0, 9, 0)  # Random time between 6:00 AM to 9:00 AM
            trip_data.append({
                'customer_id': customer_id,
                'date': date.strftime('%Y-%m-%d'),
                'departure_time': f'{departure_hour:02}:{departure_minute:02}',
                'trip_type': 'home_to_office',
                'home_lat': home_lat,
                'home_lon': home_lon,
                'office_lat': office_lat,
                'office_lon': office_lon,
            })

            # Simulate office-to-home trip (evening commute)
            departure_hour, departure_minute = generate_random_departure_time(16, 0, 21, 0)  # Random time between 4:00 PM to 9:00 PM
            trip_data.append({
                'customer_id': customer_id,
                'date': date.strftime('%Y-%m-%d'),
                'departure_time': f'{departure_hour:02}:{departure_minute:02}',
                'trip_type': 'office_to_home',
                'home_lat': office_lat,  # Make sure it's the same for both trips
                'home_lon': office_lon,  # Same here as well
                'office_lat': home_lat,  # Make sure it's the same for both trips
                'office_lon': home_lon,  # Same here as well
            })
    
    return trip_data

# Generate the trips
trip_records = trip_rec()

# Convert to a DataFrame for easier analysis
df_trips = pd.DataFrame(trip_records)
df_trips['day_of_week'] = pd.to_datetime(df_trips['date']).dt.weekday

# Display the first few rows of the dataset
display(df_trips)



Unnamed: 0,customer_id,date,departure_time,trip_type,home_lat,home_lon,office_lat,office_lon,day_of_week
0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4
1,1,2025-04-11,17:03,office_to_home,12.947753,77.648378,12.891368,77.705299,4
2,1,2025-04-12,09:12,home_to_office,12.891368,77.705299,12.947753,77.648378,5
3,1,2025-04-12,17:31,office_to_home,12.947753,77.648378,12.891368,77.705299,5
4,1,2025-04-13,09:32,home_to_office,12.891368,77.705299,12.947753,77.648378,6
...,...,...,...,...,...,...,...,...,...
17995,100,2025-07-07,19:31,office_to_home,12.950105,77.696155,12.814787,77.707082,0
17996,100,2025-07-08,06:44,home_to_office,12.814787,77.707082,12.950105,77.696155,1
17997,100,2025-07-08,20:10,office_to_home,12.950105,77.696155,12.814787,77.707082,1
17998,100,2025-07-09,09:10,home_to_office,12.814787,77.707082,12.950105,77.696155,2


In [101]:
df_trips.to_csv("../JJ_ETA/Data/Final_1.csv")

In [104]:
customers = df_trips['customer_id'].unique()[:100]  # First 100 customers
days_of_week = range(7)  # Days of the week (0 to 6)

# Prepare an empty list to store the new data
new_data = []

# Fill the new data with customer info for all 7 days of the week
for customer in customers:
    # Get the first occurrence of the customer for their home and office locations
    customer_data = df_trips[df_trips['customer_id'] == customer].iloc[0]
    
    # Add a row for each day of the week
    for day in days_of_week:
        new_data.append({
            'customer_id': customer,
            'day_of_week': day,
            'home_lat': customer_data['home_lat'],
            'home_lon': customer_data['home_lon'],
            'office_lat': customer_data['office_lat'],
            'office_lon': customer_data['office_lon']
        })

# Convert the list to a DataFrame
df_extracted_700 = pd.DataFrame(new_data)

# Show the number of rows to verify
df_extracted_700.shape[0]
display(df_extracted_700)

Unnamed: 0,customer_id,day_of_week,home_lat,home_lon,office_lat,office_lon
0,1,0,12.891368,77.705299,12.947753,77.648378
1,1,1,12.891368,77.705299,12.947753,77.648378
2,1,2,12.891368,77.705299,12.947753,77.648378
3,1,3,12.891368,77.705299,12.947753,77.648378
4,1,4,12.891368,77.705299,12.947753,77.648378
...,...,...,...,...,...,...
695,100,2,12.814787,77.707082,12.950105,77.696155
696,100,3,12.814787,77.707082,12.950105,77.696155
697,100,4,12.814787,77.707082,12.950105,77.696155
698,100,5,12.814787,77.707082,12.950105,77.696155


In [105]:
df_extracted_700.to_csv("../JJ_ETA/Data/Final_1_unique.csv")

Failed to retrieve data. Status Code: 401
Could not retrieve the distance and duration.


In [109]:
df3=pd.DataFrame()
df3=df_trips[['customer_id','trip_type','home_lat','home_lon','office_lat','office_lon']].drop_duplicates(subset=['customer_id'])
display(df3)

Unnamed: 0,customer_id,trip_type,home_lat,home_lon,office_lat,office_lon
0,1,home_to_office,12.891368,77.705299,12.947753,77.648378
180,2,home_to_office,12.831786,77.548423,13.014858,77.592067
360,3,home_to_office,12.983131,77.701488,12.840102,77.611706
540,4,home_to_office,12.937659,77.670132,12.935367,77.633624
720,5,home_to_office,12.856614,77.689541,12.969395,77.624232
...,...,...,...,...,...,...
17100,96,home_to_office,12.930530,77.738733,12.904230,77.696180
17280,97,home_to_office,12.932761,77.736947,12.981803,77.727585
17460,98,home_to_office,12.811784,77.615661,12.884605,77.720786
17640,99,home_to_office,13.004288,77.677651,12.843650,77.573015


In [110]:
df3.to_csv("../JJ_ETA/Data/Final_simp_unique.csv")

In [111]:
import requests
# Load the dataset
df4 = pd.read_csv('../JJ_ETA/Data/Final_simp_unique.csv')

# Function to calculate distance using OSRM API
def get_distance(home_lat, home_lon, office_lat, office_lon):
    url = f"http://router.project-osrm.org/route/v1/driving/{home_lon},{home_lat};{office_lon},{office_lat}?overview=false&steps=false"
    response = requests.get(url)
    
    if response.status_code == 200:
        data = response.json()
        if data['routes']:
            return data['routes'][0]['distance'] / 1000  # Convert meters to kilometers
    return None  # In case of failure

# Calculate the distance for each row and add it to a new column 'distance_km'
df4['distance_km'] = df4.apply(
    lambda row: get_distance(row['home_lat'], row['home_lon'], row['office_lat'], row['office_lon']),
    axis=1
)
df4.to_csv("../JJ_ETA/Data/Final_simp_unique_with_distance.csv")


In [112]:
display(df4)

Unnamed: 0.1,Unnamed: 0,customer_id,trip_type,home_lat,home_lon,office_lat,office_lon,distance_km
0,0,1,home_to_office,12.891368,77.705299,12.947753,77.648378,12.9867
1,180,2,home_to_office,12.831786,77.548423,13.014858,77.592067,25.2954
2,360,3,home_to_office,12.983131,77.701488,12.840102,77.611706,27.1161
3,540,4,home_to_office,12.937659,77.670132,12.935367,77.633624,7.9356
4,720,5,home_to_office,12.856614,77.689541,12.969395,77.624232,19.7501
...,...,...,...,...,...,...,...,...
95,17100,96,home_to_office,12.930530,77.738733,12.904230,77.696180,8.3662
96,17280,97,home_to_office,12.932761,77.736947,12.981803,77.727585,9.3367
97,17460,98,home_to_office,12.811784,77.615661,12.884605,77.720786,20.9783
98,17640,99,home_to_office,13.004288,77.677651,12.843650,77.573015,29.8425


In [114]:
df_distance = pd.read_csv('../JJ_ETA/Data/Final_simp_unique_with_distance.csv')
df_final = pd.read_csv('../JJ_ETA/Data/Final_1.csv')
display(df_final.head())
display(df_distance.head())
# Merge the two datasets on 'customer_id' column to add the 'distance_km' column

Unnamed: 0.1,Unnamed: 0,customer_id,date,departure_time,trip_type,home_lat,home_lon,office_lat,office_lon,day_of_week
0,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4
1,1,1,2025-04-11,17:03,office_to_home,12.947753,77.648378,12.891368,77.705299,4
2,2,1,2025-04-12,09:12,home_to_office,12.891368,77.705299,12.947753,77.648378,5
3,3,1,2025-04-12,17:31,office_to_home,12.947753,77.648378,12.891368,77.705299,5
4,4,1,2025-04-13,09:32,home_to_office,12.891368,77.705299,12.947753,77.648378,6


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,customer_id,trip_type,home_lat,home_lon,office_lat,office_lon,distance_km
0,0,0,1,home_to_office,12.891368,77.705299,12.947753,77.648378,12.9867
1,1,180,2,home_to_office,12.831786,77.548423,13.014858,77.592067,25.2954
2,2,360,3,home_to_office,12.983131,77.701488,12.840102,77.611706,27.1161
3,3,540,4,home_to_office,12.937659,77.670132,12.935367,77.633624,7.9356
4,4,720,5,home_to_office,12.856614,77.689541,12.969395,77.624232,19.7501


In [116]:
df_merged = pd.merge(df_final, df_distance[['customer_id', 'distance_km']], on='customer_id', how='left')
display(df_merged.head())

Unnamed: 0.1,Unnamed: 0,customer_id,date,departure_time,trip_type,home_lat,home_lon,office_lat,office_lon,day_of_week,distance_km
0,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867
1,1,1,2025-04-11,17:03,office_to_home,12.947753,77.648378,12.891368,77.705299,4,12.9867
2,2,1,2025-04-12,09:12,home_to_office,12.891368,77.705299,12.947753,77.648378,5,12.9867
3,3,1,2025-04-12,17:31,office_to_home,12.947753,77.648378,12.891368,77.705299,5,12.9867
4,4,1,2025-04-13,09:32,home_to_office,12.891368,77.705299,12.947753,77.648378,6,12.9867


In [117]:
df_merged.to_csv("../JJ_ETA/Data/Final_with_distance_verified.csv")

In [128]:
df_700=pd.read_csv('../JJ_ETA/Data/Final_1_unique.csv')
print(df_700.shape[0])
display(df_700.head())

700


Unnamed: 0.1,Unnamed: 0,customer_id,day_of_week,home_lat,home_lon,office_lat,office_lon
0,0,1,0,12.891368,77.705299,12.947753,77.648378
1,1,1,1,12.891368,77.705299,12.947753,77.648378
2,2,1,2,12.891368,77.705299,12.947753,77.648378
3,3,1,3,12.891368,77.705299,12.947753,77.648378
4,4,1,4,12.891368,77.705299,12.947753,77.648378


In [129]:

# Load the dataset


# Function to calculate baseline duration using OSRM API
def get_baseline_duration(home_lat, home_lon, office_lat, office_lon):
    url = f"http://router.project-osrm.org/route/v1/driving/{home_lon},{home_lat};{office_lon},{office_lat}?overview=false&steps=false"
    response = requests.get(url)
    
    if response.status_code == 200:
        data = response.json()
        if data['routes']:
            duration_seconds = data['routes'][0]['duration']  # Duration in seconds
            return duration_seconds / 60  # Convert to minutes
    return None  # In case of failure

# Apply the function to get baseline duration for each trip
df_700['baseline_duration_min'] = df_700.apply(
    lambda row: get_baseline_duration(row['home_lat'], row['home_lon'], row['office_lat'], row['office_lon']),
    axis=1
)

# Display the updated dataframe
print(df_700.head())

# Save the updated dataframe with baseline duration to a new CSV file
df_700.to_csv('../JJ_ETA/Data/Final_1_unique_with_time.csv', index=False)


   Unnamed: 0  customer_id  day_of_week   home_lat   home_lon  office_lat  \
0           0            1            0  12.891368  77.705299   12.947753   
1           1            1            1  12.891368  77.705299   12.947753   
2           2            1            2  12.891368  77.705299   12.947753   
3           3            1            3  12.891368  77.705299   12.947753   
4           4            1            4  12.891368  77.705299   12.947753   

   office_lon  baseline_duration_min  
0   77.648378                  20.33  
1   77.648378                  20.33  
2   77.648378                  20.33  
3   77.648378                  20.33  
4   77.648378                  20.33  


In [132]:
display(df_merged)

Unnamed: 0.1,Unnamed: 0,customer_id,date,departure_time,trip_type,home_lat,home_lon,office_lat,office_lon,day_of_week,distance_km
0,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867
1,1,1,2025-04-11,17:03,office_to_home,12.947753,77.648378,12.891368,77.705299,4,12.9867
2,2,1,2025-04-12,09:12,home_to_office,12.891368,77.705299,12.947753,77.648378,5,12.9867
3,3,1,2025-04-12,17:31,office_to_home,12.947753,77.648378,12.891368,77.705299,5,12.9867
4,4,1,2025-04-13,09:32,home_to_office,12.891368,77.705299,12.947753,77.648378,6,12.9867
...,...,...,...,...,...,...,...,...,...,...,...
17995,17995,100,2025-07-07,19:31,office_to_home,12.950105,77.696155,12.814787,77.707082,0,18.3599
17996,17996,100,2025-07-08,06:44,home_to_office,12.814787,77.707082,12.950105,77.696155,1,18.3599
17997,17997,100,2025-07-08,20:10,office_to_home,12.950105,77.696155,12.814787,77.707082,1,18.3599
17998,17998,100,2025-07-09,09:10,home_to_office,12.814787,77.707082,12.950105,77.696155,2,18.3599


In [133]:
df_merged2 = pd.merge(df_merged, df_700[['customer_id', 'baseline_duration_min']], on='customer_id', how='left')
display(df_merged2)

Unnamed: 0.1,Unnamed: 0,customer_id,date,departure_time,trip_type,home_lat,home_lon,office_lat,office_lon,day_of_week,distance_km,baseline_duration_min
0,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867,20.330000
1,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867,20.330000
2,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867,20.330000
3,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867,20.330000
4,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867,20.330000
...,...,...,...,...,...,...,...,...,...,...,...,...
125995,17999,100,2025-07-09,16:52,office_to_home,12.950105,77.696155,12.814787,77.707082,2,18.3599,25.733333
125996,17999,100,2025-07-09,16:52,office_to_home,12.950105,77.696155,12.814787,77.707082,2,18.3599,25.733333
125997,17999,100,2025-07-09,16:52,office_to_home,12.950105,77.696155,12.814787,77.707082,2,18.3599,25.733333
125998,17999,100,2025-07-09,16:52,office_to_home,12.950105,77.696155,12.814787,77.707082,2,18.3599,25.733333


In [134]:
df_merged2.to_csv("../JJ_ETA/Data/distance_and_time_verified.csv") 

In [139]:
import numpy as np
from datetime import datetime, timedelta

df_final = pd.read_csv('../JJ_ETA/Data/distance_and_time_verified.csv')
print(df_final.shape[0])

# Display the first few rows of the dataset to understand its structure
df_final.head()
# Function to calculate traffic factor based on day of the week and departure time
# Function to calculate traffic factor based on Bangalore's traffic patterns
def calculate_bangalore_traffic_factor(day_of_week, departure_time):
    # Define peak and off-peak hours for Bangalore's traffic
    morning_peak = range(7, 10)  # 7 AM to 10 AM (Morning rush hour)
    evening_peak = range(17, 21)  # 5 PM to 8 PM (Evening rush hour)
    off_peak = range(12, 16)  # 12 PM to 4 PM (Afternoon, lighter traffic)
    
    # Base traffic factor
    traffic_factor = 1.0
    
    # Weekdays (Monday to Friday)
    if day_of_week < 5:  
        # Higher traffic during peak hours
        if int(departure_time.split(':')[0]) in morning_peak or int(departure_time.split(':')[0]) in evening_peak:
            traffic_factor = 1.8  # Increase traffic during rush hours
        elif int(departure_time.split(':')[0]) in off_peak:
            traffic_factor = 1.2  # Moderate traffic during off-peak hours
    # Weekends (Saturday and Sunday)
    else:  
        # Moderate traffic during weekends, higher in the evening
        if int(departure_time.split(':')[0]) in morning_peak or int(departure_time.split(':')[0]) in evening_peak:
            traffic_factor = 1.5  # Moderate traffic during peak hours
        elif int(departure_time.split(':')[0]) in off_peak:
            traffic_factor = 1.1  # Lighter traffic during afternoon

    # Add noise to the traffic factor to account for unpredictability
    noise = np.random.uniform(0.95, 1.05)  # Noise between 0.95 and 1.05
    return traffic_factor * noise

# Apply the new traffic factor based on Bangalore traffic patterns and calculate arrival time
def calculate_bangalore_arrival_time(row):
    baseline_duration = row['distance_km'] * 5  # Example: 5 minutes per km (baseline)
    traffic_factor = calculate_bangalore_traffic_factor(row['day_of_week'], row['departure_time'])
    
    # Adjust the baseline duration with the traffic factor
    adjusted_duration = baseline_duration * traffic_factor
    
    # Add the duration to the departure time
    departure_time_obj = datetime.strptime(row['departure_time'], "%H:%M")
    arrival_time_obj = departure_time_obj + timedelta(minutes=adjusted_duration)
    
    # Return the new arrival time in HH:MM format
    return arrival_time_obj.strftime("%H:%M")

# Apply the function to create the new column 'arrival_time' based on Bangalore's traffic
df_final['arrival_time'] = df_final.apply(calculate_bangalore_arrival_time, axis=1)

# Display the updated dataframe with the new 'arrival_time' column
print(df_final.shape[0])
display(df_final)
df_final.to_csv("../JJ_ETA/Data/Final_distance_and_time_verified.csv") 


126000
126000


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,customer_id,date,departure_time,trip_type,home_lat,home_lon,office_lat,office_lon,day_of_week,distance_km,baseline_duration_min,arrival_time
0,0,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867,20.330000,09:02
1,1,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867,20.330000,09:05
2,2,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867,20.330000,09:04
3,3,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867,20.330000,09:07
4,4,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867,20.330000,08:59
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125995,125995,17999,100,2025-07-09,16:52,office_to_home,12.950105,77.696155,12.814787,77.707082,2,18.3599,25.733333,18:28
125996,125996,17999,100,2025-07-09,16:52,office_to_home,12.950105,77.696155,12.814787,77.707082,2,18.3599,25.733333,18:20
125997,125997,17999,100,2025-07-09,16:52,office_to_home,12.950105,77.696155,12.814787,77.707082,2,18.3599,25.733333,18:19
125998,125998,17999,100,2025-07-09,16:52,office_to_home,12.950105,77.696155,12.814787,77.707082,2,18.3599,25.733333,18:22


In [141]:
df_final.to_csv("../JJ_ETA/Data/Final_distance_and_time_verified.csv")
df_ff = pd.read_csv('../JJ_ETA/Data/Final_Dataset_with_Bangalore_Arrival_Time.csv')
display(df_ff)
df_ff['departure_time'] = pd.to_datetime(df_ff['departure_time'], format='%H:%M')
df_ff['arrival_time'] = pd.to_datetime(df_ff['arrival_time'], format='%H:%M')

# Calculate the time difference (time taken) between departure_time and arrival_time
df_ff['time_taken'] = (df_ff['arrival_time'] - df_ff['departure_time']).dt.total_seconds() / 60  # Convert to minutes
display(df_ff)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,customer_id,date,departure_time,trip_type,home_lat,home_lon,office_lat,office_lon,day_of_week,distance_km,arrival_time
0,0,0,1,2025-04-11,07:06,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867,09:01
1,1,1,1,2025-04-11,17:03,office_to_home,12.947753,77.648378,12.891368,77.705299,4,12.9867,18:54
2,2,2,1,2025-04-12,09:12,home_to_office,12.891368,77.705299,12.947753,77.648378,5,12.9867,10:54
3,3,3,1,2025-04-12,17:31,office_to_home,12.947753,77.648378,12.891368,77.705299,5,12.9867,19:04
4,4,4,1,2025-04-13,09:32,home_to_office,12.891368,77.705299,12.947753,77.648378,6,12.9867,11:04
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17995,17995,17995,100,2025-07-07,19:31,office_to_home,12.950105,77.696155,12.814787,77.707082,0,18.3599,22:20
17996,17996,17996,100,2025-07-08,06:44,home_to_office,12.814787,77.707082,12.950105,77.696155,1,18.3599,08:11
17997,17997,17997,100,2025-07-08,20:10,office_to_home,12.950105,77.696155,12.814787,77.707082,1,18.3599,23:03
17998,17998,17998,100,2025-07-09,09:10,home_to_office,12.814787,77.707082,12.950105,77.696155,2,18.3599,11:59


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,customer_id,date,departure_time,trip_type,home_lat,home_lon,office_lat,office_lon,day_of_week,distance_km,arrival_time,time_taken
0,0,0,1,2025-04-11,1900-01-01 07:06:00,home_to_office,12.891368,77.705299,12.947753,77.648378,4,12.9867,1900-01-01 09:01:00,115.0
1,1,1,1,2025-04-11,1900-01-01 17:03:00,office_to_home,12.947753,77.648378,12.891368,77.705299,4,12.9867,1900-01-01 18:54:00,111.0
2,2,2,1,2025-04-12,1900-01-01 09:12:00,home_to_office,12.891368,77.705299,12.947753,77.648378,5,12.9867,1900-01-01 10:54:00,102.0
3,3,3,1,2025-04-12,1900-01-01 17:31:00,office_to_home,12.947753,77.648378,12.891368,77.705299,5,12.9867,1900-01-01 19:04:00,93.0
4,4,4,1,2025-04-13,1900-01-01 09:32:00,home_to_office,12.891368,77.705299,12.947753,77.648378,6,12.9867,1900-01-01 11:04:00,92.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17995,17995,17995,100,2025-07-07,1900-01-01 19:31:00,office_to_home,12.950105,77.696155,12.814787,77.707082,0,18.3599,1900-01-01 22:20:00,169.0
17996,17996,17996,100,2025-07-08,1900-01-01 06:44:00,home_to_office,12.814787,77.707082,12.950105,77.696155,1,18.3599,1900-01-01 08:11:00,87.0
17997,17997,17997,100,2025-07-08,1900-01-01 20:10:00,office_to_home,12.950105,77.696155,12.814787,77.707082,1,18.3599,1900-01-01 23:03:00,173.0
17998,17998,17998,100,2025-07-09,1900-01-01 09:10:00,home_to_office,12.814787,77.707082,12.950105,77.696155,2,18.3599,1900-01-01 11:59:00,169.0


In [142]:
df_final.to_csv("../JJ_ETA/Data/Final_Dataset_with_Bangalore_Arrival_Time.csv") 