In [1]:
import pandas as pd
import requests
import numpy as np
import time
import folium
import matplotlib.pyplot as plt
import json

In [2]:
# Resume from the OSRM matrices saved by an earlier run, if both CSV files exist.
try:
    distance_df = pd.read_csv("data/osrm_distance_matrix.csv", index_col=0)
    distance_matrix = distance_df.values
    print(distance_df.shape)
    duration_df = pd.read_csv("data/osrm_duration_matrix.csv", index_col=0)
except FileNotFoundError:
    # Either file is missing: fall back to empty matrices and no known codes.
    print("No previous data found. Starting fresh.")
    distance_matrix, duration_matrix = np.zeros((0, 0)), np.zeros((0, 0))
    existing_codes = []
else:
    duration_matrix = duration_df.values
    # The row labels of the duration matrix are the codes already processed.
    existing_codes = duration_df.index.tolist()

(651, 651)


In [3]:
# Load new data from CSV and discard rows that have no CODE.
data = pd.read_csv('data/07-03-2025-PO.csv')
data = data.dropna(subset=['CODE'])

# Numeric codes are normalised to integer-looking strings (e.g. 12.0 -> "12").
# The pandas dtype predicates are used instead of comparing the dtype against the
# strings 'float'/'int': that comparison only matches the platform-default widths,
# so an int64 column can be missed where the default integer is 32-bit.
code_is_numeric = (
    pd.api.types.is_integer_dtype(data['CODE'])
    or pd.api.types.is_float_dtype(data['CODE'])
)
if code_is_numeric:
    data['CODE'] = data['CODE'].astype(int).astype(str)
    print('Converting to Object')

In [4]:
# Load the master GPS table as `data` (replacing any previously loaded frame)
# and discard rows that have no CODE.
data = pd.read_csv('data/master_gps.csv').dropna(subset=['CODE'])

In [5]:
# Coordinates of the SMAK Kadawatha site as (latitude, longitude).
SMAK_KADAWATHA = (7.0038321, 79.9394804)

# Single record for the SMAK site, using the same columns as the GPS data.
smak_data = dict(
    CODE='0',
    LOCATION="SMAK",
    ADDRESS="Smak, Kadawatha, Western Province, Sri Lanka",
    LATITUDE=SMAK_KADAWATHA[0],
    LONGITUDE=SMAK_KADAWATHA[1],
    BRAND="SMAK",
)

# Put the SMAK record first so it ends up at position 0 of the combined frame.
data = pd.concat([pd.DataFrame(smak_data, index=[0]), data], ignore_index=True)

In [6]:
# Extract one (CODE, LATITUDE, LONGITUDE) row per unique CODE from the new data.
new_data = data[['CODE', 'LATITUDE', 'LONGITUDE']].drop_duplicates(subset=['CODE'])
new_codes = new_data['CODE'].tolist()
# Pair the coordinate columns directly instead of iterating with iterrows(),
# which builds a Series per row; the result is a list of (latitude, longitude)
# tuples in the same order as new_codes.
new_locations = list(zip(new_data['LATITUDE'], new_data['LONGITUDE']))

In [7]:
# Compare the de-duplicated frame's shape with the full frame's shape.
(new_data.shape, data.shape)

((651, 3), (651, 6))

In [8]:
# Show the row(s) whose CODE is the string '0' (the SMAK record).
data.loc[data['CODE'] == '0']

Unnamed: 0,CODE,LOCATION,ADDRESS,LATITUDE,LONGITUDE,BRAND
0,0,SMAK,"Smak, Kadawatha, Western Province, Sri Lanka",7.003832,79.93948,SMAK


In [9]:
# Identify codes (and their coordinates) that are not in the existing matrix.
# A set gives constant-time membership tests; the original list lookups scanned
# existing_codes once per new code and codes_to_add once per new location.
known_codes = set(existing_codes)
codes_to_add = [code for code in new_codes if code not in known_codes]
# new_codes and new_locations are parallel lists, so one pass over the pairs
# keeps the two results aligned index by index.
locations_to_add = [
    loc for code, loc in zip(new_codes, new_locations) if code not in known_codes
]

In [10]:
# Number of new codes and number of new locations (expected to match).
tuple(map(len, (codes_to_add, locations_to_add)))

(0, 0)

In [11]:
# Start from the stored master GPS table.
master_gps = pd.read_csv('data/master_gps.csv')

# Rows of `data` whose CODE is new, restricted to the master table's columns.
new_gps_data = data.loc[
    data['CODE'].isin(codes_to_add),
    ['CODE', 'LOCATION', 'ADDRESS', 'LATITUDE', 'LONGITUDE', 'BRAND'],
]
# Append the new rows, then keep only the first occurrence of each CODE.
master_gps = pd.concat([master_gps, new_gps_data], ignore_index=True).drop_duplicates(
    subset=['CODE'], keep='first'
)
print(master_gps.shape)
# Writing the result back to disk is currently disabled:
# master_gps.to_csv('data/master_gps.csv', index=False)

(650, 6)


In [12]:
# Master table with the SMAK record placed in front of all GPS rows.
smak_frame = pd.DataFrame(smak_data, index=[0])
master_data = pd.concat([smak_frame, master_gps], ignore_index=True)

In [13]:
# (latitude, longitude) tuples for every row of master_data, in row order.
existing_locations = list(
    master_data[['LATITUDE', 'LONGITUDE']].itertuples(index=False, name=None)
)

In [14]:
# Number of known locations and number of codes still to add.
tuple(map(len, (existing_locations, codes_to_add)))

(651, 0)

In [15]:
def get_osrm_data(origin, destination, timeout=10):
    """
    Get the driving route between two coordinates from the public OSRM API.

    :param origin: (latitude, longitude)
    :param destination: (latitude, longitude)
    :param timeout: Seconds to wait for the HTTP request before giving up.
    :return: Tuple ``(path_cords, distance, duration)`` for the first route in
        the response: ``path_cords`` is the GeoJSON coordinate list of the route
        geometry, ``distance`` is the route distance in kilometres (API value
        divided by 1000) and ``duration`` is the travel time in minutes (API
        value divided by 60). ``(None, np.inf, np.inf)`` is returned when the
        request fails, times out, returns a non-200 status, cannot be decoded
        as JSON, or contains no route.
    """
    osrm_base_url = "http://router.project-osrm.org/route/v1/car"
    # OSRM takes coordinates as "longitude,latitude", hence the swapped indices.
    url = f"{osrm_base_url}/{origin[1]},{origin[0]};{destination[1]},{destination[0]}?overview=full&geometries=geojson"

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return None, np.inf, np.inf
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or undecodable body: report "no route" so a long batch
        # of lookups is not aborted by one bad request.
        return None, np.inf, np.inf

    routes = payload.get("routes") if isinstance(payload, dict) else None
    if not routes:
        return None, np.inf, np.inf

    first_route = routes[0]
    path_cords = first_route["geometry"]["coordinates"]
    distance = first_route["distance"] / 1000
    duration = first_route["duration"] / 60
    return path_cords, distance, duration

In [16]:
# Number of known locations and number of locations still to add.
tuple(map(len, (existing_locations, locations_to_add)))

(651, 0)

In [17]:
# Number of codes already in the matrix and number of codes still to add.
tuple(map(len, (existing_codes, codes_to_add)))

(651, 0)

In [18]:
# Number of new codes and number of new locations (expected to match).
tuple(map(len, (codes_to_add, locations_to_add)))

(0, 0)

In [19]:
# Fill every missing pair of the OSRM distance/duration matrices and save both.
# A pair counts as missing when its stored distance is 0 (never computed) or inf
# (the previous lookup failed), so re-running this cell resumes an earlier fill.
# The fill always runs: the original `locations_to_add is not None` guard was
# always true for a list, which made its `else` branch unreachable.
CHECKPOINT_EVERY_ROWS = 5        # rows processed between intermediate CSV saves
REQUEST_PAUSE_SECONDS = 0.00001  # pause after each OSRM request


def _save_osrm_matrices(distance_matrix, duration_matrix, codes):
    """Write both matrices to their CSV files, labelled by code; return the frames."""
    distance_df = pd.DataFrame(distance_matrix, index=codes, columns=codes)
    distance_df.to_csv("data/osrm_distance_matrix.csv")
    duration_df = pd.DataFrame(duration_matrix, index=codes, columns=codes)
    duration_df.to_csv("data/osrm_duration_matrix.csv")
    return distance_df, duration_df


# existing_locations comes from master_data, which already had the new GPS rows
# appended, so locations_to_add is not appended here a second time.
locations = existing_locations
codes = existing_codes + codes_to_add
num_codes = len(codes)
print(len(locations))
print(len(codes))

# Matrix row/column i is labelled codes[i] but routed with locations[i]. Fail
# before any request is made instead of with an IndexError part-way through.
if len(locations) < num_codes:
    raise ValueError(
        f"Only {len(locations)} locations available for {num_codes} codes; "
        "every code needs a coordinate before routes can be requested."
    )

# Resize matrices if new locations are added, keeping the already computed block.
if distance_matrix.shape[0] < num_codes:
    old_size = distance_matrix.shape[0]
    new_distance_matrix = np.zeros((num_codes, num_codes))
    new_duration_matrix = np.zeros((num_codes, num_codes))

    if old_size > 0:
        new_distance_matrix[:old_size, :old_size] = distance_matrix
        new_duration_matrix[:old_size, :old_size] = duration_matrix

    distance_matrix = new_distance_matrix
    duration_matrix = new_duration_matrix

unsaved_changes = False
for i in range(num_codes):
    # Only the upper triangle is requested; each result is mirrored to [j][i].
    for j in range(i + 1, num_codes):
        if distance_matrix[i][j] == 0 or np.isinf(distance_matrix[i][j]):
            origin = locations[i]
            destination = locations[j]

            path_cords, distance, duration = get_osrm_data(origin, destination)

            distance_matrix[i][j] = round(distance, 2)
            distance_matrix[j][i] = round(distance, 2)

            duration_matrix[i][j] = round(duration, 2)
            duration_matrix[j][i] = round(duration, 2)
            unsaved_changes = True
            time.sleep(REQUEST_PAUSE_SECONDS)
    print(f'{i} th row proceeded')

    # Checkpoint periodically, but skip the rewrite of both CSV files when no
    # entry has changed since the last save.
    if i % CHECKPOINT_EVERY_ROWS == 0 and unsaved_changes:
        distance_df, duration_df = _save_osrm_matrices(distance_matrix, duration_matrix, codes)
        unsaved_changes = False

# Final save for matrices (always performed so the files exist after a run).
distance_df, duration_df = _save_osrm_matrices(distance_matrix, duration_matrix, codes)

if locations_to_add:
    print(f"Added {len(locations_to_add)} new unique locations with codes: {codes_to_add}")
    # NOTE(review): the master_gps.to_csv call earlier in the notebook is
    # commented out; confirm this message is still accurate.
    print("Master GPS file updated.")
else:
    print("No location to add")

651
651
0 th row proceeded
1 th row proceeded
2 th row proceeded
3 th row proceeded
4 th row proceeded
5 th row proceeded
6 th row proceeded
7 th row proceeded
8 th row proceeded
9 th row proceeded
10 th row proceeded
11 th row proceeded
12 th row proceeded
13 th row proceeded
14 th row proceeded
15 th row proceeded
16 th row proceeded
17 th row proceeded
18 th row proceeded
19 th row proceeded
20 th row proceeded
21 th row proceeded
22 th row proceeded
23 th row proceeded
24 th row proceeded
25 th row proceeded
26 th row proceeded
27 th row proceeded
28 th row proceeded
29 th row proceeded
30 th row proceeded
31 th row proceeded
32 th row proceeded
33 th row proceeded
34 th row proceeded
35 th row proceeded
36 th row proceeded
37 th row proceeded
38 th row proceeded
39 th row proceeded
40 th row proceeded
41 th row proceeded
42 th row proceeded
43 th row proceeded
44 th row proceeded
45 th row proceeded
46 th row proceeded
47 th row proceeded
48 th row proceeded
49 th row proceeded
50

In [2]:
# Scratch check: converting a list to a set.
lis = list(range(1, 4))
{*lis}

{1, 2, 3}

In [3]:
# Scratch check: elements of the first set that are not in the second.
{1, 2, 3, 4, 5}.difference({1, 2, 3})

{4, 5}