## This notebook converts glider data exported from the SFMC software from KMZ to a CSV file with Latitude, Longitude, Date and Time columns. The steps below describe how the source file is obtained and how the code is used.

1. Extract the data from the SFMC server via Options > "Extract Map Events to KMZ". This exports all the glider data and the various map events in KMZ format.
2. After downloading the data, use any online conversion service, such as mygeodata.cloud, to convert it from KMZ to CSV. The output is either a separate CSV file for each type of map event or a single merged CSV file containing the different map events.
3. The resulting file is a CSV whose fourth column (index 3, named `Description` in the script below) is an HTML blob with the GPS date and time embedded in it.
4. Then use the script below to extract the data from the raw CSV file into [Latitude, Longitude, Date, Time] columns.
   

In [3]:
import pandas as pd
import re

# Input and output locations, relative to the notebook's working directory.
file_path = 'Surfacings.csv'  # <-- make sure your file is in the same folder or update path
output_path = 'Extracted_GPS_Data.csv'

# Step 1: Read the CSV file.
# dtype=str keeps every cell as the exact text found in the file, so the
# coordinates are written back out unchanged rather than re-formatted as floats.
df = pd.read_csv(file_path, header=None, dtype=str)

# Step 2: Rename columns for easier reference
df.columns = ['Longitude', 'Latitude', 'Name', 'Description']

# Step 3: Drop the first row only when it really is a header row.
# A header cell such as "X" is not numeric, so to_numeric turns it into NaN.
# A file exported without a header keeps its first surfacing instead of
# silently losing it.
first_longitude = pd.to_numeric(df['Longitude'].head(1), errors='coerce')
if first_longitude.isna().all():
    df = df.iloc[1:].reset_index(drop=True)

# Step 4: Define regex to extract Date and Time from Description,
# e.g. "Time of GPS Position: 2025-02-03 14:32:01".
date_time_pattern = re.compile(r"Time of GPS Position:\s*([\d-]+)\s*([\d:]+)")

# Step 5: Pull Date and Time out of every description in one vectorised pass.
# Rows whose description is missing or has no GPS timestamp come back as NaN
# (no TypeError on blank cells) and are filtered out below.
gps_stamp = df['Description'].str.extract(date_time_pattern.pattern)
gps_stamp.columns = ['Date', 'Time']
has_stamp = gps_stamp['Date'].notna()

# Step 6: Assemble the result. Both pieces share df's index, so concat lines
# them up row by row. The four columns exist even when nothing matched, so
# the CSV always gets its header.
final_df = pd.concat(
    [df.loc[has_stamp, ['Latitude', 'Longitude']], gps_stamp.loc[has_stamp]],
    axis=1,
).reset_index(drop=True)

# Step 7: Save to a new CSV
final_df.to_csv(output_path, index=False)

print(f"✅ Extraction complete! Saved as '{output_path}'")
print(final_df.head())


✅ Extraction complete! Saved as 'Extracted_GPS_Data.csv'
            Latitude         Longitude        Date      Time
0  -66.5331833333333  24.7824833333333  2025-02-03  14:32:01
1  -66.5331833333333  24.7824833333333  2025-02-03  14:35:42
2           -66.5332          24.78185  2025-02-03  14:39:58
3           -66.5332          24.78185  2025-02-03  14:46:10
4           -66.5332          24.78185  2025-02-03  14:48:30


In [5]:
## Fixed: the input is a comma-separated CSV, so reading it with a tab
## delimiter put each whole line into a single field. Every row then raised
## IndexError and was skipped, leaving only the header in the output.

import csv
import re

# Paths are relative to the notebook's working directory; edit these two
# names to point at a different export.
input_file = "Surfacings.csv"
output_file = "parsed_surfacings.csv"

# Regular expression to capture date and time, e.g.
# "Time of GPS Position: 2025-02-03 14:32:01"
gps_time_regex = re.compile(r'Time of GPS Position:\s*(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2})')

# newline='' on the input lets the csv module handle line breaks inside the
# quoted HTML description itself, as the csv documentation recommends.
with open(input_file, 'r', encoding='utf-8', newline='') as infile, \
        open(output_file, 'w', encoding='utf-8', newline='') as outfile:
    reader = csv.reader(infile)  # default delimiter is a comma
    writer = csv.writer(outfile)

    # Write header
    writer.writerow(['Latitude', 'Longitude', 'Date', 'Time'])

    rows_written = 0
    for row in reader:
        # Skip incomplete rows (fewer than the four expected columns).
        if len(row) < 4:
            continue

        lon, lat, info = row[0], row[1], row[3]

        # Search for the GPS time. The header row has no timestamp, so it is
        # skipped here as well.
        match = gps_time_regex.search(info)
        if match:
            date, time = match.groups()
            writer.writerow([lat, lon, date, time])
            rows_written += 1

# An output holding only the header is almost always a wrong delimiter or a
# changed description format, so say so instead of reporting plain success.
if rows_written == 0:
    print(f"WARNING: no GPS timestamps were found in {input_file}; the output contains only the header.")

print(f"✅ Done! Extracted data saved in {output_file}")

✅ Done! Extracted data saved in parsed_surfacings.csv


In [21]:
## Fixed: the input file is comma-separated, not tab-separated. With a tab
## delimiter each row had a single field, failed the "at least 4 fields"
## check and was skipped, so only the header reached the output.

import csv
import re

# Input and output filenames
input_file = "Surfacings.csv"
output_file = "parsed_surfacings.csv"

# Regular expression to capture date and time, e.g.
# "Time of GPS Position: 2025-02-03 14:32:01"
gps_time_regex = re.compile(r'Time of GPS Position:\s*(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2})')

# Open both files. newline='' on the input is what the csv module expects,
# so quoted fields that contain line breaks are parsed as a single field.
with open(input_file, 'r', encoding='utf-8', newline='') as infile, \
        open(output_file, 'w', encoding='utf-8', newline='') as outfile:
    reader = csv.reader(infile, delimiter=',')  # Important: the file is comma-separated
    writer = csv.writer(outfile)

    # Write the header
    writer.writerow(['Latitude', 'Longitude', 'Date', 'Time'])

    extracted_count = 0
    for row in reader:
        if len(row) < 4:
            # If the row is incomplete, skip it
            continue

        # Column order in the input is longitude, latitude, name, description.
        lon = row[0]
        lat = row[1]
        html_text = row[3]

        # Search for the date and time; rows without a timestamp (including
        # the header row) are left out.
        match = gps_time_regex.search(html_text)
        if match is None:
            continue

        # Write extracted information
        writer.writerow([lat, lon, match.group(1), match.group(2)])
        extracted_count += 1

# Make an empty result visible rather than reporting a silent "success".
if extracted_count == 0:
    print(f"WARNING: no GPS timestamps were found in {input_file}; the output contains only the header.")

print(f"✅ Extraction complete! Saved to {output_file}")


✅ Extraction complete! Saved to parsed_surfacings.csv


In [22]:
## Fixed: lines were split on tabs although the file is comma-separated, so
## no line ever produced four parts and nothing but the header was written.
## The csv module is used for reading so that commas and line breaks inside
## the quoted HTML description do not break the column split.

import csv
import re

# Input and Output filenames
input_file = "Surfacings.csv"
output_file = "parsed_surfacings.csv"

# Regular expression to find "Time of GPS Position" inside the text
gps_time_regex = re.compile(r'Time of GPS Position:\s*([\d-]+)\s*([\d:]+)')

# Open the files. newline='' on the input lets csv.reader handle line breaks
# inside quoted fields; on the output it keeps the '\n' written below as-is.
with open(input_file, 'r', encoding='utf-8', newline='') as infile, \
        open(output_file, 'w', encoding='utf-8', newline='') as outfile:
    # Write header first
    outfile.write('Latitude,Longitude,Date,Time\n')

    lines_written = 0
    for parts in csv.reader(infile):
        if len(parts) < 4:
            # If the record does not have enough fields, skip it
            continue

        longitude = parts[0].strip()
        latitude = parts[1].strip()
        html_text = parts[3]  # The messy text containing date/time

        # Now find date and time from the HTML text. The header row carries
        # no timestamp and is therefore skipped here too.
        match = gps_time_regex.search(html_text)
        if match:
            date, time = match.groups()

            # Write output line
            outfile.write(f"{latitude},{longitude},{date},{time}\n")
            lines_written += 1

# A header-only output means the parse failed; report it explicitly.
if lines_written == 0:
    print(f"WARNING: no GPS timestamps were found in {input_file}; the output contains only the header.")

print(f"✅ Done! Output saved to {output_file}")


✅ Done! Output saved to parsed_surfacings.csv


In [11]:
## Fixed: the file is comma-separated, so splitting each line on tabs never
## produced the four expected parts and every line was skipped. Records are
## now read with the csv module, which also copes with commas and line breaks
## inside the quoted HTML description.

import csv
import re

# Input and Output filenames
input_file = 'Surfacings.csv'
output_file = 'parsed_surfacings.csv'

# Regular expression to find "Time of GPS Position" inside the text
gps_time_regex = re.compile(r'Time of GPS Position:\s*([\d-]+)\s*([\d:]+)')

# Open the files. The input uses newline='' as the csv documentation
# recommends, so embedded line breaks stay inside their quoted field.
with open(input_file, 'r', encoding='utf-8', newline='') as infile, \
        open(output_file, 'w', encoding='utf-8', newline='') as outfile:
    # Write header first
    outfile.write('Latitude,Longitude,Date,Time\n')

    written = 0
    for record in csv.reader(infile):
        # Records with fewer than four fields cannot hold a description.
        if len(record) < 4:
            continue

        # Input column order: longitude, latitude, name, description.
        longitude, latitude = record[0].strip(), record[1].strip()
        html_text = record[3]  # The messy text containing date/time

        # Find date and time in the HTML text; records without a timestamp
        # (the header row among them) are not written.
        match = gps_time_regex.search(html_text)
        if match is None:
            continue

        # Write output line
        outfile.write(f"{latitude},{longitude},{match.group(1)},{match.group(2)}\n")
        written += 1

# Flag an empty result instead of letting it pass as a success.
if written == 0:
    print(f"WARNING: no GPS timestamps were found in {input_file}; the output contains only the header.")

print(f"✅ Done! Output saved to {output_file}")


✅ Done! Output saved to parsed_surfacings.csv
