1. Filtering Files by Area and Copying Them

We will create a method to filter files based on a specified geographical area and ensure vessels have at least 10 consecutive entries in the area.

In [9]:
import pandas as pd
import os

def filter_files_by_area(input_folder, output_folder, area_bounds):
    """
    Filters vessel files to include only those with at least 10 consecutive entries in a specified area.
    Copies these files to the output folder and removes rows outside the area.
    
    :param input_folder: Folder containing input files (vessel data)
    :param output_folder: Folder to save filtered files
    :param area_bounds: Tuple with bounds (min_lat, max_lat, min_lon, max_lon)
    """
    os.makedirs(output_folder, exist_ok=True)
    min_lat, max_lat, min_lon, max_lon = area_bounds

    for file in os.listdir(input_folder):
        if file.endswith('.csv'):
            file_path = os.path.join(input_folder, file)
            df = pd.read_csv(file_path)
            
            # Filter rows within the specified area
            in_area = (
                (df['Latitude'] >= min_lat) & 
                (df['Latitude'] <= max_lat) & 
                (df['Longitude'] >= min_lon) & 
                (df['Longitude'] <= max_lon)
            )
            df_area = df[in_area]

            # Check for at least 10 consecutive entries in the area
            df_area['consecutive'] = (df_area.index.to_series().diff() == 1).astype(int).cumsum()
            if df_area['consecutive'].value_counts().max() >= 10:
                # Save filtered data to the output folder
                output_path = os.path.join(output_folder, file)
                df_area.to_csv(output_path, index=False)
                print(f"Copied and filtered: {file}")


2. Visualizing Data for a Specified Timeslot

We will generate 10 map snippets, each showing positions and vector arrows for one vessel during the specified timeslot.

In [10]:
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrow

def visualize_timeslot(folder, timeslot, output_folder="map_snippets"):
    """
    Visualizes vessel positions and vectors for a specified timeslot.
    
    :param folder: Folder containing filtered vessel files
    :param timeslot: Tuple specifying start and end times (e.g., "10:30", "11:00")
    :param output_folder: Folder to save the map snippets
    """
    os.makedirs(output_folder, exist_ok=True)
    start_time, end_time = timeslot

    files = [f for f in os.listdir(folder) if f.endswith('.csv')][:10]  # Take up to 10 files
    for i, file in enumerate(files):
        file_path = os.path.join(folder, file)
        df = pd.read_csv(file_path)
        
        # Filter rows by timeslot
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])
        df_timeslot = df[(df['Timestamp'].dt.time >= pd.to_datetime(start_time).time()) &
                         (df['Timestamp'].dt.time <= pd.to_datetime(end_time).time())]

        if df_timeslot.shape[0] > 10:
            # Take 10 evenly spread rows
            df_timeslot = df_timeslot.iloc[::len(df_timeslot) // 10][:10]

        # Create map snippet
        fig, ax = plt.subplots(figsize=(8, 8))
        ax.set_title(f"Vessel: {file} | Timeslot: {start_time}-{end_time}")

        for _, row in df_timeslot.iterrows():
            # Plot position
            ax.plot(row['Longitude'], row['Latitude'], 'bo')  # Blue point for position
            ax.text(row['Longitude'], row['Latitude'], row['Timestamp'].strftime('%H:%M'), fontsize=8)

            # Add vectors for COG/SOG
            cog_x = row['Longitude'] + 0.01 * row['SOG'] * np.cos(np.radians(row['COG']))
            cog_y = row['Latitude'] + 0.01 * row['SOG'] * np.sin(np.radians(row['COG']))
            ax.add_patch(FancyArrow(row['Longitude'], row['Latitude'], 
                                    cog_x - row['Longitude'], cog_y - row['Latitude'], 
                                    width=0.0002, color='red', label='COG/SOG'))

            # Add vectors for Heading
            heading_x = row['Longitude'] + 0.01 * np.cos(np.radians(row['Heading']))
            heading_y = row['Latitude'] + 0.01 * np.sin(np.radians(row['Heading']))
            ax.add_patch(FancyArrow(row['Longitude'], row['Latitude'], 
                                    heading_x - row['Longitude'], heading_y - row['Latitude'], 
                                    width=0.0002, color='green', label='Heading'))

        ax.set_xlabel("Longitude")
        ax.set_ylabel("Latitude")
        ax.legend(["Position", "COG/SOG", "Heading"], loc='upper left')

        # Save the map snippet
        snippet_path = os.path.join(output_folder, f"snippet_{i+1}.png")
        plt.savefig(snippet_path)
        plt.close(fig)
        print(f"Map snippet saved: {snippet_path}")


In [11]:
# Example usage:

# 1. Filter files by area
area_bounds = (56.0, 57.0, 7.0, 8.0)  # Define area (min_lat, max_lat, min_lon, max_lon)
input_folder = "grouped_files"
output_folder = "area_vessels"
filter_files_by_area(input_folder, output_folder, area_bounds)

# 2. Visualize timeslot
timeslot = ("10:30", "11:00")  # Define timeslot
visualize_timeslot(output_folder, timeslot)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_area['consecutive'] = (df_area.index.to_series().diff() == 1).astype(int).cumsum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_area['consecutive'] = (df_area.index.to_series().diff() == 1).astype(int).cumsum()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_area['consecutive'] = (df_area