# Import Libraries

We will import the libraries needed for the data manipulation and visualization.

In [None]:
#Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data

We have a CSV file containing vessel positions with the columns **'mmsi'** (the vessel identifier), **'timestamp'**, **'lat'**, and **'lon'**. Now, we will load this data into a pandas DataFrame.

In [None]:
# Load the vessel positions from the CSV file into a pandas DataFrame
df = pd.read_csv('sample_data.csv')

# Print the DataFrame; pandas abbreviates a long frame to its first and last
# rows and reports the total shape underneath
print(df)

            mmsi               timestamp        lat         lon
0      565761000  2023-03-15 00:27:44+00   1.268780  103.758270
1      538008084  2023-03-19 23:30:00+00  43.559620   10.294040
2      564654000  2023-03-12 08:22:53+00   1.237250  103.891350
3      529123000  2023-03-05 16:47:42+00  29.443670   48.930660
4      564780000  2023-03-11 06:35:20+00   1.277550  103.610260
...          ...                     ...        ...         ...
13496  218719092  2023-03-21 08:30:00+00  44.168871    9.104404
13497  564654000  2023-03-13 22:42:16+00   1.257010  103.841010
13498  564654000  2023-03-05 10:15:11+00   1.280430  103.907730
13499  565761000  2023-03-19 07:30:00+00   1.302624  103.951899
13500  564654000  2023-03-17 03:46:29+00   1.251010  103.863580

[13501 rows x 4 columns]


# Calculate Distance Between Vessels

As instructed, we will use the Haversine formula to calculate the distance between two vessel locations. To do this, we will define a function for the Haversine distance and then apply it to whole pandas columns at once (vectorization), as mentioned in the assignment.

In [None]:
# Define a function to calculate the Haversine distance
def haversine(lon1, lat1, lon2, lat2, radius=6371.0):
    """Return the great-circle distance between two points on a sphere.

    All coordinate arguments are in decimal degrees and may be scalars,
    NumPy arrays or pandas Series; array-like inputs are processed
    element-wise, so the function can be applied to whole columns at once.

    Args:
        lon1: Longitude of the first point(s), in degrees.
        lat1: Latitude of the first point(s), in degrees.
        lon2: Longitude of the second point(s), in degrees.
        lat2: Latitude of the second point(s), in degrees.
        radius: Sphere radius; the result is expressed in the same unit.
            Defaults to 6371.0, the mean radius of the Earth in kilometers.

    Returns:
        The distance(s) along the sphere's surface, in the unit of `radius`.
    """
    # Convert latitude and longitude from degrees to radians
    lon1, lat1, lon2, lat2 = map(np.radians, (lon1, lat1, lon2, lat2))

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2

    # Rounding can push `a` a hair above 1 for (near-)antipodal points, which
    # would make arcsin return NaN; clamp the square root to arcsin's domain.
    c = 2 * np.arcsin(np.minimum(1.0, np.sqrt(a)))

    return c * radius

# Perform a Cartesian product of the position reports with themselves.
# how='cross' pairs every row with every row without adding a helper 'key'
# column to df. The result has len(df) ** 2 rows, so memory use grows
# quadratically with the number of position reports.
df_pairs = df.merge(df, how='cross', suffixes=('_1', '_2'))

# Remove pairs in which both rows belong to the same vessel (same mmsi).
# .copy() makes the filtered frame independent of the cross join so that the
# column assignment below does not trigger a SettingWithCopyWarning.
df_pairs = df_pairs[df_pairs['mmsi_1'] != df_pairs['mmsi_2']].copy()

# Calculate distances in a vectorized manner (whole columns at once)
df_pairs['distance'] = haversine(
    df_pairs['lon_1'], df_pairs['lat_1'],
    df_pairs['lon_2'], df_pairs['lat_2'],
)

# Display the first rows of the DataFrame with calculated distances
print(df_pairs.head())


# Filter Close Proximities

Next, we will filter the data to find vessels that lie within a certain proximity distance. Let's assume we are interested in vessels that are within 1 km of each other.

In [None]:
# Set the proximity threshold in kilometers
proximity_threshold = 1.0

# Keep only the vessel pairs whose distance is within the threshold.
# The pair table built in the previous step is named df_pairs.
close_proximity_df = df_pairs[df_pairs['distance'] <= proximity_threshold]

# Display the first 5 vessel pairs in close proximity
print(close_proximity_df.head(5))

# Visualize the result

Finally, we will visualize the positions of the vessels and highlight those in close proximity. We will use Matplotlib and Seaborn for this.



In [None]:
# Plot all vessel positions, colored by vessel identifier.
# The column names match the loaded data: 'lon', 'lat' and 'mmsi'.
plt.figure(figsize=(10, 8))
sns.scatterplot(data=df, x='lon', y='lat', hue='mmsi', palette='tab10', legend=False)

# Highlight vessels in close proximity by drawing a red segment between the
# two positions of every close pair (the '_1' / '_2' columns of the pair table)
pair_endpoints = zip(
    close_proximity_df['lon_1'], close_proximity_df['lat_1'],
    close_proximity_df['lon_2'], close_proximity_df['lat_2'],
)
for lon_a, lat_a, lon_b, lat_b in pair_endpoints:
    plt.plot([lon_a, lon_b], [lat_a, lat_b], 'r-', alpha=0.6)

# Add labels and title
plt.title('Marine Vessel Positions and Close Proximities')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.grid(True)

# Show plot
plt.show()