<a href="https://colab.research.google.com/github/patty0324/GIS_Project/blob/main/GIS_Assigment_SkyServe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd # Using Python Libraries Pandas and Numpy
import numpy as np

# Load the sample data
file_path = '/content/sample_data.csv'
df = pd.read_csv(file_path)

# Display the first few rows to understand the structure
print(df.head())


# Applying Haversine Formula
def haversine(lat1, lon1, lat2, lon2): # This formula is used to Calculate the Distance between two  Geographical points
    R = 6371.0  # Earth radius in kilometers
    phi1 = np.radians(lat1)# Hence the Lat and Lon is in degrees so we convert it into radian first
    phi2 = np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)

    a = np.sin(delta_phi/2.0)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda/2.0)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a))

    distance = R * c
    return distance



        mmsi               timestamp       lat        lon
0  565761000  2023-03-15 00:27:44+00   1.26878  103.75827
1  538008084  2023-03-19 23:30:00+00  43.55962   10.29404
2  564654000  2023-03-12 08:22:53+00   1.23725  103.89135
3  529123000  2023-03-05 16:47:42+00  29.44367   48.93066
4  564780000  2023-03-11 06:35:20+00   1.27755  103.61026


In [6]:
from scipy.spatial import cKDTree

def detect_proximity_events(df, threshold_distance):
    proximity_events = []

    for timestamp, group in df.groupby('timestamp'):
        # Convert coordinates to radians
        coords = np.radians(group[['lat', 'lon']].values)

        # Build the Quadtree (or k-d tree in this case) for efficient proximity search
        tree = cKDTree(coords)

        # Find all pairs within the threshold distance
        pairs = tree.query_pairs(np.radians(threshold_distance / 6371.0))  # Earth radius in km

        # Store the results
        for i, j in pairs:
            mmsi_i = group.iloc[i]['mmsi']
            mmsi_j = group.iloc[j]['mmsi']
            proximity_events.append({
                'mmsi': mmsi_i,
                'vessel_proximity': [mmsi_j],
                'timestamp': timestamp
            })
            proximity_events.append({
                'mmsi': mmsi_j,
                'vessel_proximity': [mmsi_i],
                'timestamp': timestamp
            })

    # Convert to DataFrame
    return pd.DataFrame(proximity_events)

# Define the threshold distance we are considering as example so we can change it dynamically (e.g., 10 kilometers)
threshold_distance = 10.0

# Detect proximity events
output_df = detect_proximity_events(df, threshold_distance)

# Display the output DataFrame with name output_df
print(output_df)


          mmsi vessel_proximity               timestamp
0    565761000      [352002300]  2023-03-12 14:30:00+00
1    352002300      [565761000]  2023-03-12 14:30:00+00
2    565761000      [352002300]  2023-03-12 15:30:00+00
3    352002300      [565761000]  2023-03-12 15:30:00+00
4    352002300      [565761000]  2023-03-12 16:30:00+00
..         ...              ...                     ...
415  563014650      [563014650]  2023-03-24 04:58:36+00
416  563078430      [563078430]  2023-03-24 04:58:52+00
417  563078430      [563078430]  2023-03-24 04:58:52+00
418  518998309      [518998309]  2023-03-24 04:59:46+00
419  518998309      [518998309]  2023-03-24 04:59:46+00

[420 rows x 3 columns]


In [7]:
import plotly.graph_objects as go

def plot_proximity_events(df, output_df):
    # Create the base map
    fig = go.Figure()

    # Add vessels positions to the map
    fig.add_trace(go.Scattergeo(
        lon=df['lon'],
        lat=df['lat'],
        text=df['mmsi'],
        mode='markers',
        marker=dict(
            size=6,
            color='Red',
            symbol='circle'
        ),
        name='Vessel Positions'
    ))

    # Add lines for proximity events
    for _, row in output_df.iterrows():
        vessel_mmsi = row['mmsi']
        vessel_data = df[df['mmsi'] == vessel_mmsi].iloc[0]

        for other_mmsi in row['vessel_proximity']:
            other_data = df[df['mmsi'] == other_mmsi].iloc[0]

            fig.add_trace(go.Scattergeo(
                lon=[vessel_data['lon'], other_data['lon']],
                lat=[vessel_data['lat'], other_data['lat']],
                mode='lines',
                line=dict(width=2, color='blue'),
                opacity=0.6,
                name=f'Proximity: {vessel_mmsi} & {other_mmsi}'
            ))

    # Update the layout of the map
    fig.update_layout(
        title='Vessel Proximity Events',
        showlegend=False,
        geo=dict(
            scope='world',
            projection_type='equirectangular',
            showland=True,
        )
    )

    fig.show()

# Plot the proximity events
plot_proximity_events(df, output_df)
