In [1]:
import pandas as pd
import numpy as np
np.set_printoptions(suppress=True)
import vptree
from geopy.distance import great_circle
from scipy.spatial import distance
import random
import time
from pandas import datetime

In [None]:
# Read the AIS CSV into temp_df and report the wall-clock read time in seconds.
start = time.time()
temp_df = pd.read_csv('AIS_2017_12_Zone11.csv')
end = time.time()
print('AIS 2017 Zone 11 Loaded.  Time: '+str(round(end-start,2)))

In [None]:
# Keep an unmodified copy of the raw frame; later cells mutate temp_df in place.
old_df = temp_df.copy()
temp_df.head()

In [None]:
# Parse the timestamp column, then count rows per calendar day
# (normalize() resets the time of day to midnight).
temp_df['BaseDateTime'] = pd.to_datetime(temp_df['BaseDateTime'])
temp_df.BaseDateTime.dt.normalize().value_counts()

In [None]:
# Distinct values present in the Status column.
temp_df.Status.unique()

In [None]:
# Number of rows sharing each exact timestamp.
temp_df.BaseDateTime.value_counts()

In [None]:
# Remove the identity, dimension and position columns from temp_df in place.
temp_df.drop(['VesselName','IMO','CallSign','VesselType','Status','Length','Width','Draft','Cargo','LAT','LON'], axis=1, inplace=True)

In [None]:
# Index rows by MMSI and replace COG with its value modulo 360 (modified_COG).
temp_df.set_index('MMSI',inplace=True)
temp_df['modified_COG'] = temp_df.COG%360
temp_df.drop(['COG'], axis=1, inplace=True)
temp_df.head()

In [None]:
# All rows for MMSI 367047120, ordered by timestamp.
temp_df.loc[367047120,:].sort_values(by=['BaseDateTime'])

In [None]:
# Time-ordered rows for MMSI 367047120, kept as df for later cells.
df = temp_df.loc[367047120,:].sort_values(by=['BaseDateTime'])

In [None]:
# Distinct truth values of "modified_COG is negative" for this vessel.
(df.modified_COG<0).unique()

In [None]:
# COG was already dropped in the cell that created modified_COG, so a plain
# second drop raises KeyError; errors='ignore' keeps this cell safe to re-run.
temp_df.drop(['COG'], axis=1, inplace=True, errors='ignore')

In [None]:
# Build the feature matrix for cdist. The parsed BaseDateTime column is left
# out: with it, .values is an object array that cosine distance cannot use and
# each row has one more value than the reshape(3, 1) used below expects.
np_temp_df = temp_df.drop(columns=['BaseDateTime'], errors='ignore').values

In [None]:
# Confirm what kind of object .values returned.
type(np_temp_df)

In [None]:
# (rows, columns) of the feature matrix.
np_temp_df.shape

In [None]:
# (rows, columns) of the source frame, for comparison with the matrix.
temp_df.shape

In [None]:
# Pick one random record to use as the query vector.
# - `random` is already imported in the first cell.
# - A list selector ([mmsi]) always yields a DataFrame, even for a vessel
#   with a single record.
# - randrange excludes its upper bound, so the position is always valid
#   (randint(0, n) could return n and overflow; `a` was never defined).
ship_rows = temp_df.loc[[np.random.choice(temp_df.index)]].drop(columns=['BaseDateTime'], errors='ignore')
ship = ship_rows.iloc[random.randrange(ship_rows.shape[0])].values.reshape(3,1)
ship

In [None]:
# Cosine distance from the query ship to every row of the feature matrix,
# ranked ascending (smallest distance first). Note that SciPy's 'cosine'
# metric is a distance (1 - similarity), despite the wording of the message.
start = time.time()
# `ship` is already a NumPy array (no .values); flatten it to one (1, n) row.
score_cos = distance.cdist(np.asarray(ship).reshape(1, -1), np_temp_df, 'cosine').tolist()
labels = temp_df.index.tolist()
sorted_scores = sorted(zip(labels, score_cos[0]), key=lambda t: t[1])
end = time.time()
print('Cosine Similarity with SciPy.  Time: '+str(round(end-start,2)))

In [None]:
# Entry with the smallest score. Plain min() on (MMSI, score) tuples compares
# the MMSI first, so it returned the lowest MMSI rather than the lowest score.
min(sorted_scores, key=lambda t: t[1])

In [None]:
# Entry with the largest score. Plain max() on (MMSI, score) tuples compares
# the MMSI first, so it returned the highest MMSI rather than the highest score.
max(sorted_scores, key=lambda t: t[1])

In [None]:
# old_df was copied before MMSI became the index, so .loc[<MMSI>] would look
# for a row label, not a vessel. Filter on the MMSI column instead.
old_df[old_df.MMSI == 1071867621].sort_values(by=['BaseDateTime'])

In [None]:
def generate_points(df_ships):
    '''
    Turn a ship dataframe into points for the vantage-point tree.

    Each point is an ``(index label, LAT, LON)`` triple, one per row, in row
    order; the frame's index is expected to hold the MMSI. The triples are
    returned as a single tuple.
    '''
    return tuple(
        (mmsi, lat, lon)
        for mmsi, lat, lon in zip(df_ships.index, df_ships.LAT, df_ships.LON)
    )

def get_distance_in_meters(point1, point2):
    '''
    Great-circle distance, in meters, between two points.

    Only elements 1 and 2 of each point are used (latitude and longitude in
    the triples built by ``generate_points``); element 0 is ignored.
    '''
    origin, target = ((pt[1], pt[2]) for pt in (point1, point2))
    separation = great_circle(origin, target)
    return separation.m

def generate_vantage_point_tree(points):
    '''
    Build a vantage-point tree over ``points``, using
    ``get_distance_in_meters`` as the distance function.
    '''
    return vptree.VPTree(points, get_distance_in_meters)

In [None]:
def initialize_data_old(filepath):
    '''
    Legacy loader: read an AIS CSV and return its kinematic columns by MMSI.

    Unlike ``initialize_data`` this applies no vessel-type or date filtering.
    The earlier body also built a 2017-12-31 subset, but that subset was
    never returned and the code raised before reaching it (``.dt`` on an
    unparsed string column), so it has been removed.

    Parameters
    ----------
    filepath : str
        Path handed to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Indexed by MMSI, with the identity/dimension/position columns and the
        raw COG removed and ``modified_COG`` (COG modulo 360) added.
    '''
    start = time.time()
    df = pd.read_csv(filepath)
    # read_csv leaves BaseDateTime as text; parse it so callers get timestamps.
    df['BaseDateTime'] = pd.to_datetime(df['BaseDateTime'])
    df = df.drop(['VesselName','IMO','CallSign','VesselType','Status','Length','Width','Draft','Cargo','LAT','LON'], axis=1)
    df['modified_COG'] = df.COG % 360
    df = df.drop(['COG'], axis=1)
    df = df.set_index('MMSI')
    end = time.time()
    print('Data Loaded.  Time: '+str(round(end-start,2)))
    return df

In [None]:
#[21,22,31,32,36,37,52,1002,1001,1023,1025]
#df = df[df.Status.isin(['under way using engine','at anchor'])]
#compression='zip'

In [3]:
def initialize_data(filepath, day='2017-12-31',
                    excluded_vessel_types=(21, 22, 31, 32, 52, 1023, 1025)):
    '''
    Load an AIS CSV and return one day of kinematic records indexed by MMSI.

    Parameters
    ----------
    filepath : str
        Path handed to ``pd.read_csv``.
    day : str or datetime-like, optional
        Calendar day to keep. A record is retained when
        ``day 00:00:00 <= BaseDateTime < next day 00:00:00``, so a record
        stamped exactly at midnight belongs to the day it starts.
        Defaults to December 31, 2017.
    excluded_vessel_types : iterable of int, optional
        ``VesselType`` codes to discard. Rows with a missing VesselType are
        kept. The default is the list this notebook has always used.
        NOTE(review): the original comment described these codes as
        "Tug Boats & Fishing vessels" - confirm against the AIS type table.

    Returns
    -------
    pandas.DataFrame
        Indexed by MMSI, with the identity/dimension/position columns and the
        raw COG removed and ``modified_COG`` (COG modulo 360) added.

    Raises
    ------
    Whatever ``pd.to_datetime(..., errors='raise')`` raises when a
    BaseDateTime value cannot be parsed.
    '''
    start = time.time()
    df = pd.read_csv(filepath)
    df['BaseDateTime'] = pd.to_datetime(df['BaseDateTime'], errors='raise')

    day_start = pd.to_datetime(day).normalize()
    day_end = day_start + pd.Timedelta(days=1)
    keep = (
        ~df.VesselType.isin(list(excluded_vessel_types))
        & (df.BaseDateTime >= day_start)
        & (df.BaseDateTime < day_end)
    )
    # Filter first and take an explicit copy: the remaining steps then touch
    # far fewer rows and never write into a view of the raw frame.
    df = df.loc[keep].copy()

    df = df.drop(['VesselName','IMO','CallSign','VesselType','Status','Length','Width','Draft','Cargo','LAT','LON'], axis=1)
    df['modified_COG'] = df.COG % 360
    df = df.drop(['COG'], axis=1)
    df = df.set_index('MMSI')
    end = time.time()
    print('Data Loaded.  Time: '+str(round(end-start,2)))
    return df

In [4]:
# Load the filtered records via initialize_data and preview the first rows.
jared = initialize_data('AIS_2017_12_Zone11.csv')
jared.head()

Data Loaded.  Time: 87.8


Unnamed: 0_level_0,BaseDateTime,SOG,Heading,modified_COG
MMSI,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
367008510,2017-12-31 00:02:04,30.0,511.0,14.5
338210835,2017-12-31 00:00:01,0.0,511.0,195.8
538007153,2017-12-31 00:01:18,0.6,27.0,127.9
338150975,2017-12-31 00:01:14,0.0,511.0,282.5
367034320,2017-12-31 00:01:15,0.2,511.0,174.8


In [None]:
# drop(..., inplace=True) returns None, so assigning its result replaced jared
# with None and the next line failed. Use the returned copy instead.
jared = jared.drop(['BaseDateTime'], axis=1)
jared.head(10)

In [None]:
# Rows recorded for MMSI 367034320.
jared.loc[367034320]

In [None]:
# (rows, columns) of the loaded frame.
jared.shape

In [None]:
# A query vector must come from ONE record: reshaping the whole frame to
# (3, 1) raises ValueError unless it holds exactly three values. The first
# record is used here as an example; a list selector keeps the result 2-D
# with shape (1, n_features), the shape cdist expects.
# NOTE(review): swap in the intended vessel/row if a specific one was meant.
ship_vec = jared.iloc[[0]].values
ship_vec

In [None]:
# NOTE(review): cosine_distance is defined in a later cell, so on a fresh
# top-to-bottom run this call raises NameError; run the definition first.
durka_jared = cosine_distance(441989000,'AIS_2017_12_Zone11.csv')

In [None]:
def create_feature_matrix(filepath, output_path='ship_feature_matrix'):
    '''
    Build the numeric feature matrix for a file and save it with ``np.save``.

    Parameters
    ----------
    filepath : str
        AIS CSV path, passed to ``initialize_data``.
    output_path : str, optional
        Target passed to ``np.save``. NumPy appends ``.npy`` when the name
        does not already end with it, so the default writes
        ``ship_feature_matrix.npy``; load it back under that full name.

    Returns
    -------
    numpy.ndarray
        The saved matrix: every column of the loaded frame except
        BaseDateTime, one row per record.
    '''
    feature_df = initialize_data(filepath).drop(['BaseDateTime'], axis=1)
    feature_matrix = feature_df.values
    np.save(output_path, feature_matrix)
    return feature_matrix

In [None]:
def cosine_distance(MMSI, filepath, date=None):
    '''
    Rank every record in a file by cosine distance to one record of ``MMSI``.

    Parameters
    ----------
    MMSI : int
        Identifier of the query vessel (an index label of the loaded frame).
    filepath : str
        AIS CSV path, passed to ``initialize_data``.
    date : str or datetime-like, optional
        Timestamp selecting which record of the query vessel to use. When
        omitted, the vessel's most recent record is used. (Previously a
        vessel with more than one record made the reshape fail.)

    Returns
    -------
    list of (label, distance)
        One tuple per record in the file, sorted by ascending distance.
        The value is SciPy's cosine *distance*, i.e. 1 - cosine similarity.

    Raises
    ------
    KeyError
        If ``MMSI`` is not present in the loaded data.
    ValueError
        If ``date`` is given and the vessel has no record at that timestamp.
    '''
    start = time.time()
    df = initialize_data(filepath)

    # A list selector always yields a DataFrame, however many records exist.
    ship_rows = df.loc[[MMSI]].sort_values('BaseDateTime')
    if date is not None:
        ship_rows = ship_rows[ship_rows.BaseDateTime == pd.to_datetime(date)]
        if ship_rows.empty:
            raise ValueError('No record for MMSI '+str(MMSI)+' at '+str(date))
    # Last row after the time sort = most recent matching record; shape (1, n).
    ship_vec = ship_rows.drop(['BaseDateTime'], axis=1).iloc[[-1]].values

    feature_df = df.drop(['BaseDateTime'], axis=1)
    feature_matrix = feature_df.values

    score_cos = distance.cdist(ship_vec, feature_matrix, 'cosine')[0].tolist()
    labels = feature_df.index.tolist()
    end = time.time()
    print('Cosine Similarity calculated.  Time: '+str(round(end-start,2)))
    return sorted(zip(labels, score_cos), key=lambda t: t[1])

In [None]:
def ship_angle(cos_score):
    '''
    Convert (ship, score) pairs into angles in degrees.

    The pairs are first collapsed into a dict, so a ship that appears more
    than once keeps only its last score. Each remaining score is passed
    through arccos, converted from radians to degrees and shifted by 180.

    Returns ``(angle, ships)``: two lists in matching order.

    NOTE(review): the scores produced by ``cosine_distance`` are SciPy cosine
    distances (1 - similarity); confirm whether arccos should be applied to
    the similarity instead. Scores outside [-1, 1] yield NaN.
    '''
    per_ship = dict(cos_score)
    ships = list(per_ship)
    angle = [np.rad2deg(np.arccos(score)) + 180 for score in per_ship.values()]
    return angle, ships
#angle = convert from radians to angle arccosine(score_cos) + 180

In [None]:
# Upper bound (exclusive, degrees) of each sector and its label, in
# ascending order. The thresholds are the ones this notebook has always used.
_COLREGS_SECTORS = (
    (5.0, 'HEADON'),
    (112.5, 'CROSSING'),
    (247.5, 'OVERTAKING'),
    (355.0, 'CROSSING'),
    (360.0, 'HEADON'),
)


def _classify_bearing(a):
    '''Return the sector label for one angle in degrees, or 'UNKNOWN'.'''
    if not np.isfinite(a):
        return 'UNKNOWN'
    a = a % 360  # 360 wraps to 0; negative angles wrap into [0, 360)
    for upper, label in _COLREGS_SECTORS:
        if a < upper:
            return label
    return 'UNKNOWN'


def colregs_calssification(cos_score):
    '''
    Label every ship's angle with an encounter type.

    Angles come from ``ship_angle(cos_score)``. Sectors are half-open
    ``[lower, upper)`` after wrapping the angle into [0, 360):
    [0, 5) HEADON, [5, 112.5) CROSSING, [112.5, 247.5) OVERTAKING,
    [247.5, 355) CROSSING, [355, 360) HEADON. NaN or infinite angles are
    labelled 'UNKNOWN'.

    Exactly one label is produced per angle, so the three returned lists
    always have equal length. (Previously an angle sitting exactly on a
    threshold, or NaN, produced no label and shifted every later label.)

    Returns ``(ships, angle, classification)``.
    '''
    angle, ships = ship_angle(cos_score)
    colgres_classification = [_classify_bearing(a) for a in angle]
    return ships, angle, colgres_classification

In [None]:
# Load and filter the zipped extract through initialize_data.
d = initialize_data('AIS_2017_12_Zone11.zip')

In [None]:
# Display the loaded frame.
d

In [None]:
# cosine_distance takes (MMSI, filepath); passing a timestamp as the second
# positional argument raised TypeError. That 2017-12-01 timestamp could not
# match anyway, because initialize_data keeps only 2017-12-31 records.
durka = cosine_distance(367047120,'AIS_2017_12_Zone11.zip')
# Preview only the closest entries; the full ranking has one tuple per record.
durka[:10]

In [None]:
# Unpack the result as a = ships, b = angles, c = classifications.
a,b,c = colregs_calssification(durka)

In [None]:
c # TODO(review): filter to the MMSIs actually needed first; classifying every record wastes computation

In [None]:
# The function is named colregs_calssification; the previous spelling
# (colgres_...) raised NameError.
a,b,c = colregs_calssification(durka)

In [None]:
# Select the row(s) of MMSI 367047120 whose timestamp equals 2017-12-01T00:02:21.
ship = df.loc[367047120].iloc[np.where(df.loc[367047120].BaseDateTime == '2017-12-01T00:02:21')]
#ship_vec = ship.drop(['BaseDateTime'], axis=1, inplace=True)
#ship_vec = ship.drop(['BaseDateTime'])
#ship_vec

In [None]:
# Display the classification list.
c

In [None]:
# Position of the first record of this vessel with that exact timestamp.
np.where(df.loc[367047120].BaseDateTime == '2017-12-01T00:02:21')[0][0]

In [None]:
# Fourth record of the vessel, timestamp removed, as a 1x3 row vector.
df.loc[367047120].iloc[3].drop(['BaseDateTime']).values.reshape(3,1).T

In [None]:
# All rows of df labelled with MMSI 367047120.
df.loc[367047120]

In [None]:
# Inspect the type of the first ranked entry.
type(durka[0])
#np.rad2deg(score_cos)
#angle = convert from radians to angle arccosine(score_cos) + 180

In [None]:
# colgres_classification exists only inside colregs_calssification; at
# notebook level that list was unpacked into c.
if 'HEADON' in c:
    print('DURKA')

In [None]:
# `angle` exists only inside the helper functions; at notebook level the
# angles were unpacked into b. A separate loop name keeps the ships list
# held in `a` from being overwritten.
for bearing in b:
    if bearing<5:
        print('durka')
    if bearing>355:
        print('durka doo')