In [62]:
import functions as fn
import pandas as pd
from importlib import reload
from tqdm import tqdm
from termcolor import colored
import matplotlib.pyplot as plt
from ooipy.tools import ooiplotlib as ooiplt
import ooipy
import numpy as np
from datetime import datetime,timedelta

In [68]:
def get_no_ships_slope_hydrate (df,rad,min_d,freq,count_iso_instances): #optimised version
    
    count_noship_instances=int(count_iso_instances+count_iso_instances*0.20)
    min_date=df['TIMESTAMP UTC'].min()
    max_date=df['TIMESTAMP UTC'].max()
    num_months=int((max_date-min_date).total_seconds()/2629800)
    count_per_month=int(count_noship_instances/num_months)
    
    df=df.sort_values(by=['TIMESTAMP UTC'],ascending=True)
    df_time_grp=df.groupby(pd.Grouper(key="TIMESTAMP UTC", freq=freq)).agg({'TIMESTAMP UTC':['min','max'],
                                                                               'MMSI':'count',
                                                                              'distance(in km) oregon':'min',
                                                                              'distance(in km) hydrate':'min'}).reset_index()
    df_time_grp.columns=['TIMESTAMP UTC','min_time','max_time','count','min_slope_dist','min_hydrate_dist']
    
    df_time_grp['start_time']=df_time_grp['TIMESTAMP UTC']
    df_time_grp['end_time']=df_time_grp['TIMESTAMP UTC']+timedelta(minutes=min_d)
    df_time_grp['slope_noship']=[1 if x>rad else 0 for x in df_time_grp['min_slope_dist'] ]
    df_time_grp['hydrate_noship']=[1 if x>rad else 0 for x in df_time_grp['min_hydrate_dist'] ]
    df_time_grp['YY-MM']=df_time_grp['TIMESTAMP UTC'].dt.strftime('%Y-%m')
    df_time_grp1=df_time_grp[(df_time_grp['slope_noship']==1) | (df_time_grp['hydrate_noship']==1)]
    
    #stratified sampling on YY-MM
    df_time_grp1=df_time_grp1.groupby('YY-MM', group_keys=False).apply(lambda x: x.sample(count_per_month))
    return df_time_grp1

In [69]:
def get_no_ships_axial(df,rad,min_d,freq,count_iso_instances): #optimised version
    
    count_noship_instances=int(count_iso_instances+count_iso_instances*0.20)
    min_date=df['TIMESTAMP UTC'].min()
    max_date=df['TIMESTAMP UTC'].max()
    num_months=int((max_date-min_date).total_seconds()/2629800)
    count_per_month=int(count_noship_instances/num_months)
    
    df=df.sort_values(by=['TIMESTAMP UTC'],ascending=True)
    df_time_grp=df.groupby(pd.Grouper(key="TIMESTAMP UTC", freq=freq)).agg({'TIMESTAMP UTC':['min','max'],
                                                                               'MMSI':'count','distance(in km) axial':'min',
                                                                              'distance(in km) central cald':'min',
                                                                              'distance(in km) eastern cald':'min'}).reset_index()
    df_time_grp.columns=['TIMESTAMP UTC','min_time','max_time','count','min_axial_dist','min_central_dist','min_eastern_dist']
    
    df_time_grp['start_time']=df_time_grp['TIMESTAMP UTC']
    df_time_grp['end_time']=df_time_grp['TIMESTAMP UTC']+timedelta(minutes=min_d)
    df_time_grp['axial_noship']=[1 if x>rad else 0 for x in df_time_grp['min_axial_dist'] ]
    df_time_grp['central_noship']=[1 if x>rad else 0 for x in df_time_grp['min_central_dist'] ]
    df_time_grp['eastern_noship']=[1 if x>rad else 0 for x in df_time_grp['min_central_dist'] ]
    df_time_grp['YY-MM']=df_time_grp['TIMESTAMP UTC'].dt.strftime('%Y-%m')
    df_time_grp1=df_time_grp[(df_time_grp['axial_noship']==1) | (df_time_grp['central_noship']==1) | (df_time_grp['eastern_noship']==1) ]
    
    #stratified sampling on YY-MM
    df_time_grp1=df_time_grp1.groupby('YY-MM', group_keys=False).apply(lambda x: x.sample(count_per_month))
    return df_time_grp1

In [95]:
def get_spectrogram_axial(df,fmin=10,fmax=90):
    print('Saving spectrograms')
    for i in tqdm(range(len(df))):
        min_time=df.start_time.iloc[i]
        max_time=df.end_time.iloc[i]
        if df.axial_noship.iloc[i]==1:
            hydrophone_idx=1
            hydro='Axial_Base'
        elif df.central_noship.iloc[i]==1:
            hydrophone_idx=2
            hydro='Central_Caldera'
        elif df.eastern_noship.iloc[i]==1:
            hydrophone_idx=3
            hydro='Eastern_Caldera'
        
        spectrogram=fn.get_spectrogram_data(hydrophone_idx,min_time,max_time,fmin=fmin,fmax=fmax)
        if (spectrogram==None):
            # print('data trace is none. Continuing to next')
            pass
        else:
            if (spectrogram.values.shape[0]<2) | (spectrogram.values.shape[1]<2):
                pass
            else:
                #spectrogram.visualize(save_spec=True,plot_spec=False,filename=path_to_write+hydro+'/Spectrogram'+'/isolated_ships_'+str(inner_rad)+'_'+ str(outer_rad)+'_'+str(i)+'.png')
                ooiplt.plot_spectrogram(spectrogram,plot=False,save=True,fmin=fmin,fmax=fmax,filename=path_to_write+hydro+'/Spectrogram'+'/isolated_ships_'+str(rad)+'_'+str(i)+'.png')

In [96]:
def get_spectrogram_slope_hydrate(df,fmin=10,fmax=90,rad=10):
    print('Saving spectrograms')
    for i in tqdm(range(len(df))):
        min_time=df.start_time.iloc[i]
        max_time=df.end_time.iloc[i]
        if df.slope_noship.iloc[i]==1:
            hydrophone_idx=4
            hydro='Oregon_Slope'
        elif df.hydrate_noship.iloc[i]==1:
            hydrophone_idx=5
            hydro='Southern_Hydrate'
       
        
        spectrogram=fn.get_spectrogram_data(hydrophone_idx,min_time,max_time,fmin=fmin,fmax=fmax)
        if (spectrogram==None):
            # print('data trace is none. Continuing to next')
            pass
        else:
            if (spectrogram.values.shape[0]<2) | (spectrogram.values.shape[1]<2):
                pass
            else:
                #spectrogram.visualize(save_spec=True,plot_spec=False,filename=path_to_write+hydro+'/Spectrogram'+'/isolated_ships_'+str(inner_rad)+'_'+ str(outer_rad)+'_'+str(i)+'.png')
                ooiplt.plot_spectrogram(spectrogram,plot=False,save=True,fmin=fmin,fmax=fmax,filename=path_to_write+hydro+'/Spectrogram'+'/isolated_ships_'+str(rad)+'_'+str(i)+'.png')

### Getting no ships data for axial, central and eastern caldera

In [16]:
df = pd.read_csv('E:/Acer backup 3/internships and jobs/UW/Reader grader and TA/Shima Abadi/Data/ais_2014B_2020B_V2.csv',sep = ',')

df['TIMESTAMP UTC']=df['TIMESTAMP UTC'].astype('datetime64[ns]')

In [17]:
#removing airgun experiment period
start_time=datetime(2019,7,1,0,0,0)
end_time=datetime(2019,8,1,0,0,0)


In [18]:
df=df[(df['TIMESTAMP UTC']<start_time) | (df['TIMESTAMP UTC'] > end_time)]
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,MMSI,SHIPNAME,VESSEL TYPE,STATUS,SPEED (KNOTSx10),LAT,LON,COURSE,...,central_caldera_Loc,eastern_caldera_Lat,eastern_caldera_Lon,eastern_caldera_Loc,distance(in km) axial,distance(in km) central cald,distance(in km) eastern cald,bearing_axial,bearing_eastern_cald,bearing_central_cald
0,0,0,209605000,AKILI,Bulk Carrier,0,108,46.09859,-129.655,83,...,"(45.9549, -130.0089)",45.9399,-129.9742,"(45.9399, -129.9742)",32.25,31.72,30.37,193.760689,234.515164,239.809636
1,1,1,256832000,,,0,115,45.07486,-128.943,178,...,"(45.9549, -130.0089)",45.9399,-129.9742,"(45.9399, -129.9742)",104.04,128.46,125.45,322.804182,320.490292,320.057435
2,2,2,352358000,ANNA G,Bulk Carrier,0,132,45.86138,-130.5627,86,...,"(45.9549, -130.0089)",45.9399,-129.9742,"(45.9399, -129.9742)",63.02,44.21,46.5,94.234035,78.935325,76.159593
3,3,3,356566000,GLOBAL SAIKAI,General Cargo,0,104,46.50261,-129.0129,272,...,"(45.9549, -130.0089)",45.9399,-129.9742,"(45.9399, -129.9742)",95.33,98.03,97.02,217.082385,230.114815,231.878577
4,4,4,477293500,JIN XIU FENG,Bulk Carrier,0,112,46.74633,-129.5912,299,...,"(45.9549, -130.0089)",45.9399,-129.9742,"(45.9399, -129.9742)",104.08,93.66,94.37,186.960616,198.29122,200.167826


In [19]:
df.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'MMSI', 'SHIPNAME', 'VESSEL TYPE',
       'STATUS', 'SPEED (KNOTSx10)', 'LAT', 'LON', 'COURSE', 'HEADING',
       'TIMESTAMP UTC', 'LENGTH', 'Year', 'axial_Lat', 'axial_Lon',
       'axial_Loc', 'ship_Loc', 'central_caldera_Lat', 'central_caldera_Lon',
       'central_caldera_Loc', 'eastern_caldera_Lat', 'eastern_caldera_Lon',
       'eastern_caldera_Loc', 'distance(in km) axial',
       'distance(in km) central cald', 'distance(in km) eastern cald',
       'bearing_axial', 'bearing_eastern_cald', 'bearing_central_cald'],
      dtype='object')

In [58]:
# def get_no_ships(df,rad,min_d,count_iso_instances): #optimised version
#     count_noship_instances=int(count_iso_instances+count_iso_instances*0.20)
#     min_date=df['TIMESTAMP UTC'].min()
#     max_date=df['TIMESTAMP UTC'].max()
#     num_months=int((max_date-min_date).total_seconds()/2629800)
#     count_per_month=int(count_noship_instances/num_months)
#     Axial_start=[]
#     Axial_end=[]
#     Central_start=[]
#     Central_end=[]
#     Eastern_start=[]
#     Eastern_end=[]
#     vessels=df[['MMSI','VESSEL TYPE']].drop_duplicates(subset=['MMSI'])
#     df=df.sort_values(by=['TIMESTAMP UTC'],ascending=True)
#     start_time=df['TIMESTAMP UTC'].min()
#     end_time=start_time+timedelta(minutes=min_d)
    
#     for i in range(1,num_months+1):
        
#     while end_time < df['TIMESTAMP UTC'].max():
        
#         df_temp=df[(df['TIMESTAMP UTC']>=start_time)& (df['TIMESTAMP UTC']<end_time)]
        
#         if df_temp['distance(in km) axial'].min() > rad:
#             Axial_start.append(start_time)
#             Axial_end.append(end_time)
            
#         if df_temp['distance(in km) central cald'].min() > rad:
#             Central_start.append(start_time)
#             Central_end.append(end_time)
            
#         if df_temp['distance(in km) eastern cald'].min() > rad:
#             Eastern_start.append(start_time)
#             Eastern_end.append(end_time)
            
            
#         start_time=end_time
#         end_time=start_time+timedelta(minutes=min_d)
            
   
#     return Axial_start,Axial_end,Central_start,Central_end,Eastern_start,Eastern_end

In [56]:
#Usig
no_ships_axial=get_no_ships(df,2.6,10,'10min',10000)

In [57]:
no_ships_axial

Unnamed: 0,TIMESTAMP UTC,min_time,max_time,count,min_axial_dist,min_central_dist,min_eastern_dist,start_time,end_time,axial_noship,central_noship,eastern_noship,YY-MM
1616,2015-01-12 05:40:00,2015-01-12 05:40:00,2015-01-12 05:46:00,2,107.14,88.20,90.19,2015-01-12 05:40:00,2015-01-12 05:50:00,1,1,1,2015-01
1177,2015-01-09 04:30:00,2015-01-09 04:30:00,2015-01-09 04:39:00,6,34.61,49.02,46.95,2015-01-09 04:30:00,2015-01-09 04:40:00,1,1,1,2015-01
4092,2015-01-29 10:20:00,2015-01-29 10:20:11,2015-01-29 10:20:11,1,95.48,100.19,99.58,2015-01-29 10:20:00,2015-01-29 10:30:00,1,1,1,2015-01
1157,2015-01-09 01:10:00,2015-01-09 01:10:00,2015-01-09 01:18:00,6,38.30,20.44,21.65,2015-01-09 01:10:00,2015-01-09 01:20:00,1,1,1,2015-01
4353,2015-01-31 05:50:00,2015-01-31 05:50:48,2015-01-31 05:59:47,5,44.97,62.82,60.49,2015-01-31 05:50:00,2015-01-31 06:00:00,1,1,1,2015-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...
314140,2020-12-21 13:00:00,2020-12-21 13:02:40,2020-12-21 13:07:58,5,80.60,76.38,76.15,2020-12-21 13:00:00,2020-12-21 13:10:00,1,1,1,2020-12
312141,2020-12-07 15:50:00,2020-12-07 15:55:36,2020-12-07 15:58:53,2,93.22,93.64,92.89,2020-12-07 15:50:00,2020-12-07 16:00:00,1,1,1,2020-12
312673,2020-12-11 08:30:00,2020-12-11 08:31:19,2020-12-11 08:39:39,8,99.98,93.37,93.58,2020-12-11 08:30:00,2020-12-11 08:40:00,1,1,1,2020-12
313898,2020-12-19 20:40:00,2020-12-19 20:40:12,2020-12-19 20:49:03,11,59.91,47.50,50.66,2020-12-19 20:40:00,2020-12-19 20:50:00,1,1,1,2020-12


### Getting no ships data for oregon slope and southern hydrate

In [70]:
df = pd.read_csv('E:/Acer backup 3/internships and jobs/UW/Reader grader and TA/Shima Abadi/Data/oregon_hydrate.csv',sep = ',')

df['TIMESTAMP UTC']=df['TIMESTAMP UTC'].astype('datetime64[ns]')

In [75]:
no_ships_slope_hydrate=get_no_ships_slope_hydrate(df,2.9,10,'10min',6000)

In [76]:
no_ships_slope_hydrate

Unnamed: 0,TIMESTAMP UTC,min_time,max_time,count,min_slope_dist,min_hydrate_dist,start_time,end_time,slope_noship,hydrate_noship,YY-MM
4178,2017-01-30 10:00:00,2017-01-30 10:00:55,2017-01-30 10:09:26,8,31.69,38.12,2017-01-30 10:00:00,2017-01-30 10:10:00,1,1,2017-01
2185,2017-01-16 13:50:00,2017-01-16 13:50:42,2017-01-16 13:59:13,9,28.61,25.80,2017-01-16 13:50:00,2017-01-16 14:00:00,1,1,2017-01
4129,2017-01-30 01:50:00,2017-01-30 01:50:30,2017-01-30 01:59:50,8,34.91,40.16,2017-01-30 01:50:00,2017-01-30 02:00:00,1,1,2017-01
4029,2017-01-29 09:10:00,2017-01-29 09:10:16,2017-01-29 09:19:46,18,6.37,22.79,2017-01-29 09:10:00,2017-01-29 09:20:00,1,1,2017-01
2004,2017-01-15 07:40:00,2017-01-15 07:40:18,2017-01-15 07:49:01,17,13.28,6.33,2017-01-15 07:40:00,2017-01-15 07:50:00,1,1,2017-01
...,...,...,...,...,...,...,...,...,...,...,...
285984,2022-06-10 09:40:00,2022-06-10 09:40:01,2022-06-10 09:49:20,3,12.80,8.71,2022-06-10 09:40:00,2022-06-10 09:50:00,1,1,2022-06
288903,2022-06-30 16:10:00,2022-06-30 16:10:13,2022-06-30 16:19:33,7,33.14,50.11,2022-06-30 16:10:00,2022-06-30 16:20:00,1,1,2022-06
285937,2022-06-10 01:50:00,2022-06-10 01:58:15,2022-06-10 01:58:15,1,19.48,10.39,2022-06-10 01:50:00,2022-06-10 02:00:00,1,1,2022-06
286367,2022-06-13 01:30:00,2022-06-13 01:30:14,2022-06-13 01:34:03,2,19.53,10.33,2022-06-13 01:30:00,2022-06-13 01:40:00,1,1,2022-06


In [77]:
df.columns

Index(['Unnamed: 0', 'MMSI', 'BaseDateTime', 'LAT', 'LON', 'SOG', 'COG',
       'Heading', 'VesselName', 'IMO', 'CallSign', 'Status', 'LENGTH', 'Width',
       'Draft', 'Cargo', 'TransceiverClass', 'TIMESTAMP UTC',
       'oregon_slope_Lat', 'oregon_slope_Lon', 'oregon_slope_Loc', 'ship_Loc',
       'Southern_hydrate_Lat', 'Southern_hydrate_lon', 'Southern_hydrate_Loc',
       'distance(in km) oregon', 'distance(in km) hydrate', 'Type Code',
       'VESSEL TYPE', 'Group', 'group3'],
      dtype='object')

In [78]:
path_to_write='E:/Acer backup 3/internships and jobs/UW/Reader grader and TA/Shima Abadi/Data/Benchmark_data/No_ships'

### Get spectrogram images

Axial, Central and Eastern

In [97]:
get_spectrogram_axial(no_ships_axial)

Saving spectrograms


  0%|                                                                                        | 0/11786 [00:00<?, ?it/s]

Downloading mseed file...


  0%|                                                                              | 1/11786 [00:01<5:27:49,  1.67s/it]

   Specific Time window timed out.
None
data trace is none. Continuing to next
Downloading mseed file...


  0%|                                                                              | 2/11786 [00:03<5:57:05,  1.82s/it]

   Specific Time window timed out.
None
data trace is none. Continuing to next
Downloading mseed file...


  0%|                                                                              | 2/11786 [00:05<9:41:54,  2.96s/it]

OO.AXBA1.AXBA1.HDH | 2015-01-29T10:20:00.000000Z - 2015-01-29T10:30:00.000000Z | 200.0 Hz, 120001 samples





NameError: name 'inner_rad' is not defined

In [86]:
no_ships_axial.start_time.iloc[0]

Timestamp('2015-01-12 05:40:00')