In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from tqdm import tqdm_notebook as tqdm
import pathlib 
from pathlib import Path
from geopandas import GeoDataFrame
from shapely.geometry import Point
import fiona

import os
import csv
import requests

from datetime import date
from dateutil.rrule import rrule, DAILY

import shutil 
from urllib.request import URLopener
import urllib


**Insert input parameters**

In [2]:
# Inclusive date range of the daily AIS files to process: first day (a) and last day (b).
a = date(year=2020, month=11, day=10)
b = date(year=2020, month=11, day=20)

# Text searched for in the 'Name' column when filtering the daily files.
ship_name = "Rosalie"

# Working folder for the raw CSVs, the *_selected.csv files and the shapefiles.
# The trailing slash matters: later cells build file names by string concatenation.
path = '/ncr2420/ANSU/6_Tasks/2109_ShipDetection/4_LotteRosalie_14_20_November_2020/'


**Downloading the AIS data from the Danish Maritime Authority**

In [3]:
# One 'YYYYMMDD' stamp per day from a to b (both inclusive); each stamp names one daily file.
mainlist = [dt.strftime("%Y%m%d") for dt in rrule(DAILY, dtstart=a, until=b)]

for i in tqdm(mainlist, desc='Number files to read'):
    path_full = path + i +'.csv'
    if Path(path_full).is_file():
        # Already present locally: skip the download.
        print(f'The file exists: {i}.')
        continue

    url = 'ftp://ftp.ais.dk/ais_data/aisdk_'+ i +'.csv'
    # Download under a temporary name and rename only on success. Otherwise an
    # interrupted transfer would leave a truncated CSV that the is_file() check
    # above accepts as complete on every later run.
    path_partial = path_full + '.part'
    try:
        urllib.request.urlretrieve(url, path_partial)
        os.replace(path_partial, path_full)
    except BaseException:
        # Also covers a manual kernel interrupt: drop the partial file, then re-raise.
        if os.path.exists(path_partial):
            os.remove(path_partial)
        raise
    finally:
        urllib.request.urlcleanup()
    print('The file was downloaded:',path_full)

HBox(children=(IntProgress(value=0, description='Number files to read', max=11, style=ProgressStyle(descriptio…

The file exists: 20201110.
The file exists: 20201111.
The file exists: 20201112.
The file exists: 20201113.
The file exists: 20201114.
The file exists: 20201115.
The file exists: 20201116.
The file exists: 20201117.
The file exists: 20201118.
The file exists: 20201119.
The file exists: 20201120.



**Filter based on a ship name and save it as a csv file**

In [4]:
# Find every CSV below `path` (recursively) and split them into raw daily files
# and already-filtered "*selected*" files.
paths = pathlib.Path(path).rglob('*.csv')
paths_list = list(paths)

file_names = []            # stems of the raw files
file_names_selected = []   # stems of the filtered files
source_paths = []          # full paths of the raw files, in the same order as file_names

for f in tqdm(paths_list,desc='Number files to read'):
    if "selected" in f.name:
        file_names_selected.append(f.stem)
    else:
        file_names.append(f.stem)
        source_paths.append(f)

# Write a "<stem>_selected.csv" next to every raw file that does not have one yet.
# An existing selected file is reused as-is, whatever ship name produced it:
# delete it after changing `ship_name` to force regeneration.
for source_path in source_paths:
    # `file` is kept as the plain stem string; the "Unique ship name" cell reads it.
    file = source_path.stem
    # Read and write beside the file that was actually found, so CSVs located in
    # subfolders by rglob do not fail with FileNotFoundError.
    selected_path = source_path.with_name(f'{file}_selected.csv')
    if selected_path.is_file():
        print(f'Selected file exists for:{file}')
        continue

    print(f'Selected file does not exist for:{file}')
    df = pd.read_csv(source_path)
    # Literal, case-insensitive substring match: regex metacharacters in the
    # ship name are not interpreted, and "Rosalie" also matches "ROSALIE".
    # NOTE(review): presumably the 'Name' column is upper-case, which would
    # explain the empty selections seen so far -- confirm against a raw file.
    name_matches = df['Name'].str.contains(ship_name, case=False, regex=False, na=False)
    df_selected = df[name_matches]
    df_selected.to_csv(selected_path)
            


HBox(children=(IntProgress(value=0, description='Number files to read', max=22, style=ProgressStyle(descriptio…


Selected file exit for:20201110
Selected file exit for:20201111
Selected file exit for:20201112
Selected file exit for:20201113
Selected file exit for:20201114
Selected file exit for:20201115
Selected file exit for:20201116
Selected file exit for:20201117
Selected file exit for:20201118
Selected file exit for:20201119
Selected file exit for:20201120


**Unique ship name**

In [None]:
# check all unique vessels names
# The day is chosen explicitly (last day of the configured range) instead of
# reusing the `file` loop variable left behind by other cells: that name is
# undefined on a fresh kernel with no raw files and is rebound to a Path by
# the shapefile cell, which would produce an invalid file name here.
names_day = b.strftime("%Y%m%d")
df = pd.read_csv(f'{path}{names_day}.csv')
unique_names = df['Name'].unique()

# for i in unique_names:print (i)

**Print Timestamp from selected files**

In [5]:
# Collect every filtered file (name ending in "_selected.csv") below `path`.
paths_selected = Path(path).rglob('*_selected.csv')
paths_list_selected = [selected for selected in paths_selected]

# For each filtered file, print its name followed by "latest  -  earliest" timestamp.
# A file without rows (or without parsable timestamps) prints "NaT   -   NaT".
for f in paths_list_selected:
    print(f.name)
    df = pd.read_csv(f)
    # NOTE(review): the timestamp format is not visible here; if the file stores
    # day-first dates, to_datetime may need dayfirst=True / an explicit format -- confirm.
    df['dt'] = pd.to_datetime(df['# Timestamp'])
    df_Timestamp_max, df_Timestamp_min = df['dt'].max(), df['dt'].min()
    print(f'{df_Timestamp_max}   -   {df_Timestamp_min}')
    

20201110_selected.csv
NaT   -   NaT
20201111_selected.csv
NaT   -   NaT
20201112_selected.csv
NaT   -   NaT
20201113_selected.csv
NaT   -   NaT
20201114_selected.csv
NaT   -   NaT
20201115_selected.csv
NaT   -   NaT
20201116_selected.csv
NaT   -   NaT
20201117_selected.csv
NaT   -   NaT
20201118_selected.csv
NaT   -   NaT
20201119_selected.csv
NaT   -   NaT
20201120_selected.csv
NaT   -   NaT


**Create shapefiles from csv**

In [None]:
# Convert every "*_selected.csv" below `path` into a point shapefile written
# next to it (same name, ".shp" extension).
paths_selected = pathlib.Path(path).rglob('*_selected.csv')
paths_list_selected = list(paths_selected)

# Coordinate reference system of the output points. The authority string
# replaces the deprecated {'init': 'epsg:4326'} dict form; it is the same for
# every file, so it is defined once outside the loop.
crs = 'EPSG:4326'

# Read files sequentially
for file in paths_list_selected:
    df = pd.read_csv(file)          # Reading the csv file with pandas

    # A selection without rows has no geometry to write. Depending on the
    # geopandas version, to_file either rejects an empty frame or writes an
    # empty layer with a warning, so skip it explicitly and say so.
    if df.empty:
        print(f'No rows in {file.name}; shapefile not written.')
        continue

    # One point per row, built from the Longitude (x) and Latitude (y) columns
    geometry = [Point(xy) for xy in zip(df.Longitude, df.Latitude)]

    # Convert the pandas frame to a GeoDataFrame carrying the geometry and CRS
    gdf = GeoDataFrame(df, crs=crs, geometry=geometry)

    # Save next to the source csv, swapping the extension for ".shp"
    gdf.to_file(filename=str(file.with_suffix('.shp')), driver='ESRI Shapefile')


**Select the timestamp period:**

In [7]:
# Scratch snippets kept for reference only: every line in this cell is commented
# out, so nothing here executes. They sketch filtering `df` by a timestamp window
# and by name; the `df['Name'] = 'Jolissa'` line is not valid Python as written
# (assignment instead of comparison, unbalanced parenthesis).
# df['dt'] = pd.to_datetime(df['# Timestamp'])
# df_selected = df[(df['dt'] > '2021-01-09 05:44:59') & (df['dt'] < '2021-01-09 05:55:01')]
# df_selected = df[(df['Name'] = 'Jolissa') 
# df[df['ids'].str.contains("ball")]
#df_selected.to_csv('/ncr2420/ANSU/6_Tasks/2109_ShipDetection/aisdk_20200605_selected.csv')