In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from tqdm import tqdm_notebook as tqdm
import pathlib 
from pathlib import Path
from geopandas import GeoDataFrame
from shapely.geometry import Point
import fiona

import os
import csv
import requests

from datetime import date
from dateutil.rrule import rrule, DAILY

import shutil 
from urllib.request import URLopener
import urllib


**Insert input parameters**

In [None]:
# Inclusive date range of AIS daily files to fetch: `a` is the first day, `b` the last.
a, b = date(2020, 11, 10), date(2020, 11, 20)

# Text matched against the 'Name' column when filtering the AIS records.
ship_name = "Rosalie"

# Working directory for downloaded and derived files.
# Must end with '/' because later cells build file names by string concatenation.
path = '/ncr2420/ANSU/6_Tasks/2109_ShipDetection/4_LotteRosalie_14_20_November_2020/'


**Downloading the AIS data from the Danish Maritime Authority**

In [None]:
# One YYYYMMDD stamp per day in the inclusive range [a, b].
mainlist = [day.strftime("%Y%m%d") for day in rrule(DAILY, dtstart=a, until=b)]

# Fetch every daily file that is not already present in `path`.
for stamp in tqdm(mainlist, desc='Number files to read'):
    path_full = path + stamp + '.csv'
    if Path(path_full).is_file():
        print(f'The file exists: {stamp}.')
        continue

    url = 'ftp://ftp.ais.dk/ais_data/aisdk_' + stamp + '.csv'

    # Download into a sidecar ".part" file and rename only on success.
    # Writing straight to `path_full` would leave a truncated CSV behind when
    # the transfer is interrupted, and the next run would then report it as
    # "The file exists" and silently use incomplete data.
    path_partial = path_full + '.part'
    try:
        urllib.request.urlretrieve(url, path_partial)
        os.replace(path_partial, path_full)
    finally:
        urllib.request.urlcleanup()
        # Only present here if the download or the rename failed.
        if os.path.exists(path_partial):
            os.remove(path_partial)
    print('The file was downloaded:', path_full)

**Filter based on a ship name and save it as a csv file**

In [None]:
# Every CSV below `path`, including those in subdirectories.
paths_list = list(pathlib.Path(path).rglob('*.csv'))

# Raw inputs are the CSVs whose file name does not contain "selected";
# the others are treated as outputs of an earlier run of this cell.
raw_paths = [
    csv_path
    for csv_path in tqdm(paths_list, desc='Number files to read')
    if "selected" not in csv_path.name
]

for raw_path in raw_paths:
    # `file` stays a plain stem string: the "Unique ship name" cell below
    # rebuilds a path from `path` and `file` after this loop finishes.
    file = raw_path.stem

    # Read and write next to the discovered file. rglob() also returns files
    # from subdirectories, and rebuilding the location as `path + name` would
    # point at a file that does not exist (or at a different same-named file).
    selected_path = raw_path.with_name(f'{file}_selected.csv')

    if selected_path.is_file():
        print(f'Selected file exit for:{file}')
        continue

    print(f'Selected file does not exit for:{file}')
    df = pd.read_csv(raw_path)
    # `ship_name` is interpreted as a regular expression by str.contains;
    # rows with a missing Name are excluded (na=False).
    df_selected = df[df['Name'].str.contains(ship_name, na=False)]
    df_selected.to_csv(selected_path)
            


**Unique ship names**

In [None]:
# List the distinct vessel names found in one raw daily file.
# NOTE: `file` is whatever value the most recently executed loop left behind,
# so this cell must be run right after the filtering cell above.
raw_csv = '{}{}.csv'.format(path, file)
df = pd.read_csv(raw_csv)
unique_names = df['Name'].unique()

# for i in unique_names:print (i)

**Print Timestamp from selected files**

In [None]:
# Collect every "*_selected.csv" below `path` (subdirectories included).
paths_list_selected = list(pathlib.Path(path).rglob('*_selected.csv'))

# For each filtered file, print its name followed by "latest  -  earliest" timestamp.
for selected_csv in paths_list_selected:
    print(selected_csv.name)
    df = pd.read_csv(str(selected_csv))
    # NOTE(review): parsed with pandas defaults; if the '# Timestamp' column is
    # day-first (dd/mm/yyyy), dayfirst=True may be needed - confirm against the data.
    df['dt'] = pd.to_datetime(df['# Timestamp'])
    df_Timestamp_min, df_Timestamp_max = df['dt'].min(), df['dt'].max()
    print(df_Timestamp_max, '  -  ', df_Timestamp_min)
    

**Select the timestamp period:**

In [None]:
# df['dt'] = pd.to_datetime(df['# Timestamp'])
# df_selected = df[(df['dt'] > '2021-01-09 05:44:59') & (df['dt'] < '2021-01-09 05:55:01')]
# df_selected = df[(df['Name'] = 'Jolissa') 
# df[df['ids'].str.contains("ball")]

In [None]:
#df_selected.to_csv('/ncr2420/ANSU/6_Tasks/2109_ShipDetection/aisdk_20200605_selected.csv')

**Create shapefiles from csv**

In [None]:
# Convert every "*_selected.csv" below `path` into an ESRI Shapefile written
# next to the CSV (same name, ".shp" extension).
paths_selected = pathlib.Path(path).rglob('*_selected.csv')
paths_list_selected = list(paths_selected)

# Coordinate reference system declared for the output geometries.
# The authority string replaces the deprecated {'init': 'epsg:4326'} form,
# which triggers a FutureWarning in current geopandas/pyproj.
crs = 'EPSG:4326'

# The loop variable is deliberately named `file`: the next cell reads it.
for file in paths_list_selected:
    df = pd.read_csv(file)

    # One point per row, built as (x=Longitude, y=Latitude).
    geometry = [Point(xy) for xy in zip(df.Longitude, df.Latitude)]

    # Attach the geometries and CRS to the tabular data.
    gdf = GeoDataFrame(df, crs=crs, geometry=geometry)

    # Write the shapefile alongside the source CSV.
    gdf.to_file(filename=str(file.with_suffix('.shp')), driver='ESRI Shapefile')


In [None]:
# Load the CSV referenced by `file` (left over from the previous cell).
# `DataFrame` is never imported in this notebook and DataFrame.from_csv has
# been removed from pandas; read_csv with index_col=0 and parse_dates=True is
# the documented equivalent of the old from_csv defaults.
data = pd.read_csv(file, index_col=0, parse_dates=True)

# Coordinates may use a decimal comma; normalise to a dot and convert to float.
for column in ("Latitude", "Longitude"):
    data[column] = (
        data[column]
        .astype(str)
        .str.replace(",", ".", regex=False)
        .astype(float)
    )


data.info()
data.head()
#points = [Point(row['Latitude'], row['Longitude']) for key, row in data.iterrows()]


In [None]:
# Filtered AIS file to re-export; read with "." as the decimal separator.
file = '/ncr2420/ANSU/6_Tasks/2109_ShipDetection/1_Johanna_Maria_23_26_August_2021/20210820_selected.csv'
df = pd.read_csv(file, decimal=".")

# Write a copy in which floats use "," as the decimal separator.
df.to_csv(
    "/ncr2420/ANSU/6_Tasks/2109_ShipDetection/1_Johanna_Maria_23_26_August_2021/test2.csv",
    sep=',',
    encoding='utf-8',
    quotechar='"',
    decimal=',',
)
#df['Latitude'] = df['Latitude'].str.replace('.', ',.').astype(float)

#df['Latitude'] = df['Latitude'].replace('.', ',')
#df['Latitude']
#df['Latitude'] = df['Latitude'].apply(str).str.replace('.', ',')
#file_new = df.to_csv('/ncr2420/ANSU/6_Tasks/2109_ShipDetection/1_Johanna_Maria_23_26_August_2021/20210820_sel.csv')

#df['Latitude'] = df['Latitude'].astype(float)
#df.head(10)

#df['Latitude'] = df['Latitude'].astype(float)
#df.head(10)