In [1]:
import asyncio
import json
import time
import warnings
from datetime import datetime, timezone
from pathlib import Path

import pandas as pd
import websockets

# Silence every warning category for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation notices from pandas or
# websockets are hidden as well - consider narrowing it to specific categories.
warnings.filterwarnings('ignore') 

In [2]:
# Column layout of the two result frames built by connect_ais_stream().
# The names are the dotted paths that pd.json_normalize() produces from the
# nested AIS messages. For each message type there is a "keep" list (initial
# columns of the result frame) and an "exclude" list (columns dropped from
# every incoming message before it is stored).

# --- PositionReport messages -------------------------------------------------
position_columns = [
    "Message.PositionReport.Cog",
    "Message.PositionReport.Latitude",
    "Message.PositionReport.Longitude",
    "Message.PositionReport.NavigationalStatus",
    "Message.PositionReport.PositionAccuracy",
    "Message.PositionReport.RateOfTurn",
    "Message.PositionReport.Sog",
    "Message.PositionReport.TrueHeading",
    "MetaData.MMSI",
    "MetaData.ShipName",
    "MetaData.time_utc",
]

position_exclude = [
    "MessageType",
    "Message.PositionReport.CommunicationState",
    "MetaData.MMSI_String",
    "MetaData.latitude",
    "MetaData.longitude",
    "Message.PositionReport.MessageID",
    "Message.PositionReport.Raim",
    "Message.PositionReport.Timestamp",
    "Message.PositionReport.RepeatIndicator",
    "Message.PositionReport.Spare",
    "Message.PositionReport.UserID",
    "Message.PositionReport.Valid",
    "Message.PositionReport.SpecialManoeuvreIndicator",
]

# --- ShipStaticData messages -------------------------------------------------
ship_data_columns = [
    "Message.ShipStaticData.CallSign",
    "Message.ShipStaticData.Destination",
    "Message.ShipStaticData.Eta.Day",
    "Message.ShipStaticData.Eta.Hour",
    "Message.ShipStaticData.Eta.Minute",
    "Message.ShipStaticData.Eta.Month",
    "Message.ShipStaticData.ImoNumber",
    "Message.ShipStaticData.Name",
    "Message.ShipStaticData.Type",
    "MetaData.MMSI",
    "MetaData.latitude",
    "MetaData.longitude",
    "MetaData.time_utc",
]

ship_data_exclude = [
    "MessageType",
    "Message.ShipStaticData.AisVersion",
    "Message.ShipStaticData.Dte",
    "Message.ShipStaticData.Dimension.A",
    "Message.ShipStaticData.Dimension.B",
    "Message.ShipStaticData.Dimension.C",
    "Message.ShipStaticData.Dimension.D",
    "Message.ShipStaticData.FixType",
    "Message.ShipStaticData.MaximumStaticDraught",
    "Message.ShipStaticData.MessageID",
    "Message.ShipStaticData.RepeatIndicator",
    "Message.ShipStaticData.Spare",
    "Message.ShipStaticData.UserID",
    "MetaData.ShipName",
    "MetaData.MMSI_String",
    "Message.ShipStaticData.Valid",
]


In [3]:
def _messages_to_frame(messages, expected_columns, excluded_columns):
    """Flatten the collected raw AIS messages into one result DataFrame.

    Parameters
    ----------
    messages : list[dict]
        Decoded JSON messages of a single message type.
    expected_columns : list[str]
        Columns that must be present in the result; they come first, in this
        order, and are filled with NaN when no message carried them.
    excluded_columns : list[str]
        Columns removed from the result. Names that never occurred in the
        messages are ignored instead of raising ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        One row per message; any column that is neither expected nor excluded
        is kept and appended after the expected ones.
    """
    if not messages:
        return pd.DataFrame(columns=expected_columns)

    frame = pd.json_normalize(messages).drop(columns=excluded_columns, errors="ignore")
    unexpected = [name for name in frame.columns if name not in expected_columns]
    return frame.reindex(columns=[*expected_columns, *unexpected])


async def connect_ais_stream(duration_seconds=60 * 60 * 24, api_key=None):
    """Collect AIS messages from AISStream for a fixed period of time.

    Opens a websocket to AISStream, subscribes to the bounding box around the
    Laconian Gulf and stores every ``PositionReport`` and ``ShipStaticData``
    message that arrives until the time limit is reached or the server closes
    the connection. Messages of any other type are skipped.

    Parameters
    ----------
    duration_seconds : float, optional
        How long to listen, in seconds. Defaults to 24 hours.
    api_key : str, optional
        AISStream API key. When omitted, the notebook-level ``PRIVATE_KEY``
        variable is used; it must be defined before calling (a ``NameError``
        is raised otherwise).

    Returns
    -------
    tuple[pandas.DataFrame, pandas.DataFrame]
        ``(position_df, ship_data_df)``. Both frames are always returned, also
        when the connection drops early, so data collected so far is not lost.
    """
    if api_key is None:
        api_key = PRIVATE_KEY

    # Raw messages are buffered in plain lists and converted to DataFrames once
    # at the end; concatenating a DataFrame per message is quadratic.
    position_messages = []
    ship_data_messages = []

    # Monotonic clock: the deadline is unaffected by system clock adjustments.
    deadline = time.monotonic() + duration_seconds

    # Subscribe with personal APIKey and coordinates bounding the Laconian Gulf
    subscribe_message = {"APIKey": api_key,
                         "BoundingBoxes": [[[36.86687546866755, 22.184442828560584],
                                            [36.199014645278616, 23.13974778084953]]]}

    try:
        # Connect websocket to AISStream
        async with websockets.connect("wss://stream.aisstream.io/v0/stream") as websocket:
            await websocket.send(json.dumps(subscribe_message))

            while True:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    break

                # Bound the wait for the next message by the time that is left,
                # so a quiet stream cannot keep the coroutine alive past the
                # deadline.
                try:
                    message_json = await asyncio.wait_for(websocket.recv(), timeout=remaining)
                except asyncio.TimeoutError:
                    break

                message = json.loads(message_json)

                # .get(): payloads without a "MessageType" key are skipped
                # rather than raising KeyError.
                message_type = message.get("MessageType")

                if message_type == "PositionReport":
                    position_messages.append(message)

                    # Print execution status every time 100 rows are stored
                    if len(position_messages) % 100 == 0:
                        time_remaining = round((deadline - time.monotonic()) / 60, 1)
                        print(f"Time to finish: {time_remaining}")
                        print(f"Position messages extracted: {len(position_messages)}")

                elif message_type == "ShipStaticData":
                    ship_data_messages.append(message)

    except websockets.ConnectionClosed as closed:
        # Keep what was collected instead of discarding hours of data.
        print(f"Connection closed before the time limit was reached: {closed!r}")

    position_df = _messages_to_frame(position_messages, position_columns, position_exclude)
    ship_data_df = _messages_to_frame(ship_data_messages, ship_data_columns, ship_data_exclude)
    return position_df, ship_data_df
            

In [None]:
position_day1, ship_data_day1 = await connect_ais_stream()

position_day1.to_csv("Data/position_day1.csv")
ship_data_day1.to_csv("Data/ship_data_day1.csv")

In [None]:
# Storage predicted calculation
min5 = position_day1.memory_usage(index=True, deep=True).sum()
hora = min5 * 12
dia = hora * 24
weekday = dia * 5

print(hora)
print(dia)
print(weekday)

In [None]:
position_day2, ship_data_day2 = await connect_ais_stream()

position_day2.to_csv("Data/position_day2.csv")
ship_data_day2.to_csv("Data/ship_data_day2.csv")

In [None]:
position_day3, ship_data_day3 = await connect_ais_stream()

position_day3.to_csv("Data/position_day3.csv")
ship_data_day3.to_csv("Data/ship_data_day3.csv")

In [None]:
position_day4, ship_data_day4 = await connect_ais_stream()

position_day4.to_csv("Data/position_day4.csv")
ship_data_day4.to_csv("Data/ship_data_day4.csv")

In [None]:
position_day5, ship_data_day5 = await connect_ais_stream()

position_day5.to_csv("Data/position_day5.csv")
ship_data_day5.to_csv("Data/ship_data_day5.csv")

In [None]:
position_day6, ship_data_day6 = await connect_ais_stream()

position_day6.to_csv("Data/position_day6.csv")
ship_data_day6.to_csv("Data/ship_data_day6.csv")

In [None]:
position_day7, ship_data_day7 = await connect_ais_stream()

position_day7.to_csv("Data/position_day7.csv")
ship_data_day7.to_csv("Data/ship_data_day7.csv")

In [None]:
position_day8, ship_data_day8 = await connect_ais_stream()

position_day8.to_csv("Data/position_day8.csv")
ship_data_day8.to_csv("Data/ship_data_day8.csv")

In [None]:
position_day9, ship_data_day9 = await connect_ais_stream()

position_day9.to_csv("Data/position_day9.csv")
ship_data_day9.to_csv("Data/ship_data_day9.csv")

In [None]:
position_day11, ship_data_day11 = await connect_ais_stream()

position_day11.to_csv("Data/position_day11.csv")
ship_data_day11.to_csv("Data/ship_data_day11.csv")

In [None]:
position_day12, ship_data_day12 = await connect_ais_stream()

position_day12.to_csv("Data/position_day12.csv")
ship_data_day12.to_csv("Data/ship_data_day12.csv")

In [None]:
position_day13, ship_data_day13 = await connect_ais_stream()

position_day13.to_csv("Data/position_day13.csv")
ship_data_day13.to_csv("Data/ship_data_day13.csv")

In [None]:
position_day14, ship_data_day14 = await connect_ais_stream()

position_day14.to_csv("Data/position_day14.csv")
ship_data_day14.to_csv("Data/ship_data_day14.csv")

In [4]:
position_day15, ship_data_day15 = await connect_ais_stream()

position_day15.to_csv("Data/position_day15.csv")
ship_data_day15.to_csv("Data/ship_data_day15.csv")

Time to finish: 1434.9
Position messages extracted: 100
Time to finish: 1428.3
Position messages extracted: 200
Time to finish: 1422.3
Position messages extracted: 300
Time to finish: 1416.1
Position messages extracted: 400
Time to finish: 1410.0
Position messages extracted: 500
Time to finish: 1404.8
Position messages extracted: 600
Time to finish: 1398.2
Position messages extracted: 700
Time to finish: 1392.8
Position messages extracted: 800
Time to finish: 1386.4
Position messages extracted: 900
Time to finish: 1381.0
Position messages extracted: 1000
Time to finish: 1375.5
Position messages extracted: 1100
Time to finish: 1369.7
Position messages extracted: 1200
Time to finish: 1363.5
Position messages extracted: 1300
Time to finish: 1358.1
Position messages extracted: 1400
Time to finish: 1351.9
Position messages extracted: 1500
Time to finish: 1345.4
Position messages extracted: 1600
Time to finish: 1338.9
Position messages extracted: 1700
Time to finish: 1332.1
Position messages

Time to finish: 565.2
Position messages extracted: 14600
Time to finish: 560.0
Position messages extracted: 14700
Time to finish: 554.9
Position messages extracted: 14800
Time to finish: 549.9
Position messages extracted: 14900
Time to finish: 544.7
Position messages extracted: 15000
Time to finish: 540.0
Position messages extracted: 15100
Time to finish: 535.0
Position messages extracted: 15200
Time to finish: 529.6
Position messages extracted: 15300
Time to finish: 524.4
Position messages extracted: 15400
Time to finish: 519.2
Position messages extracted: 15500
Time to finish: 514.4
Position messages extracted: 15600
Time to finish: 509.3
Position messages extracted: 15700
Time to finish: 503.6
Position messages extracted: 15800
Time to finish: 498.1
Position messages extracted: 15900
Time to finish: 492.4
Position messages extracted: 16000
Time to finish: 487.0
Position messages extracted: 16100
Time to finish: 481.5
Position messages extracted: 16200
Time to finish: 473.5
Position 

In [7]:
position_day16, ship_data_day16 = await connect_ais_stream()

position_day16.to_csv("Data/position_day16.csv")
ship_data_day16.to_csv("Data/ship_data_day16.csv")

Time to finish: 1344.7
Position messages extracted: 100
Time to finish: 1179.2
Position messages extracted: 200
Time to finish: 1047.6
Position messages extracted: 300
Time to finish: 826.5
Position messages extracted: 400
Time to finish: 726.8
Position messages extracted: 500
Time to finish: 698.2
Position messages extracted: 600
Time to finish: 680.9
Position messages extracted: 700
Time to finish: 668.3
Position messages extracted: 800
Time to finish: 656.8
Position messages extracted: 900
Time to finish: 647.6
Position messages extracted: 1000
Time to finish: 639.2
Position messages extracted: 1100
Time to finish: 630.3
Position messages extracted: 1200
Time to finish: 622.4
Position messages extracted: 1300
Time to finish: 612.5
Position messages extracted: 1400
Time to finish: 601.8
Position messages extracted: 1500
Time to finish: 594.5
Position messages extracted: 1600
Time to finish: 586.5
Position messages extracted: 1700
Time to finish: 578.8
Position messages extracted: 180

Time to finish: 159.5
Position messages extracted: 14700
Time to finish: 156.3
Position messages extracted: 14800
Time to finish: 152.9
Position messages extracted: 14900
Time to finish: 150.1
Position messages extracted: 15000
Time to finish: 147.0
Position messages extracted: 15100
Time to finish: 144.0
Position messages extracted: 15200
Time to finish: 140.8
Position messages extracted: 15300
Time to finish: 137.7
Position messages extracted: 15400
Time to finish: 134.7
Position messages extracted: 15500
Time to finish: 131.8
Position messages extracted: 15600
Time to finish: 128.8
Position messages extracted: 15700
Time to finish: 125.9
Position messages extracted: 15800
Time to finish: 122.6
Position messages extracted: 15900
Time to finish: 119.7
Position messages extracted: 16000
Time to finish: 116.7
Position messages extracted: 16100
Time to finish: 114.0
Position messages extracted: 16200
Time to finish: 110.8
Position messages extracted: 16300
Time to finish: 107.9
Position 