# Statistics about SQL

In [2]:
from os import environ

import pandas as pd
from sqlalchemy import create_engine

In [3]:
# Database connection: SQLALCHEMY_DATABASE_URI from the environment wins;
# otherwise fall back to the local "spartid_ais" PostgreSQL instance.
# NOTE(review): the fallback embeds a user/password pair - presumably
# local-development defaults; confirm they are never valid elsewhere.
db_uri = environ.get(
    "SQLALCHEMY_DATABASE_URI",
    "postgresql://postgres:postgres@localhost:5432/spartid_ais",
)
engine = create_engine(db_uri)

In [4]:
# Overview of the database: names of all tables in the "public" schema,
# sorted alphabetically. The resulting frame is the cell's displayed output.
pd.read_sql_query(
    sql="""
    SELECT table_name
    FROM information_schema.tables
    WHERE table_schema = 'public'
    ORDER BY table_name;
""",
    con=engine,
)

Unnamed: 0,table_name
0,alembic_version
1,geography_columns
2,geometry_columns
3,historic_position
4,imo_vessel_codes
5,last_position
6,spatial_ref_sys


In [11]:
# Load the position reports of one vessel (MMSI 258509000) from the last
# three days. The window is relative to now() on the database server, so the
# row count and contents change from run to run.
#
# ORDER BY is required here: without it PostgreSQL may return the rows in any
# order, which would make the displayed head/tail, the exported file and any
# time-series plot depend on the physical row order of the table.
df_raw_history = pd.read_sql_query(
    """
        SELECT *
        FROM historic_position
        WHERE mmsi = 258509000 AND
              timestamp > now() - interval '3 day'
        ORDER BY timestamp;
    """,
    engine)
# Row count first, then the (truncated) rich display of the frame itself.
print(len(df_raw_history))
df_raw_history

1638


Unnamed: 0,id,mmsi,lat,long,timestamp,msg_type,repeat,status,turn,speed,accuracy,course,heading,maneuver,raim,radio
0,16385322,258509000,59.909945,10.728565,2023-11-10 16:37:40.949161,3,0,Moored,0.0,0.0,True,2.9,200,NotAvailable,True,85290
1,16388427,258509000,59.909943,10.728543,2023-11-10 16:40:35.641058,3,0,Moored,0.0,0.0,False,2.9,200,NotAvailable,False,85946
2,16391726,258509000,59.909947,10.728533,2023-11-10 16:43:41.684553,3,0,Moored,0.0,0.0,False,2.9,200,NotAvailable,False,85418
3,16394817,258509000,59.909963,10.728558,2023-11-10 16:46:37.958840,3,0,Moored,0.0,0.0,True,2.9,200,NotAvailable,True,84762
4,16398151,258509000,59.909973,10.728535,2023-11-10 16:49:42.319799,3,0,Moored,0.0,0.0,True,2.9,200,NotAvailable,True,83834
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1633,20947032,258509000,59.871138,10.657430,2023-11-13 16:26:23.576335,1,0,UnderWayUsingEngine,0.0,0.0,False,214.6,32,NotAvailable,False,2272
1634,20949388,258509000,59.871130,10.657463,2023-11-13 16:28:34.004612,1,0,UnderWayUsingEngine,0.0,0.0,False,214.6,32,NotAvailable,False,49252
1635,20951711,258509000,59.871508,10.657948,2023-11-13 16:30:43.640688,1,0,UnderWayUsingEngine,-127.0,3.8,True,29.7,25,NotAvailable,True,34344
1636,20953847,258509000,59.875428,10.662588,2023-11-13 16:32:42.794695,1,0,UnderWayUsingEngine,0.0,9.7,True,32.7,30,NotAvailable,True,2246


In [45]:
# Plot the navigational status as a 0/1 signal over time.
#
# A bar plot treats every x value as a separate category, so with one bar per
# position report the tick labels overlap and the real spacing between
# timestamps is lost. A step line on a true datetime axis shows the same
# information legibly.
ax = (
    df_raw_history
    .assign(
        time=lambda df1: pd.to_datetime(df1["timestamp"]),
        # Statuses other than the two listed here map to NaN and leave a gap.
        status_cat=lambda df1: df1["status"].map({"UnderWayUsingEngine": 1, "Moored": 0}),
    )
    # A line plot connects points in row order, so make that order chronological.
    .sort_values("time")
    .plot(x="time", y="status_cat", drawstyle="steps-post", legend=False)
)
# Trailing semicolon suppresses the repr of the value returned by ax.set().
ax.set(
    title="Navigational status over time",
    xlabel="time",
    ylabel="status (0 = Moored, 1 = UnderWayUsingEngine)",
);

<Axes: xlabel='timestamp'>

In [44]:
# Inspect the column dtypes pandas inferred for the loaded history frame
# (e.g. to check whether "timestamp" already arrived as a datetime column).
df_raw_history.dtypes

id                    int64
mmsi                  int64
lat                 float64
long                float64
timestamp    datetime64[ns]
dtype: object

In [45]:
# Export a slimmed-down copy of the history: add a "time" column derived from
# "timestamp", drop the "id" and "timestamp" columns, and store "mmsi" as a
# categorical column.
# NOTE(review): compression="gzip" selects the codec used inside the Parquet
# file; the ".gz" suffix does not make the file itself a gzip archive.
(
    df_raw_history
    .assign(time=lambda df1: pd.to_datetime(df1["timestamp"]))
    .drop(columns=["id", "timestamp"])
    .astype({"mmsi": "category"})
    .to_parquet("history_small.parquet.gz", compression="gzip")
)

In [46]:
# Load every row and column of the imo_vessel_codes table.
df_raw_imo_vessels = pd.read_sql(
    sql="SELECT * from imo_vessel_codes",
    con=engine,
)
# Row count first, then the (truncated) rich display of the frame itself.
print(len(df_raw_imo_vessels))
df_raw_imo_vessels

784


Unnamed: 0,mmsi,imo,name,flag,type
0,257230800,0,GRIP SOLAR,,80
1,258582000,6711467,STEFFEN ANDREAS,,70
2,257035150,9525560,GEO CASPIAN,,31
3,259674000,8739009,VEDEROY,,60
4,257147200,0,SOVIKNES,,0
...,...,...,...,...,...
779,258499000,5351894,POLARGIRL,,61
780,257093390,0,RAMNTINDEN,,52
781,258003290,8611958,SULE VIKING,,70
782,258104000,6930544,NORDKABEL,,33
