In [14]:
from geojson import LineString
import pandas as pd
import turfpy.measurement as turf_measurement
from turfpy.measurement import along
from typing import List, Tuple
from sqlalchemy import create_engine
import psycopg2

import pandas as pd
from sqlalchemy import create_engine, MetaData, Table
import csv
from io import StringIO

In [2]:
multiplier = 1

# database account
db_username = "postgres"
db_password = "postgres"
db_host = "localhost"
db_port = "5432"
db_name = "bus_trial"


In [3]:
# Setting up required data! 
# Create a database connection string
connection = psycopg2.connect(user=db_username,
                                password=db_password,
                                host=db_host,
                                port=db_port,
                                database=db_name)

# Create a cursor to perform database operations
cursor = connection.cursor()
#Get the shape data in the database
query = "SELECT * FROM shapes"
cursor.execute(query)
# Get all the results
results = cursor.fetchall()
# Close the connection
cursor.close()
connection.close()
# Create a pandas dataframe
shape_df = pd.DataFrame(results, columns = ['id', 'shape_id', 'lat', 'lon', 'shape_pt_sequence'])
# Combine the coordinates together 
shape_df['coordinates'] = shape_df.apply(lambda row: [row['lon'], row['lat']], axis=1)

CREATE TABLE points_fps (
    trip_id VARCHAR(255),
    longitude DOUBLE PRECISION,
    latitude DOUBLE PRECISION,
    status INT,
    route VARCHAR(255),
    timestamp TIMESTAMP
);

In [23]:
def storeData(table, conn, keys, data_iter):
    """
    Execute SQL statement inserting data

    Parameters
    ----------
    table : pandas.io.sql.SQLTable
    conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection
    keys : list of str
        Column names
    data_iter : Iterable that iterates the values to be inserted
    """
    # gets a DBAPI connection that can provide a cursor
    dbapi_conn = conn.connection
    with dbapi_conn.cursor() as cur:
        s_buf = StringIO()
        writer = csv.writer(s_buf)
        writer.writerows(data_iter)
        s_buf.seek(0)

        columns = ', '.join('"{}"'.format(k) for k in keys)
        if table.schema:
            table_name = '{}.{}'.format(table.schema, table.name)
        else:
            table_name = table.name

        sql = 'COPY {} ({}) FROM STDIN WITH CSV'.format(
            table_name, columns)
        cur.copy_expert(sql=sql, file=s_buf)


def unique_route_steps(timestamps) :
    individual_steps = []
    for i in range(len(timestamps) - 1):
        difference = timestamps[i + 1] - timestamps[i]
        multiplied_value = round(difference * multiplier)
        individual_steps.append(multiplied_value)
    return individual_steps

def is_equal(coord1, coord2) :
    return coord1[0] == coord2[0] and coord1[1] == coord2[1]

def getShapeCoords(shape_id) :
    # Create a database connection string
    connection = psycopg2.connect(user=db_username,
                                    password=db_password,
                                    host=db_host,
                                    port=db_port,
                                    database=db_name)

    # Create a cursor to perform database operations
    cursor = connection.cursor()
    #Get the shape data in the database
    query = "SELECT * FROM shapes WHERE shape_id = %s ORDER BY shape_pt_sequence"
    cursor.execute(query, (shape_id,))
    # Get all the results
    results = cursor.fetchall()
    # Close the connection
    cursor.close()
    connection.close()
    # Create a pandas dataframe
    shape_df = pd.DataFrame(results, columns = ['id', 'shape_id', 'lat', 'lon', 'shape_pt_sequence'])
    # Combine the coordinates together 
    shape_df['coordinates'] = shape_df.apply(lambda row: [row['lon'], row['lat']], axis=1)
    # Get all the coordinates into one list 
    shape_coordinates = shape_df['coordinates'].tolist()
    return(shape_coordinates)


def route_to_dict(route):
    return {tuple(coord): idx for idx, coord in enumerate(route)}

def interpolation_fps(subsetDataset) :
    subsetDataset = subsetDataset.reset_index(drop=True)
    unique_coordinates = subsetDataset["coordinates"]
    shape_id = str(subsetDataset["shape_id"].unique()[0])
    unique_timestamps = subsetDataset["timestamps"]
    unique_status = subsetDataset["status"]
    
    # Get the bus route coordinates
    unique_route = getShapeCoords(shape_id)
    route_dict = route_to_dict(unique_route)

    # Get the number of frames we want to allocate between each observatoin 
    unique_steps = unique_route_steps(unique_timestamps)
    interpolated_points = []
    status_conditions = [] 

    for i in range(len(unique_timestamps) - 1):
        # Check if the bus status has changed. If it has we need to change the colour of the bus 
        changed_bus = False
        first_status = unique_status[i]
    
        if unique_status[i] != unique_status[i + 1]:
            second_status = unique_status[i + 1]
            changed_bus = True
            status_difference = abs(second_status - first_status)

        start_idx = route_dict.get(tuple(unique_coordinates[i]))
        end_idx = route_dict.get(tuple(unique_coordinates[i + 1]))

        # Use the indices to get the subset coordinates:
        subset_coordinates = unique_route[start_idx: end_idx + 1]

        # Create a linestring object
        line = LineString(subset_coordinates)

        # Get the distance along the line 
        line_distance = turf_measurement.length(line)

        if not changed_bus:
            z = 0
            while z < line_distance:
                interpolated_point = along(line, z)
                interpolated_points.append(tuple(interpolated_point['geometry']['coordinates']))
                status_conditions.append(first_status)
                z += line_distance / unique_steps[i]
        else:
            z = 0
            while z < line_distance:
                interpolated_point = along(line, z)
                interpolated_points.append(tuple(interpolated_point['geometry']['coordinates']))

                interpolation_index = z / line_distance

                if status_difference == 1:
                    if interpolation_index < 0.5:
                        status = first_status
                    else:
                        status = second_status
                elif status_difference == 2:
                    if interpolation_index < 1/3:
                        status = first_status
                    elif 1/3 <= interpolation_index < 2/3:
                        if first_status < second_status:
                            status = first_status + 1
                        else:
                            status = first_status - 1
                    else:
                        status = second_status

                status_conditions.append(status)
                z += line_distance / unique_steps[i]

    # With all the data that we have, we need to input them all into the sql database
    startPoint = subsetDataset["timestamps"].min().item()
    route_name = subsetDataset['route_short_name'].unique()[0]

    # Create data for the DataFrame
    data = []
    for i, (point, status) in enumerate(zip(interpolated_points, status_conditions)): 
        row = {
            'trip_id' : subsetDataset['trip_id'][0],
            'longitude': point[0],
            'latitude': point[1],
            'status': status,
            'route': route_name,
            'timestamp': startPoint + i
        }
        data.append(row)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(data)
    
    engine = create_engine(f"postgresql://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}")

    df.to_sql(
        name="points_fps",
        con=engine,
        if_exists="append",
        index=False,
        method=storeData
    )



In [21]:
trial_data = pd.read_csv("complete_data.csv")
trial_data['coordinates'] = trial_data.apply(lambda row: [row['stop_lon'], row['stop_lat']], axis=1)

In [24]:
ss = trial_data[(trial_data.trip_id == "957-12602-68400-2-ea1af35a")]
ss.groupby('trip_id').apply(interpolation_fps)

0    957-12602-68400-2-ea1af35a
1    957-12602-68400-2-ea1af35a
2    957-12602-68400-2-ea1af35a
3    957-12602-68400-2-ea1af35a
4    957-12602-68400-2-ea1af35a
Name: trip_id, dtype: object


StringDataRightTruncation: value too long for type character varying(255)
CONTEXT:  COPY points_fps, line 1, column trip_id: "0    957-12602-68400-2-ea1af35a
1    957-12602-68400-2-ea1af35a
2    957-12602-68400-2-ea1af35a
3   ..."


In [8]:
import sqlalchemy
print(sqlalchemy.__version__)

2.0.19
