<p style="font-weight:bold; font-size:40px; color:green; line-height:1; margin:0px">
    Smart City Applications in Land Use and Transport (SCALUT)
</p>

## TfNSW GTFS-R Bus Vehicle Positions

<p style="font-weight:bold; font-size:24px; color:Gold; line-height:1; margin:4px 0px">
    1.2 Transform .CSV Files
</p>

<p style="font-weight:bold; font-size:18px; color:tomato; line-height:1; margin:4px 0px">
    Housekeeping: Import Libraries/Packages
</p>

In [None]:
import os
from datetime import datetime
from zipfile import ZipFile
import pandas as pd
import glob
import time
from numpy import loadtxt
import geopandas as gpd
import warnings

<p style="font-weight:bold; font-size:18px; color:tomato; line-height:1; margin:4px 0px">
    Specify Project Directory and Folders and Define Variables
</p>

In [None]:
## Specify the time-period sub-folder to process.
## FileTP is appended to each stage folder path in the next cell and is
## embedded in the names of the output CSV and SHP files.
# FileTP = '200901_0000-2400'
# FileTP = '201014_0000-2400'
# FileTP = '201111_0000-2400'
FileTP = 'Test_201014_0800-0805'

## Specify the GTFS-R file prefix.
## Used to match the input files (prefix + '*.csv') and to name the outputs.
GTFS_VP_Prefix = 'GTFS_VP'

## Specify the main directory that stores input and output folders.
## NOTE: absolute, machine-specific path -- adjust before running on another machine.
DataDir = r'C:\OneMetis Dropbox\@One.IMS\Datasets\SCALUT_DW\TfNSW_GTFS_Buses'

## Specify the main folder and file id for the GTFS Static data.
## NOTE(review): neither name is referenced by the other cells visible in this
## notebook -- presumably kept for consistency with sibling notebooks; confirm.
FldRawStatic = '10_Raw_Static'
FileIdStatic = '20200901190900'
# FileIdStatic = '20201001191000'
# FileIdStatic = '20201014201000'

## Specify the main folders that store input and output data.
## Only FldRawCSVvp (input), FldTransVP (CSV output) and FldVP_Shp (SHP output)
## are turned into directory paths in the next cell; the remaining folder names
## are not referenced by the cells visible in this notebook.
# FldRawPB = '10_Raw_PB'
FldRawCSVvp = '11_CSV_Raw_VP'
FldTransVP = '12_CSV_Transformed_VP'
FldClnTU = '13_CSV_Cleaned_Unique_TU'
FldNodesLinks = '22_CSV_Fu_Nodes_Links'
FldNL_Shp = '22_SHP_Fu_Nodes_Links'
FldVP_Shp = '22_SHP_VP_GIS'

## Specify the filename and location for the Routes List
## (currently disabled -- every option below is commented out)
# FN_RoutesList = 'List_RouteShortNames_SydneyT80.txt'
# FN_RoutesList = 'List_RouteShortNames_TheGong.txt'
## OR
# Flt_Agency = 'Premier Illawarra'
# Flt_Agency = 'Transit Systems'

In [None]:
## Directory paths for the selected time period (FileTP).
## The path strings are built exactly as before (joined with '/').
# DirRawPBvp = DataDir + '/' + FldRawPB + '/' + FileTP
DirRawCSVvp = DataDir + '/' + FldRawCSVvp + '/' + FileTP    # input: raw VP CSV files
DirTransVP = DataDir + '/' + FldTransVP + '/' + FileTP      # output: combined CSV
DirVP_Shp = DataDir + '/' + FldVP_Shp + '/' + FileTP        # output: point shapefile

## Create any missing folder (including parents).
## exist_ok=True replaces the separate "exists?" check: it is a single call with
## no check-then-create race, and it raises FileExistsError immediately if the
## path exists but is a regular file instead of failing later during read/write.
for _dir in (DirRawCSVvp, DirTransVP, DirVP_Shp):
    os.makedirs(_dir, exist_ok=True)

# File_RoutesList = DataDir + '/' + FN_RoutesList

<p style="font-weight:bold; font-size:18px; color:tomato; line-height:1; margin:4px 0px">
    Define Functions
</p>

In [None]:
#################################################
## Read Raw VP CSV
def Read_CSV_Raw_VP(f):
    """Load one raw Vehicle Positions CSV into a DataFrame with fixed column dtypes.

    Parameters
    ----------
    f : str, path object or file-like object
        Anything accepted by ``pandas.read_csv`` as its first argument.

    Returns
    -------
    pandas.DataFrame
        The file contents. The columns listed below are read with an explicit
        dtype (text, float, or pandas nullable ``Int64``); any other column in
        the file is left to pandas' own type inference.
    """
    ## Columns kept as text rather than being parsed as numbers or dates
    text_columns = (
        'id',
        'vehicle.trip.trip_id',
        'vehicle.trip.start_time',
        'vehicle.trip.start_date',
        'vehicle.trip.schedule_relationship',
        'vehicle.trip.route_id',
        'vehicle.congestion_level',
        'vehicle.vehicle.id',
        'vehicle.occupancy_status',
        'VPheaderTS',
        'vehicle.timestampUTC',
        'vehicle.trip.start_DateTimeUTC',
    )
    ## Columns read as floating point numbers
    float_columns = (
        'vehicle.position.latitude',
        'vehicle.position.longitude',
        'vehicle.position.bearing',
        'vehicle.position.speed',
    )

    column_types = {name: 'str' for name in text_columns}
    column_types.update({name: 'float' for name in float_columns})
    ## 'Int64' (capital I) is the pandas nullable integer dtype
    column_types['vehicle.timestamp'] = 'Int64'

    ## The 'vehicle.vehicle.[transit_realtime.tfnsw_vehicle_descriptor].*' columns
    ## are intentionally given no explicit dtype, and no column is parsed as a
    ## date here (parse_dates is not used).
    raw_vp = pd.read_csv(f, sep=',', dtype=column_types)
    return raw_vp


<p style="font-weight:bold; font-size:18px; color:tomato; line-height:1; margin:4px 0px">
    FOR ARTEMIS: Combine Complete Raw CSV Files
</p>

In [None]:
## Record Start Time
tStart = datetime.now()
print('PROCESSING DATA FOR', FileTP, "...")
print('Time Start:', tStart.isoformat(' ', 'seconds'))

## Define output file paths: the combined CSV and its point shapefile
PathTransVP = DirTransVP + '/' + GTFS_VP_Prefix + '_' + FileTP + '.csv'
PathTransVPshp = DirVP_Shp + '/' + GTFS_VP_Prefix + '_' + FileTP + '.shp'

## List all raw VP CSV files in the input folder.
## Sorted so the row order of the combined output is the same on every run
## (glob.glob() returns matches in arbitrary order).
all_files = sorted(glob.glob(os.path.join(DirRawCSVvp, GTFS_VP_Prefix + '*.csv')))
iFile = len(all_files)

## Stop early -- before any previous output is deleted -- when there is nothing
## to combine. Without this check the cell would fail later with a NameError,
## or silently re-export a df_ConVP left over from an earlier run of the kernel.
if iFile == 0:
    raise FileNotFoundError("No '" + GTFS_VP_Prefix + "*.csv' files found in: " + DirRawCSVvp)

## Remove outputs of a previous run so they cannot be mistaken for the new result
for stale_path in (PathTransVP, PathTransVPshp):
    if os.path.exists(stale_path):
        os.remove(stale_path)

## Read every file, then concatenate ONCE.
## Concatenating inside the loop re-copies all rows accumulated so far on every
## iteration; a single concat over the list gives the same frame without that cost.
## NOTE: no source-filename column is added to the combined frame.
df_ConVP = pd.concat([Read_CSV_Raw_VP(f) for f in all_files], ignore_index=True)

## Export concatenated file to CSV
df_ConVP.to_csv(PathTransVP, index=False)
print(df_ConVP.shape)

## Convert df_ConVP into spatial information: one WGS84 (EPSG:4326) point per row,
## with x = longitude and y = latitude
gdf_ConVP = gpd.GeoDataFrame(df_ConVP,
                             geometry=gpd.points_from_xy(df_ConVP['vehicle.position.longitude'],
                                                         df_ConVP['vehicle.position.latitude']),
                             crs='EPSG:4326')

## Export to SHP File
## Warnings are silenced only for the duration of the export. catch_warnings()
## restores the previous filters on exit, whereas warnings.resetwarnings() would
## discard every warning filter configured anywhere else in the kernel.
## NOTE(review): the ESRI Shapefile format limits field names to 10 characters,
## so the long dotted column names are presumably shortened on export and the
## silenced warnings likely report this -- confirm the field names in the .dbf.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    gdf_ConVP.to_file(PathTransVPshp)

## Record End Time
tEnd = datetime.now()
print(iFile, 'Files Processed:', tEnd.isoformat(' ', 'seconds') + '; Time Spent:', tEnd-tStart)
print('Transformed VP file saved in:', PathTransVP)
print('GIS file saved in:', PathTransVPshp)

Back to SCALUT TfNSW GTFS <b>[Table of Contents](SCALUT_TfNSW_GTFS_Analysis_TOC_v01.ipynb)</b>