### FHV Bases
Extract and transform the FHV (For-Hire Vehicle) Bases data.

Create the FHV Bases dimension table, register it as a global temporary view, and save it as an unmanaged table.

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.functions import col

In [0]:
# List the entries under /mnt/datablob/ (the folder FhvBases.json is read from).
# Leaving the name as the cell's last expression renders the listing as output.
datablobListing = dbutils.fs.ls("/mnt/datablob/")
datablobListing

In [0]:
# Explicit schema for the FHV Bases JSON records.
# Every field is declared nullable. "Address" and "GeoLocation" are nested
# structs; they are built on their own first and then embedded below.

addressStruct = (
    StructType()
    .add("Building", StringType(), True)
    .add("Street", StringType(), True)
    .add("City", StringType(), True)
    .add("State", StringType(), True)
    .add("Postcode", StringType(), True)
)

geoLocationStruct = (
    StructType()
    .add("Latitude", StringType(), True)
    .add("Longitude", StringType(), True)
    .add("Location", StringType(), True)
)

fhvBasesSchema = (
    StructType()
    .add("License Number", StringType(), True)
    .add("Entity Name", StringType(), True)
    .add("Telephone Number", LongType(), True)
    .add("SHL Endorsed", StringType(), True)
    .add("Type of Base", StringType(), True)
    .add("Address", addressStruct, True)
    .add("GeoLocation", geoLocationStruct, True)
)

In [0]:
# Read the FHV Bases JSON file with the explicit schema applied (no schema
# inference). The "multiline" option lets a single JSON record span several lines.
fhvBasesPath = "dbfs:/mnt/datablob/FhvBases.json"

fhvBasesReader = (
    spark.read
    .schema(fhvBasesSchema)
    .option("multiline", "true")
)
fhvBasesDF = fhvBasesReader.json(fhvBasesPath)

In [0]:
# Flatten the nested records into top-level columns.
# Each pair is (source field, output column name). Dotted source paths reach
# into the Address / GeoLocation structs. List order is the output column order.
fhvBasesColumnMap = [
    ("License Number", "BaseLicenseNumber"),
    ("Entity Name", "EntityName"),
    ("Telephone Number", "TelephoneNumber"),
    ("SHL Endorsed", "ShlEndorsed"),
    ("Type of Base", "BaseType"),
    ("Address.Building", "AddressBuilding"),
    ("Address.Street", "AddressStreet"),
    ("Address.City", "AddressCity"),
    ("Address.State", "AddressState"),
    ("Address.Postcode", "AddressPostCode"),
    ("GeoLocation.Latitude", "GeoLocationLatitude"),
    ("GeoLocation.Longitude", "GeoLocationLongitude"),
    ("GeoLocation.Location", "GeoLocationLocation"),
]

fhvBasesFlatDF = fhvBasesDF.select(
    [col(sourceField).alias(outputName) for sourceField, outputName in fhvBasesColumnMap]
)

print("Extracted FHV Bases data")

In [0]:
# Optional preview of the flattened DataFrame; currently disabled.
# NOTE(review): presumably commented out so automated runs skip the extra
# Spark action — confirm before re-enabling.
# display(fhvBasesFlatDF)

BaseLicenseNumber,EntityName,TelephoneNumber,ShlEndorsed,BaseType,AddressBuilding,AddressStreet,AddressCity,AddressState,AddressPostCode,GeoLocationLatitude,GeoLocationLongitude,GeoLocationLocation
B02865,"VIER-NY,LLC",6466657536.0,No,BLACK CAR BASE,636,WEST 28 STREET,NEW YORK,NY,10001,40.75273,-74.006408,"(40.75273, -74.006408)"
B02634,VETERANS RADIO DISPATCHER CORP.,7183647878.0,No,LIVERY BASE,131,KINGSBRIDGE ROAD,BRONX,NY,10468,40.86927,-73.90281,"(40.86927, -73.90281)"
B80094,ALPHA VAN LINE,5162850750.0,No,COMMUTER VAN AUTHORITY BASE,115-54,238 STREET,ELMONT,NY,11003,40.693473,-73.724446,"(40.693473, -73.724446)"
B02677,"A.T.B. CAR AND LIMOUSINE SERVICE, INC.",7184854444.0,No,LIVERY BASE,866,NEW LOTS AVENUE,BROOKLYN,NY,11208,40.667838,-73.8788,"(40.667838, -73.8788)"
B02152,"KYOEI LIMOUSINE, INC.",7183263258.0,No,LUXURY/LIMOUSINE,57-48,MASPETH AVENUE,MASPETH,NY,11378,40.722961,-73.91031,"(40.722961, -73.91031)"
B02844,"ENDOR CAR & DRIVER,LLC.",4154758459.0,No,BLACK CAR BASE,31-00,47 AVENUE SUITE # 4123A,LIC,NY,11101,40.742082,-73.93552,"(40.742082, -73.93552)"
B02841,"SKYWAY EXECUTIVE SERVICE, INC",7183595959.0,No,BLACK CAR BASE,68-20A,FRESH MEADOW LANE,FRESH MEADOWS,NY,11365,40.733337,-73.794706,"(40.733337, -73.794706)"
B00472,FARRELL'S LEASING CO.,2128616300.0,No,LUXURY/LIMOUSINE,22-11,38 AVENUE,LIC,NY,11101,40.757077,-73.937504,"(40.757077, -73.937504)"
B01739,CITY CAR SERVICE CORP,7184182222.0,No,LIVERY BASE,429,SUTTER AVENUE,BROOKLYN,NY,11212,40.668473,-73.903383,"(40.668473, -73.903383)"
B00248,YELLOWSTONE TRANSPORTATION INC.,7185397777.0,No,LIVERY BASE,41-31,MAIN STREET,FLUSHING,NY,11355,40.758114,-73.82962,"(40.758114, -73.82962)"


In [0]:
# Register the flattened dimension as a global temporary view, replacing any
# view already registered under the same name. Global temp views are resolved
# through the `global_temp` database (e.g. global_temp.DimFHVBases).
dimViewName = "DimFHVBases"
fhvBasesFlatDF.createOrReplaceGlobalTempView(dimViewName)

print("Saved FHV Bases dimension as a global temp view")

In [0]:
# Persist the dimension as an unmanaged (external) table: the table is registered
# as TaxiServiceWarehouse.DimFHVBases while its data files live at the explicit
# path below. "overwrite" mode replaces whatever the table held before.
# NOTE(review): no .format(...) is set, so the storage format falls back to the
# session default (spark.sql.sources.default) — confirm that it matches the
# ".parquet" suffix used in the path.
dimTablePath = "/mnt/datalake/DimensionalModel/Dimensions/FHVBasesDimension.parquet"
dimTableName = "TaxiServiceWarehouse.DimFHVBases"

dimWriter = fhvBasesFlatDF.write.mode("overwrite").option("path", dimTablePath)
dimWriter.saveAsTable(dimTableName)

print("Saved FHVBases dataframe as a dimension and unmanaged table")

In [0]:
# End the notebook run, handing "Success" back as the notebook's exit value.
exitStatus = "Success"
dbutils.notebook.exit(exitStatus)