In [0]:
from pyspark.sql.types import *

In [0]:
df_payments = spark.read.format("csv") \
    .option("inferSchema" , "false") \
    .option("header","false") \
    .option("sep", ",") \
    .load('dbfs:/FileStore/payments.csv') \
    .toDF("payment_id", "date", "amount", "rider_id")

In [0]:
display(df_payments)

payment_id,date,amount,rider_id
payment_id,date,amount,account_number
1,01-05-2019,9,1329
2,01-06-2019,9,1329
3,01-07-2019,9,1329
4,01-08-2019,9,1329
5,01-09-2019,9,1329
6,01-10-2019,9,1329
7,01-11-2019,9,1329
8,01-12-2019,9,1329
9,01-01-2020,9,1329


In [0]:
df_payments.write.format("delta") \
    .mode("overwrite") \
    .save("/delta/bronze_payments")

In [0]:
df_stations = spark.read.format("csv") \
    .option("inferSchema" , "false") \
    .option("header","false") \
    .option("sep", ",") \
    .load('dbfs:/FileStore/stations.csv') \
    .toDF("station_id", "name", "latitude", "longitude")

In [0]:
df_stations.display()

station_id,name,latitude,longitude
station_id,name,latitude,longitude
525,0 Glenwood Ave & Touhy Ave,,
Name: start_station_name,"dtype: object""",0 42.0128,
Name: start_lat,"dtype: float64""",0 -87.665906,
Name: start_lng,"dtype: float64""",,
TA1306000016,2 Sheffield Ave & Fullerton Ave,,
Name: start_station_name,"dtype: object""",2 41.925602,
Name: start_lat,"dtype: float64""",2 -87.653708,
Name: start_lng,"dtype: float64""",,
KA1504000151,3 Clark St & Bryn Mawr Ave,,


In [0]:
df_stations.write.format("delta") \
    .mode("overwrite") \
    .save("/delta/bronze_stations")

In [0]:
riders_schema = StructType([
    StructField("rider_id", IntegerType(), False),
    StructField("first", StringType(), True),
    StructField("last", StringType(), True),
    StructField("address", StringType(), True),
    StructField("birthday", DateType(), True),
    StructField("account_start_date", DateType(), True),
    StructField("account_end_date", DateType(), True),
    StructField("is_member", BooleanType(), False)
    ])

In [0]:
df_riders = spark.read.format("csv") \
    .schema(riders_schema) \
    .option("sep", ",") \
    .load('dbfs:/FileStore/riders.csv')

In [0]:
display(df_riders)


rider_id,first,last,address,birthday,account_start_date,account_end_date,is_member
1000,Kimberly,Williams,1200 Alyssa Squares,1988-03-28,2019-04-23,,True
1001,Anthony,Erickson,397 Diana Ferry,1976-12-04,2019-11-01,2020-09-01,True
1002,Jessica,Roach,644 Brittany Row Apt. 097,1998-03-28,2022-02-04,,True
1003,Andrew,Ryan,996 Dickerson Turnpike,1999-03-05,2019-08-26,,False
1004,Ian,Peters,7009 Nathan Expressway,1969-06-25,2019-09-14,,True
1005,Michael,Gillespie,224 Washington Mills Apt. 467,1974-09-28,2020-03-24,,False
1006,Ryan,Peters,1137 Angela Locks,2003-07-10,2020-11-27,2021-06-01,True
1007,Crystal,Sanchez,979 Phillips Ways,1987-10-15,2016-12-11,,False
1008,David,Hicks,7691 Evans Court,1986-07-12,2021-03-28,2021-04-01,True
1009,Daniel,Hicks,9922 Jim Crest Apt. 319,1981-02-14,2020-06-12,2021-02-01,True


In [0]:
df_riders.write.format("delta") \
    .mode("overwrite") \
    .save("/delta/bronze_riders")

In [0]:
trip_schema = StructType([
    StructField("trip_id", StringType(), False),
    StructField("rideable_type", StringType(), True),
    StructField("start_at", TimestampType(), True),
    StructField("ended_at", TimestampType(), True),
    StructField("start_station_id", StringType(), True),
    StructField("end_station_id", StringType(), True),
    StructField("rider_id", IntegerType(), True)
    ])


In [0]:
df_trips = spark.read.format("csv") \
    .schema(trip_schema) \
    .option("sep", ",") \
    .load('dbfs:/FileStore/trips.csv')

In [0]:
display(df_trips)

trip_id,rideable_type,start_at,ended_at,start_station_id,end_station_id,rider_id
ride_id,rideable_type,,,start_station_id,end_station_id,
C2F7DD78E82EC875,electric_bike,,,525,RP-007,3516.0
A6CF8980A652D272,electric_bike,,,525,RP-007,10759.0
BD0F91DFF741C66D,classic_bike,,,TA1306000016,TA1307000001,2079.0
CBB80ED419105406,classic_bike,,,KA1504000151,TA1309000021,2480.0
DDC963BFDDA51EEA,classic_bike,,,TA1309000002,TA1305000029,10894.0
A39C6F6CC0586C0B,classic_bike,,,637,TA1305000034,5335.0
BDC4AB637EDF981B,classic_bike,,,KA1504000158,13323,2583.0
81751A3186E59A6B,classic_bike,,,TA1306000016,13271,6906.0
154222B86A338ABD,electric_bike,,,13304,WL-012,5394.0


In [0]:
df_trips.write.format("delta") \
    .mode("overwrite") \
    .save("/delta/bronze_trips")