In [1]:
import psycopg2
import glob

In [2]:
# Try connecting to the 'carto' database.
# `conn` is bound before the attempt so that later cells (for example the final
# `if conn:` cleanup) see None instead of raising NameError when connecting fails.
# NOTE(review): the credentials are hardcoded here; consider reading them from
# the environment before sharing this notebook.
conn = None
try:
    conn = psycopg2.connect(dbname='carto', user='carto', password='carto', host='postgis', port='5432')
except psycopg2.DatabaseError as err:
    # Show the driver's message so the cause of the failure is visible.
    print("I am unable to connect the database:", err)

In [3]:
# Prepare the SQL that (re)creates the 'taxi' table for the NY taxi data.
# NOTE: the leading `drop table if exists` discards any existing 'taxi' table
# together with its rows, so running this query always starts from an empty table.
# The columns are listed in the same order as the insert statement used later on.
q_create_table = """
                 drop table if exists taxi;
                 create table taxi
                (
                    vendorID int,
                    tpep_pickup_datetime timestamp,
                    tpep_dropoff_datetime timestamp,
                    passenger_count int,
                    trip_distance numeric,
                    pickup_longitude numeric,
                    pickup_latitude numeric,
                    RateCodeID int,
                    store_and_fwd_flag char(1),
                    dropoff_longitude numeric,
                    dropoff_latitude numeric,
                    payment_type int,
                    fare_amount numeric,
                    extra numeric,
                    mta_tax numeric,
                    tip_amount numeric,
                    tolls_amount numeric,
                    improvement_surcharge numeric,
                    total_amount numeric
                )
                """


In [4]:
# Create the 'taxi' table inside the postgis database.
try:
    cur = conn.cursor()  # this cursor is reused by the later insert/query cells
    cur.execute(q_create_table)  # drop + create in one round trip
    conn.commit()
    print('Table created')

except psycopg2.DatabaseError as err:
    # A failed statement leaves the transaction aborted; roll back so the
    # connection remains usable for the following cells.
    conn.rollback()
    print("Failed to create the table:", err)

Table created


In [5]:
# Collect the NY taxi data file paths and order them ascending
filenames = glob.glob('../data/NY_taxi_data/*')
filenames.sort()
# Preview the first three paths as the cell output
filenames[:3]

['../data/NY_taxi_data/yellow_tripdata_2015-01_00',
 '../data/NY_taxi_data/yellow_tripdata_2015-01_01',
 '../data/NY_taxi_data/yellow_tripdata_2015-01_02']

In [7]:
# Insert the rows of one data file (the second entry of `filenames`) into 'taxi'.
# The statement is parameterized: one %s placeholder per column, 19 in total.
query = '''insert into taxi
                   (
                        vendorID,
                        tpep_pickup_datetime,
                        tpep_dropoff_datetime,
                        passenger_count,
                        trip_distance,
                        pickup_longitude,
                        pickup_latitude,
                        RateCodeID,
                        store_and_fwd_flag,
                        dropoff_longitude,
                        dropoff_latitude,
                        payment_type,
                        fare_amount,
                        extra,
                        mta_tax,
                        tip_amount,
                        tolls_amount,
                        improvement_surcharge,
                        total_amount
                    )
                   values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''

try:
    with open(filenames[1], 'r') as file:
        # Lines read from a file keep their trailing newline and are therefore
        # always truthy; test the stripped text so blank lines are really skipped
        # (a blank line would otherwise become a 1-tuple that cannot fill 19 placeholders).
        stripped_lines = (line.strip() for line in file)
        rows = [tuple(text.split(',')) for text in stripped_lines if text]

    # Run the same insert once per parsed row.
    cur.executemany(query, rows)
    conn.commit()
    print('Data inserted to the table')

except psycopg2.DatabaseError as err:
    # Discard the partial insert and leave the aborted-transaction state so the
    # connection can still be used by the following cells.
    conn.rollback()
    print("Failed to copy data to the table:", err)

Data inserted to the table


In [None]:
# Build a point for the pickup and the dropoff location of every trip.
# ST_MakePoint takes (x, y), i.e. (longitude, latitude).
# The column names must match the 'taxi' table definition
# (pickup_longitude / dropoff_longitude); a misspelled name makes the query fail.
q_make_points = '''
                select
                ST_MakePoint(pickup_longitude, pickup_latitude) as pickup_point,
                ST_MakePoint(dropoff_longitude, dropoff_latitude) as dropoff_point
                from taxi
                '''
cur.execute(q_make_points)

In [8]:
# Release this client's database connection once the work is done
if conn:
    conn.close()