# Google Cloud Storage Access Notebook

This notebook contains code for the prototyping of loading to bigquery via Google Cloud Storage.

Important! Clone the [project repository](https://github.com/spencermarley/smu-cs611-mleng-project.git) in order to access the parsing functions.

## Section 1 - Imports

In [1]:
import gcsfs
import sys
import os
import json
import math
import itertools
from collections import Counter
import re

import datetime
from datetime import date, timedelta

import pandas as pd
import geopandas as gpd
from matplotlib import pyplot as plt
import seaborn as sns

from shapely import wkt
from shapely.ops import nearest_points

import warnings
from tqdm import tqdm
import joblib
warnings.filterwarnings('ignore')

from src import jsonParser
from src import assignment

## Section 2 - Bucket Connection Setup

In [2]:
project = 'ml-eng-cs611-group-project'
nea_bucket = 'ml-eng-cs611-group-project-nea'
taxi_bucket = 'ml-eng-cs611-group-project-taxis'
dataset_id='taxi_dataset'
measure = 'rainfall'
measures = ['rainfall','air-temperature','relative-humidity']

fs = gcsfs.GCSFileSystem(project=project)
nea_filenames = fs.glob('/'.join([nea_bucket,measure,"*"]))

In [3]:
# nea_filenames # 'ml-eng-cs611-group-project-nea/rainfall/2022-05-24T22-21-01.json' ...

In [4]:
taxi_filenames = fs.glob('/'.join([taxi_bucket,'taxis',"*"]))

In [5]:
# taxi_filenames # 'ml-eng-cs611-group-project-taxis/taxis/2022-05-27T14-00-03.json' ...

## Section 3a - Access NEA files

We will read the NEA `.json` files and append all measures to a single dict

In [6]:
fs = gcsfs.GCSFileSystem(project=project)

nea_data = {}

for measure in measures:
    filenames = fs.glob('/'.join([nea_bucket,measure,"*"]))
    file = filenames[0]
    parser = jsonParser.jsonParser(fs)
    
    items = parser.get_items(file,measure)    
    metadata = parser.get_metadata(file,measure)
    
    nea_data[measure]={'items':items,'metadata':metadata}

In [7]:
len(nea_data['rainfall']['items'])

66

In [8]:
nea_data['rainfall']['items'].tail(3)

Unnamed: 0,timestamp,station_id,value,Description
63,2022-05-24 22:00:00-01:00,S116,0.0,rainfall
64,2022-05-24 22:00:00-01:00,S104,0.0,rainfall
65,2022-05-24 22:00:00-01:00,S100,0.0,rainfall


Now we load the taxi data

In [9]:
taxi_data = parser.load_taxi_data(taxi_filenames[0])

In [10]:
taxi_data

Unnamed: 0,timestamp,longitude,latitude
0,2022-05-27 14:00:00-03:00,103.624620,1.300000
1,2022-05-27 14:00:00-03:00,103.658034,1.312330
2,2022-05-27 14:00:00-03:00,103.669129,1.325566
3,2022-05-27 14:00:00-03:00,103.679667,1.326507
4,2022-05-27 14:00:00-03:00,103.679980,1.314830
...,...,...,...
1017,2022-05-27 14:00:00-03:00,103.988921,1.357636
1018,2022-05-27 14:00:00-03:00,103.989799,1.358836
1019,2022-05-27 14:00:00-03:00,103.989860,1.360000
1020,2022-05-27 14:00:00-03:00,103.989890,1.360000


## Section 4 - Write to BigQuery

We have created 6 tables in Bigquery:

- `air-temperature-items`
- `air-temperature-metadata`
- `rainfall-items`
- `rainfall-metadata`
- `relative-humidity-items`
- `relative-humidity-metadata`

First, we write the corresponding measure metadata and items to the correct table

In [8]:
for measure in measures:
    print(f"Writing items for {measure}")
    nea_data[measure]['items'].to_gbq(dataset_id+'.'+measure+'-items',project,chunksize=None,if_exists='append')
    print(f"Writing metadata for {measure}")
    nea_data[measure]['metadata'].to_gbq(dataset_id+'.'+measure+'-metadata',project,chunksize=None,if_exists='append')

100%|██████████| 1/1 [00:00<00:00, 9709.04it/s]
100%|██████████| 1/1 [00:00<00:00, 10837.99it/s]
100%|██████████| 1/1 [00:00<00:00, 10618.49it/s]
100%|██████████| 1/1 [00:00<00:00, 9754.20it/s]
100%|██████████| 1/1 [00:00<00:00, 11586.48it/s]
100%|██████████| 1/1 [00:00<00:00, 10922.67it/s]


Next, we write the taxi data

In [9]:
taxi_data.to_gbq(dataset_id+'.'+'taxi-availability',project,chunksize=None,if_exists='append')

100%|██████████| 1/1 [00:00<00:00, 9986.44it/s]


# Load most recent file

In [6]:
import pandas as pd
import gcsfs
import sys
import json

sys.path.append('smu-cs611-mleng-project')
from src import jsonParser

def load_nea_to_gbq(project:str,bucket:str,dataset_id:str,measure:str,filename:str):
    '''Load a single json 
    Args:
        project:str:
        bucket
        dataset_id
        measure
        filename
    '''
    fs = gcsfs.GCSFileSystem(project=project)
    filenames = fs.glob('/'.join([bucket,measure,"*"]))
    current_file=filenames[-1]
    print(f"Current processing {current_file}")

    parser = jsonParser.jsonParser(fs)
    items = parser.get_items(current_file,measure)
    metadata = parser.get_metadata(current_file,measure)

    item_table = dataset_id+'.'+measure+'-items'
    metadata_table = dataset_id+'.'+measure+'-metadata'

    print(f"Writing {measure} items to {item_table}")
    items.to_gbq(item_table,project,chunksize=None,if_exists='append')

    print(f"Writing {measure} metadata to {metadata_table}")
    metadata.to_gbq(metadata_table,project,chunksize=None,if_exists='append')

project=params['project']
bucket=params['bucket']
dataset_id=params['dataset_id']
    

if __name__ == '__main__':
    import argparse
    
    parser = argparse.ArgumentParser(description='Reads a single NEA JSON file to GBQ')    
    parser.add_argument('--project','-p', default='ml-eng-cs611-group-project', type=str, help='GCP project name i.e. ml-eng-cs611-group-project')
    parser.add_argument('--bucket','-b', default='ml-eng-cs611-group-project-nea', type=str, help='GCS bucket name i.e. ml-eng-cs611-group-project-nea')
    parser.add_argument('--dataset_id','-d', default='taxi_dataset', type=str, help='GCS bucket name i.e. ml-eng-cs611-group-project-nea')
    parser.add_argument('--measure','-m', type=str, help='NEA measure i.e. air-temperature,relative-humidity or rainfall')
    parser.add_argument('--filename','-f', type=str, help='If provided, file to load')
    parser.add_argument('--date','-d', type=str, help='YYYY-MM-DD format. If provided, load data up to this date')
    args = parser.parse_args()
    
    project = args.project
    bucket = args.bucket
    dataset_id = args.dataset_id
    measure = args.measure
    filename = args.filename
    date = args.date
    

Current processing ml-eng-cs611-group-project-nea/rainfall/2022-05-24T22-21-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 12018.06it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 6078.70it/s]


Current processing ml-eng-cs611-group-project-nea/air-temperature/2022-05-24T22-21-01.json
Writing air-temperature items to taxi_dataset.air-temperature-items


100%|██████████| 1/1 [00:00<00:00, 12483.05it/s]


Writing air-temperature metadata to taxi_dataset.air-temperature-metadata


100%|██████████| 1/1 [00:00<00:00, 9576.04it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-24T22-21-01.json
Writing relative-humidity items to taxi_dataset.relative-humidity-items


100%|██████████| 1/1 [00:00<00:00, 11428.62it/s]


Writing relative-humidity metadata to taxi_dataset.relative-humidity-metadata


100%|██████████| 1/1 [00:00<00:00, 9098.27it/s]


In [7]:
stop_file = '2022-06-04T00-00-06.json'

In [8]:
stop_file[:10]

'2022-06-04'

# Batch upload until date

In [None]:
import pandas as pd
import gcsfs
import sys
import json

sys.path.append('smu-cs611-mleng-project')
from src import jsonParser

params={
    'project':'ml-eng-cs611-group-project',
    'nea_bucket':'ml-eng-cs611-group-project-nea',
    'taxi_bucket':'ml-eng-cs611-group-project-taxis',
    'dataset_id':'taxi_dataset',
    'measures':['rainfall','air-temperature','relative-humidity'],
    'stop_date':'2022-05-28'
}

project=params['project']
nea_bucket=params['nea_bucket']
taxi_bucket=params['taxi_bucket']
dataset_id=params['dataset_id']
stop_date=params['stop_date']

nea_filenames = fs.glob('/'.join([nea_bucket,measure,"*"]))
taxi_filenames = fs.glob('/'.join([taxi_bucket,'taxis',"*"]))
parser = jsonParser.jsonParser(fs)

for measure in params['measures']:
    fs = gcsfs.GCSFileSystem(project=project)
    filenames = fs.glob('/'.join([nea_bucket,measure,"*"]))
    for file in nea_filenames:
        if stop_date in file[:10]:
            break
        else:            
            print(f"Current processing {file}")            
            items = parser.get_items(file,measure)
            metadata = parser.get_metadata(file,measure)

            item_table = dataset_id+'.'+measure+'-items'
            metadata_table = dataset_id+'.'+measure+'-metadata'

            print(f"Writing {measure} items to {item_table}")
            items.to_gbq(item_table,project,chunksize=None,if_exists='append')

            print(f"Writing {measure} metadata to {metadata_table}")
            metadata.to_gbq(metadata_table,project,chunksize=None,if_exists='append')


for file in taxi_filenames:    
    if stop_date in file[:10]:
        break
    else:
        taxi_data = parser.load_taxi_data(file)
        taxi_data.to_gbq(dataset_id+'.'+'taxi-availability',project,chunksize=None,if_exists='append')

Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-24T22-21-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9058.97it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11214.72it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-24T22-30-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9279.43it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 8559.80it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-24T22-45-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10951.19it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9709.04it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-24T23-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11397.57it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10951.19it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-24T23-15-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10782.27it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11096.04it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-24T23-30-05.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9709.04it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11491.24it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-24T23-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10155.70it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11366.68it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T00-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10230.01it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 6213.78it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T00-15-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10672.53it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10837.99it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T00-30-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10230.01it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10672.53it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T00-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 8128.50it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10979.85it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T01-00-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 8081.51it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 7319.90it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T01-15-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9754.20it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 8981.38it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T01-30-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11125.47it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9892.23it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T01-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9597.95it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10810.06it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T02-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11066.77it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9489.38it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T02-15-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9799.78it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10058.28it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T02-30-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11037.64it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10672.53it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T02-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10459.61it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10591.68it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T03-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10699.76it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10280.16it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T03-15-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10381.94it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11244.78it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T03-30-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 8256.50it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9892.23it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T03-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11397.57it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11428.62it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T04-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10810.06it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11184.81it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T04-15-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10591.68it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 8943.08it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T04-30-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11366.68it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10082.46it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T04-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9279.43it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9279.43it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T05-00-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10754.63it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9939.11it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T05-15-05.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 8774.69it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 8886.24it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T05-30-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11244.78it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10591.68it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T05-45-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11522.81it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 5190.97it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T06-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11275.01it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11586.48it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T06-15-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9868.95it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10106.76it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T06-30-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9799.78it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11650.84it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T06-45-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9731.56it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9198.04it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T07-00-07.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10538.45it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 8719.97it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T07-15-06.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 588.26it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9799.78it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T07-30-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11397.57it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11428.62it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T07-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 8439.24it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9000.65it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T08-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9754.20it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10922.67it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T08-15-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11184.81it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11683.30it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T08-30-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10058.28it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10356.31it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T08-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 2400.86it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11915.64it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T09-00-05.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9868.95it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11184.81it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T09-15-05.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11155.06it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11650.84it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T09-30-06.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11244.78it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11214.72it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T09-45-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10979.85it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9986.44it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T10-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11366.68it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9939.11it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T10-15-29.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10866.07it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10979.85it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T10-30-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10407.70it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10922.67it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T10-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10727.12it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10672.53it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T11-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10330.80it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9776.93it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T11-15-05.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 5229.81it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9868.95it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T11-30-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9058.97it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11244.78it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T11-45-07.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11554.56it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10280.16it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T12-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10180.35it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11305.40it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T12-15-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11096.04it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11066.77it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T12-30-05.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11008.67it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11244.78it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T12-45-06.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10155.70it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 8272.79it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T13-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 7884.03it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9642.08it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T13-15-05.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9576.04it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10058.28it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T13-30-06.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9362.29it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 533.83it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T13-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11275.01it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9597.95it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T14-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 2392.64it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10672.53it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T14-15-06.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9341.43it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10672.53it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T14-30-04.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10230.01it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10180.35it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T14-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9915.61it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10305.42it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T15-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9776.93it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10356.31it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T15-15-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11125.47it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10058.28it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T15-30-04.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 5041.23it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 5133.79it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T15-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9467.95it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11335.96it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T16-00-05.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9915.61it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11522.81it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T16-15-08.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 8811.56it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11366.68it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T16-30-04.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10565.00it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11522.81it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T16-45-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 8355.19it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10538.45it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T17-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 2529.74it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9986.44it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T17-15-07.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10782.27it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10230.01it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T17-30-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9915.61it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11366.68it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T17-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11397.57it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11366.68it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T18-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10356.31it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11522.81it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T18-15-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10205.12it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9939.11it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T18-30-08.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10837.99it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 5412.01it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T18-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 8559.80it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10459.61it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T19-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10058.28it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11650.84it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T19-15-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11096.04it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10330.80it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T19-30-06.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9341.43it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10618.49it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T19-45-06.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 5229.81it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10305.42it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T20-00-05.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10699.76it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11096.04it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T20-15-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 8886.24it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11184.81it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T20-30-06.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11305.40it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10951.19it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T20-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11037.64it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11618.57it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T21-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9962.72it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9058.97it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T21-15-06.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 11335.96it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 574.09it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T21-30-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9709.04it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9300.01it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T21-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10381.94it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9619.96it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T22-00-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 8924.05it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10034.22it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T22-15-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9467.95it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9320.68it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T22-30-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9118.05it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9597.95it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T22-45-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9915.61it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 11366.68it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T23-00-01.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 9238.56it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 9218.25it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T23-15-02.json
Writing rainfall items to taxi_dataset.rainfall-items


100%|██████████| 1/1 [00:00<00:00, 10034.22it/s]


Writing rainfall metadata to taxi_dataset.rainfall-metadata


100%|██████████| 1/1 [00:00<00:00, 10131.17it/s]


Current processing ml-eng-cs611-group-project-nea/relative-humidity/2022-05-25T23-30-02.json
Writing rainfall items to taxi_dataset.rainfall-items


  0%|          | 0/1 [00:00<?, ?it/s]

In [1]:
string = '2022-05-27T14-15-03.json'
string[:10]

'2022-05-27'

In [8]:
pattern = r'\d{4}-\d{2}-\d{2}$'
date_match = re.compile(pattern)
print(date_match.findall(string))
print(date_match.findall('2022-05-27'))

[]
['2022-05-27']


## Section 4 - Assignment code

First, get grid dataframe from repo

In [11]:
grids = gpd.read_file('smu-cs611-mleng-project/Gridding/SG_grid/SG_grids.shp')
grids['centroid'] = grids['geometry'].apply(lambda x: x.centroid) # grids get centroid

Next, pass the grid and raw data into the Assignment class

In [15]:
taxi_gdf = parser.load_taxi_gdf(taxi_filenames[0])

In [16]:
assigner = assignment.Assignment(grids=grids,nea_data=nea_data,taxi_data=taxi_gdf)

In [17]:
assigner.nea_preprocess()

Now preprocessing for [rainfall]
rainfall pts has 66 points
Now preprocessing for [air-temperature]
air-temperature pts has 4 points
Now preprocessing for [relative-humidity]
relative-humidity pts has 4 points


Unnamed: 0,grid_num,geometry,centroid,rainfall_station_id,rainfall,Description_x,air-temperature_station_id,air-temperature,Description_y,relative-humidity_station_id,relative-humidity,Description
0,1,"POLYGON ((103.60000 1.47500, 103.61000 1.47500...",POINT (103.60500 1.47006),S112,0.0,rainfall,S121,27.8,air-temperature,S121,85.9,relative-humidity
263,2,"POLYGON ((103.61000 1.47500, 103.62000 1.47500...",POINT (103.61500 1.47006),S112,0.0,rainfall,S100,27.5,air-temperature,S100,89.8,relative-humidity
264,3,"POLYGON ((103.62000 1.47500, 103.63000 1.47500...",POINT (103.62500 1.47006),S112,0.0,rainfall,S100,27.5,air-temperature,S100,89.8,relative-humidity
265,4,"POLYGON ((103.63000 1.47500, 103.64000 1.47500...",POINT (103.63500 1.47006),S112,0.0,rainfall,S100,27.5,air-temperature,S100,89.8,relative-humidity
266,5,"POLYGON ((103.64000 1.47500, 103.65000 1.47500...",POINT (103.64500 1.47006),S112,0.0,rainfall,S100,27.5,air-temperature,S100,89.8,relative-humidity
...,...,...,...,...,...,...,...,...,...,...,...,...
1081,1211,"POLYGON ((104.00000 1.21789, 104.01000 1.21789...",POINT (104.00500 1.21294),S208,0.0,rainfall,S24,29.3,air-temperature,S24,77.8,relative-humidity
1082,1212,"POLYGON ((104.01000 1.21789, 104.02000 1.21789...",POINT (104.01500 1.21294),S208,0.0,rainfall,S24,29.3,air-temperature,S24,77.8,relative-humidity
1083,1213,"POLYGON ((104.02000 1.21789, 104.03000 1.21789...",POINT (104.02500 1.21294),S208,0.0,rainfall,S24,29.3,air-temperature,S24,77.8,relative-humidity
1084,1214,"POLYGON ((104.03000 1.21789, 104.04000 1.21789...",POINT (104.03500 1.21294),S208,0.0,rainfall,S24,29.3,air-temperature,S24,77.8,relative-humidity


In [18]:
assigner.taxi_preprocess()

[1022] coordinates parsed
test array is length [1022]


Unnamed: 0,timestamp,grid_num,taxi_count
98,2022-05-27T13:59:30+08:00,112,2
111,2022-05-27T13:59:30+08:00,113,1
59,2022-05-27T13:59:30+08:00,152,1
79,2022-05-27T13:59:30+08:00,155,17
84,2022-05-27T13:59:30+08:00,156,3
...,...,...,...
160,2022-05-27T13:59:30+08:00,971,1
164,2022-05-27T13:59:30+08:00,972,1
102,2022-05-27T13:59:30+08:00,1013,4
108,2022-05-27T13:59:30+08:00,1058,2


In [19]:
assignment_df = assigner.merge_grids()
assignment_df

Unnamed: 0,grid_num,geometry,centroid,rainfall_station_id,rainfall,Description_x,air-temperature_station_id,air-temperature,Description_y,relative-humidity_station_id,relative-humidity,Description,timestamp,taxi_count
0,1,"POLYGON ((103.60000 1.47500, 103.61000 1.47500...",POINT (103.60500 1.47006),S112,0.0,rainfall,S121,27.8,air-temperature,S121,85.9,relative-humidity,2022-05-27T13:59:30+08:00,0.0
1,2,"POLYGON ((103.61000 1.47500, 103.62000 1.47500...",POINT (103.61500 1.47006),S112,0.0,rainfall,S100,27.5,air-temperature,S100,89.8,relative-humidity,2022-05-27T13:59:30+08:00,0.0
2,3,"POLYGON ((103.62000 1.47500, 103.63000 1.47500...",POINT (103.62500 1.47006),S112,0.0,rainfall,S100,27.5,air-temperature,S100,89.8,relative-humidity,2022-05-27T13:59:30+08:00,0.0
3,4,"POLYGON ((103.63000 1.47500, 103.64000 1.47500...",POINT (103.63500 1.47006),S112,0.0,rainfall,S100,27.5,air-temperature,S100,89.8,relative-humidity,2022-05-27T13:59:30+08:00,0.0
4,5,"POLYGON ((103.64000 1.47500, 103.65000 1.47500...",POINT (103.64500 1.47006),S112,0.0,rainfall,S100,27.5,air-temperature,S100,89.8,relative-humidity,2022-05-27T13:59:30+08:00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1210,1211,"POLYGON ((104.00000 1.21789, 104.01000 1.21789...",POINT (104.00500 1.21294),S208,0.0,rainfall,S24,29.3,air-temperature,S24,77.8,relative-humidity,2022-05-27T13:59:30+08:00,0.0
1211,1212,"POLYGON ((104.01000 1.21789, 104.02000 1.21789...",POINT (104.01500 1.21294),S208,0.0,rainfall,S24,29.3,air-temperature,S24,77.8,relative-humidity,2022-05-27T13:59:30+08:00,0.0
1212,1213,"POLYGON ((104.02000 1.21789, 104.03000 1.21789...",POINT (104.02500 1.21294),S208,0.0,rainfall,S24,29.3,air-temperature,S24,77.8,relative-humidity,2022-05-27T13:59:30+08:00,0.0
1213,1214,"POLYGON ((104.03000 1.21789, 104.04000 1.21789...",POINT (104.03500 1.21294),S208,0.0,rainfall,S24,29.3,air-temperature,S24,77.8,relative-humidity,2022-05-27T13:59:30+08:00,0.0


In [10]:
taxi_data

Unnamed: 0,index,timestamp,geometry
0,0,2022-05-27T13:59:30+08:00,POINT (103.62462 1.30000)
1,1,2022-05-27T13:59:30+08:00,POINT (103.65803 1.31233)
2,2,2022-05-27T13:59:30+08:00,POINT (103.66913 1.32557)
3,3,2022-05-27T13:59:30+08:00,POINT (103.67967 1.32651)
4,4,2022-05-27T13:59:30+08:00,POINT (103.67998 1.31483)
...,...,...,...
1017,1017,2022-05-27T13:59:30+08:00,POINT (103.98892 1.35764)
1018,1018,2022-05-27T13:59:30+08:00,POINT (103.98980 1.35884)
1019,1019,2022-05-27T13:59:30+08:00,POINT (103.98986 1.36000)
1020,1020,2022-05-27T13:59:30+08:00,POINT (103.98989 1.36000)
