# re-sample input files

In [2]:
!pip install --upgrade google-cloud-storage --user jupyter

Collecting google-cloud-storage
  Using cached google_cloud_storage-1.33.0-py2.py3-none-any.whl (92 kB)
Collecting jupyter
  Downloading jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)
Collecting google-resumable-media<2.0dev,>=1.0.0
  Using cached google_resumable_media-1.1.0-py2.py3-none-any.whl (75 kB)
Collecting google-crc32c<2.0dev,>=1.0; python_version >= "3.5"
  Using cached google_crc32c-1.0.0-cp37-cp37m-manylinux2010_x86_64.whl (39 kB)
Installing collected packages: google-crc32c, google-resumable-media, google-cloud-storage, jupyter
Successfully installed google-cloud-storage-1.33.0 google-crc32c-1.0.0 google-resumable-media-1.1.0 jupyter-1.0.0


In [3]:
# Create a service account named "hydraulic" with the gcloud CLI.
!gcloud iam service-accounts create hydraulic

Created service account [hydraulic].


To take a quick anonymous survey, run:
  $ gcloud survey



In [10]:
# Dry run: this only prints (echo) the command that would bind a project role
# to the service account; it does not execute it.
# NOTE(review): roles/owner is very broad for a storage-only workflow; a narrower
# role (e.g. roles/storage.admin) may be sufficient — confirm before running.
!export PROJECT_ID='osi-pi-gcp-accelerator' NAME='hydraulic' && echo gcloud projects add-iam-policy-binding "$PROJECT_ID" --member="serviceAccount:${NAME}@${PROJECT_ID}.iam.gserviceaccount.com" --role="roles/owner"

gcloud projects add-iam-policy-binding osi-pi-gcp-accelerator --member=serviceAccount:hydraulic@osi-pi-gcp-accelerator.iam.gserviceaccount.com --role=roles/owner


In [15]:
# Dry run: this only prints (echo) the command that would create a JSON key
# file named <NAME>.json for the service account; it does not execute it.
!export PROJECT_ID='osi-pi-gcp-accelerator' NAME='hydraulic' && echo gcloud iam service-accounts keys create "${NAME}.json" --iam-account="${NAME}@${PROJECT_ID}.iam.gserviceaccount.com"

gcloud iam service-accounts keys create hydraulic.json --iam-account=hydraulic@osi-pi-gcp-accelerator.iam.gserviceaccount.com


In [36]:
%%bash
# Generate create-gs.py, a small script that creates the Cloud Storage bucket.
# Fail fast so a failed write is never followed by chmod on a stale file.
set -euo pipefail
file="create-gs.py"
bucket='hydraulic'
# The heredoc delimiter is intentionally unquoted so that $bucket is expanded
# into the generated Python source.
cat > "$file" <<EOF

# Imports the Google Cloud client library
from google.cloud import storage

# Instantiates a client
storage_client = storage.Client()

# The name for the new bucket
bucket_name = "$bucket"

# Creates the new bucket
bucket = storage_client.create_bucket(bucket_name)

print("Bucket {} created.".format(bucket.name))

EOF
# NOTE(review): the script has no shebang and is run via "python create-gs.py",
# so the executable bit is probably unnecessary — kept for compatibility.
chmod +x "$file"

In [37]:
# Run the generated script with the service-account key file set as the
# application credentials for that process.
# NOTE(review): the export only applies to the shell spawned for this line, so
# later Python cells that call storage.Client() presumably obtain credentials
# some other way — confirm.
!export GOOGLE_APPLICATION_CREDENTIALS="$HOME/working/hydraulic.json" && python create-gs.py

Bucket hydraulic created.


In [38]:
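# Run on the VM (not in this notebook):
# gcloud auth activate-service-account --key-file=hydraulic.json
# gsutil cp *  gs://hydraulic/ufl
# NOTE: the `*` glob also uploads hydraulic.json (the service-account key);
# the bucket listing further down shows ufl/hydraulic.json. Prefer a narrower
# glob such as `gsutil cp *.csv gs://hydraulic/ufl` and remove the key from the bucket.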

In [114]:
import datetime
import os

import pandas as pd

In [134]:
from google.cloud import storage

def download_blob(bucket_name, source_blob_name, destination_file_name):
    """Fetch one object from a Cloud Storage bucket into a local file.

    Args:
        bucket_name: Name of the bucket that holds the object.
        source_blob_name: Name of the object inside the bucket.
        destination_file_name: Local path the object's contents are written to.

    Prints a confirmation line once the download call has returned.
    """
    client = storage.Client()

    # Resolve bucket -> blob handle, then stream the contents to disk.
    remote_object = client.bucket(bucket_name).blob(source_blob_name)
    remote_object.download_to_filename(destination_file_name)

    summary = "Blob {} downloaded to {}.".format(
        source_blob_name, destination_file_name
    )
    print(summary)
    
def list_blobs(bucket_name):
    """Print the name of every object stored in the given bucket.

    Args:
        bucket_name: Name of the bucket to enumerate.
    """
    client = storage.Client()

    # Client.list_blobs needs google-cloud-storage >= 1.17.0.
    for entry in client.list_blobs(bucket_name):
        print(entry.name)
        
def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Copy a local file into a Cloud Storage bucket.

    Args:
        bucket_name: Name of the target bucket.
        source_file_name: Local path of the file to send.
        destination_blob_name: Object name the file is stored under in the bucket.

    Prints a confirmation line once the upload call has returned.
    """
    client = storage.Client()

    # Build the handle for the target object, then push the file's contents.
    target = client.bucket(bucket_name).blob(destination_blob_name)
    target.upload_from_filename(source_file_name)

    summary = "File {} uploaded to {}.".format(
        source_file_name, destination_blob_name
    )
    print(summary)

In [39]:
# Name of the Cloud Storage bucket used by the cells below.
gsb='hydraulic'
# Print the name of every object currently stored in that bucket.
list_blobs(gsb)

ufl/CE.csv
ufl/CP.csv
ufl/EPS1.csv
ufl/FS1.csv
ufl/FS2.csv
ufl/PS1.csv
ufl/PS2.csv
ufl/PS3.csv
ufl/PS4.csv
ufl/PS5.csv
ufl/PS6.csv
ufl/SE.csv
ufl/TS1.csv
ufl/TS2.csv
ufl/TS3.csv
ufl/TS4.csv
ufl/VS1-missing-values-add-these-manually-to-pi.csv
ufl/VS1.csv
ufl/hydraulic.json
ufl/profile.csv


In [46]:
# Fetch the raw pressure files PS1..PS6 from the bucket's ufl/ folder into
# the working directory, keeping the same base file name locally.
for suffix in range(1, 7):
    lfile = f"PS{suffix}.csv"
    file = f"ufl/PS{suffix}.csv"
    download_blob(
        bucket_name=gsb,
        source_blob_name=file,
        destination_file_name=lfile,
    )

Blob ufl/PS1.csv downloaded to PS1.csv.
Blob ufl/PS2.csv downloaded to PS2.csv.
Blob ufl/PS3.csv downloaded to PS3.csv.
Blob ufl/PS4.csv downloaded to PS4.csv.
Blob ufl/PS5.csv downloaded to PS5.csv.
Blob ufl/PS6.csv downloaded to PS6.csv.


In [160]:
# read a file downsample and write to new file
def resample(lfile,timedelta,nfile_prefix):
    """Downsample one tab-separated sensor file and write the result to a new file.

    The input is read as header-less, tab-separated rows of
    ``tag``, ``timestamp``, ``value``.  The values are averaged per
    ``timedelta`` bucket and written, again tab-separated and without a
    header, to ``f"{nfile_prefix}{lfile}"``.

    Args:
        lfile: Path of the input file; also the tail of the output file name.
        timedelta: Resampling interval handed to ``Series.resample``.
        nfile_prefix: String prepended to ``lfile`` to form the output path.

    Returns:
        DataFrame with the columns ``tag``, ``timestamp``, ``value`` holding
        the downsampled rows that were written to disk.
    """
    raw = pd.read_csv(lfile, sep='\t', names=['tag', 'timestamp', 'value'])
    raw['timestamp'] = pd.to_datetime(raw['timestamp'])
    raw = raw.set_index('timestamp')

    # Take the tag from the first row by position. The index is now datetime
    # based, so a plain [0] lookup would rely on the deprecated positional
    # fallback of Series.__getitem__.
    # NOTE(review): assumes every row of the file carries the same tag — confirm.
    tag = raw['tag'].iloc[0]

    # Mean of all samples that fall into each interval.
    resampled = raw['value'].resample(timedelta).mean().to_frame()
    resampled['tag'] = tag  # constant column, restored after the aggregation
    resampled = resampled.reset_index()[['tag', 'timestamp', 'value']]

    # date format: 2020-02-11T00:00:00.000Z # millisecond is legacy TODO remove
    resampled.to_csv(f"{nfile_prefix}{lfile}", sep='\t', float_format="%.2f",
                     header=False, index=False,
                     date_format="%Y-%m-%dT%H:%M:%S.000Z")
    return resampled

In [161]:
# Downsample each local pressure file PS1..PS6 to one-second means; every
# result is written next to its input under the same name prefixed with "f".
timedelta = datetime.timedelta(seconds=1)
for suffix in range(1, 7):
    df = resample(f"PS{suffix}.csv", timedelta, nfile_prefix="f")

In [162]:
# process power file
# The raw file is fetched from the bucket only when it is missing locally, so
# the cell works on a fresh run without re-downloading on every execution.
suffix="1"
file=f"ufl/EPS{suffix}.csv"
lfile=f"EPS{suffix}.csv"
if not os.path.exists(lfile):
    download_blob(gsb,file,lfile)
# Downsample to one-second means; the result is written to f<lfile>.
timedelta = datetime.timedelta(seconds=1)
df = resample(lfile,timedelta,"f")

In [159]:
# process flow files
# Each raw file is fetched from the bucket only when it is missing locally, so
# the cell works on a fresh run without re-downloading on every execution.
timedelta = datetime.timedelta(seconds=1)
for suffix in range(1,3,1):
    file=f"ufl/FS{suffix}.csv"
    lfile=f"FS{suffix}.csv"
    if not os.path.exists(lfile):
        download_blob(gsb,file,lfile)
    # Downsample to one-second means; the result is written to f<lfile>.
    df = resample(lfile,timedelta,"f")

In [164]:
import os
import re

# List the working directory and keep only the resampled outputs: names that
# start with "f" and end with the ".csv" extension. The dot is escaped so a
# name that merely ends in "csv" (no extension separator) is not picked up.
files = os.listdir(path='.')
filtered_files = [file for file in files if re.search(r'^f.*\.csv$', file)]

In [165]:
# Display the list of file names selected by the filter.
filtered_files

['fFS1.csv',
 'fPS5.csv',
 'fPS1.csv',
 'fPS4.csv',
 'fFS2.csv',
 'fPS2.csv',
 'fPS3.csv',
 'fEPS1.csv',
 'fPS6.csv']

In [166]:
# Push every selected local file to the bucket, storing it under the ufl/
# folder with the same file name.
for lfile in filtered_files:
    file = f"ufl/{lfile}"
    upload_blob(
        bucket_name=gsb,
        source_file_name=lfile,
        destination_blob_name=file,
    )

File fFS1.csv uploaded to ufl/fFS1.csv.
File fPS5.csv uploaded to ufl/fPS5.csv.
File fPS1.csv uploaded to ufl/fPS1.csv.
File fPS4.csv uploaded to ufl/fPS4.csv.
File fFS2.csv uploaded to ufl/fFS2.csv.
File fPS2.csv uploaded to ufl/fPS2.csv.
File fPS3.csv uploaded to ufl/fPS3.csv.
File fEPS1.csv uploaded to ufl/fEPS1.csv.
File fPS6.csv uploaded to ufl/fPS6.csv.


In [None]:
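# Run on the VM (not in this notebook): copy the resampled files
# (object names starting with "f") from the bucket into the current directory.
# gsutil cp gs://hydraulic/ufl/f* .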