<a href="https://colab.research.google.com/github/mgstockwell/water_well_prediction/blob/main/Groundwater_Load_Lithology_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Copyright 2019 Google LLC. { display-mode: "form" }
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

<table class="ee-notebook-buttons" align="left"><td>
<a target="_blank"  href="http://colab.research.google.com/github/google/earthengine-api/blob/master/python/examples/ipynb/ee-api-colab-setup.ipynb">
    <img src="https://www.tensorflow.org/images/colab_logo_32px.png" /> Run in Google Colab</a>
</td><td>
<a target="_blank"  href="https://github.com/google/earthengine-api/blob/master/python/examples/ipynb/ee-api-colab-setup.ipynb"><img width=32px src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" /> View source on GitHub</a></td></table>

# Earth Engine Python API Colab Setup

This notebook demonstrates how to set up the Earth Engine Python API in Colab and provides several examples of how to print and visualize Earth Engine processed data.

### Authenticate and initialize

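The cell below authenticates to Earth Engine with a service-account key file and then calls `ee.Initialize` to initialize the API. If the key file is not already present in `/content`, you'll be prompted to upload it; follow the instructions printed to the cell. (The interactive `ee.Authenticate` flow, which asks you to grant Earth Engine access to your Google account, is left commented out in the cell as an alternative.)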

In [None]:
# import the Earth Engine API
import os, sys
from google.colab import files
import ee
from google.auth.transport.requests import AuthorizedSession
from google.oauth2 import service_account
import pandas as pd

project = 'msd8654-498-dev'

# Location of the service-account key used for Earth Engine. Defined once so the
# existence check and the credential loader cannot drift apart.
EE_KEY_PATH = '/content/msd8654-498-dev-bb45b07b4be4.json'

# Named *_email so the string does not shadow the imported
# `google.oauth2.service_account` module.
service_account_email = 'my-first-app-92f7826c73ade84fa@msd8654-498-dev.iam.gserviceaccount.com'

#!gcloud auth login --project {PROJECT}
if os.path.exists(EE_KEY_PATH):
  print('key exists')
else:
  # Opens the Colab upload prompt; the uploaded file is saved under its own name
  # (presumably in /content, the usual Colab working directory).
  KEY = files.upload()
  if not os.path.exists(EE_KEY_PATH):
    # Fail here with a clear message instead of a less obvious error from the
    # credential loader below.
    raise FileNotFoundError(
        f'Expected service-account key at {EE_KEY_PATH} after upload')

credentials = ee.ServiceAccountCredentials(service_account_email, EE_KEY_PATH)

# Trigger the authentication flow.
# ee.Authenticate()

ee.Initialize(
  credentials=credentials,
  project=project,
  opt_url='https://earthengine-highvolume.googleapis.com'
)

# Placeholders; both frames are filled by the %%bigquery cells further down.
df = pd.DataFrame()
df_state_cds = pd.DataFrame()


Saving msd8654-498-dev-bb45b07b4be4.json to msd8654-498-dev-bb45b07b4be4.json


## Test the API

Test the API by printing the elevation of Mount Everest.

In [None]:
# Smoke test: read the SRTM elevation at the summit of Mount Everest.
dem = ee.Image('USGS/SRTMGL1_003')
xy = ee.Geometry.Point([86.9250, 27.9881])

# Sample the image at the point (second argument 30) and keep the first feature.
summit_sample = dem.sample(xy, 30).first()
elev = summit_sample.get('elevation').getInfo()

print('Mount Everest elevation (m):', elev)

Mount Everest elevation (m): 8729


In [None]:
import os
import json
import datetime
from google.cloud import bigquery
from google.oauth2 import service_account
from google.colab import auth

google_project_id = 'msd8654-498-dev'
# Set the project id. Assigning through os.environ updates both the mapping and
# the process environment; putenv() does not update os.environ.
os.environ['GOOGLE_CLOUD_PROJECT'] = google_project_id

# Optional service-account key next to the notebook (local execution).
key_path = google_project_id + ".json"

# Log on and build the BigQuery client. Each branch creates its own client so
# that service-account credentials, when used, are not discarded afterwards.
if os.getenv('GAE_ENV', '').startswith('standard'):
  # Production in the standard environment: rely on the default credentials.
  client = bigquery.Client(project=google_project_id)
elif os.path.exists(key_path):
  # Local execution with a key file.
  credentials = service_account.Credentials.from_service_account_file(
      key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"],
  )
  client = bigquery.Client(credentials=credentials, project=google_project_id)
else:
  # Colab: interactive user login, then a client with the default credentials.
  auth.authenticate_user()
  client = bigquery.Client(project=google_project_id)

print('Authenticated')



Authenticated


## BigQuery Upload Function

In [None]:
from google.cloud import storage
from google.cloud import bigquery
import urllib.request
import os, datetime

# note: upload_blob reads the global google_project_id set in the BigQuery authentication cell above

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Upload a local file to a Google Cloud Storage bucket.

    Args:
        bucket_name: Name of the destination bucket.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Object name to create inside the bucket.

    The storage client is created for the notebook-level ``google_project_id``.
    A confirmation line with a timestamp is printed after the upload call returns.
    """
    storage_client = storage.Client(project=google_project_id)
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_filename(source_file_name)

    # Report the bucket and the object name separately; the previous message
    # printed the object name where the bucket was announced.
    print(
        "File {} uploaded to Storage Bucket {} as {} successfully . {}".format(
            source_file_name, bucket_name, destination_blob_name, datetime.datetime.now()
        )
    )

# Use BigQuery via magics

The `google.cloud.bigquery` library also includes a magic command which runs a query and either displays the result or saves it to a variable as a `DataFrame`.

In [None]:
# Save output in a variable `df`
%%bigquery --project msd8654-498-dev df
-- Distinct groundwater site rows that have a well depth and decimal
-- coordinates, limited to state codes numerically above 25.
SELECT
          DISTINCT site_no,
          station_nm,
          site_tp_cd,
          lat_va,
          long_va,
          dec_lat_va,
          dec_long_va,
          district_cd,
          -- Prefix '0' and keep the last two characters, e.g. '6' -> '06'.
          right(concat('0',state_cd),2) state_cd,
          county_cd,
          well_depth_va,
          well_depth_hue
        FROM
          `msd8654-498-dev.usgs.groundwater_sites_vis`
        WHERE
          well_depth_va IS NOT NULL
          and dec_lat_va is not null
          and dec_long_va is not null
          -- Numeric comparison on the state code (cast from its stored type).
          and cast(state_cd as int) > 25

In [None]:
# Display the sites returned by the query above (large frames render as a truncated preview).
df

Unnamed: 0,site_no,station_nm,site_tp_cd,lat_va,long_va,dec_lat_va,dec_long_va,district_cd,state_cd,county_cd,well_depth_va,well_depth_hue
0,415235083414001,07S 06E 15ADBB01 MONROE CO (PETERSBURG S),GW,415234.8,834139.5,41.876333,-83.694306,26,26,115,17.0,2
1,461605086374101,45N 19W 25BDDC 01 ALGER KENTUCKY CCC,GW,461607.2,863737.6,46.268620,-86.627265,26,26,3,67.0,3
2,465002089321601,51N 41W 08BDBC01 ONTONAGON CO (SILVER CITY),GW,465000.1,893217.3,46.833306,-89.538289,26,26,131,100.0,4
3,462159084442201,46N 04W 24DAAD01 CHIPPEWA CO (RACO),GW,462159.4,844420.8,46.366519,-84.739169,26,26,33,53.9,3
4,433133086082601,13N 15W 18AAAA01 OCEANA CO (HESPERIA),GW,433133.9,860825.6,43.526096,-86.140510,26,26,127,76.1,3
...,...,...,...,...,...,...,...,...,...,...,...,...
564509,433332108483601,7N-01E-30ba 01,GW,433332,1084836,43.558844,-108.810677,56,56,13,1272.0,6
564510,425443108473301,2S-01E-05ccc01,GW,425442,1084735,42.911625,-108.793731,56,56,13,50.0,3
564511,423127108132200,30-095-31ad 01,GW,423127,1081322,42.524127,-108.223428,56,56,13,75.0,3
564512,441703108003602,50-093-22ddd02,GW,441703,1080036,44.284124,-108.010655,56,56,3,25.0,2


In [None]:
# Save output in a variable `df_state_cds`

%%bigquery --project msd8654-498-dev df_state_cds
-- State lookup rows (FIPS code, postal abbreviation, name, GNIS id) from the
-- public census utility table.
SELECT
  state_fips_code,
  state_postal_abbreviation,
  state_name,
  state_gnisid
FROM
  `bigquery-public-data.census_utility.fips_codes_states`
  -- NOTE(review): this compares against the string '25', so it presumably relies
  -- on state_fips_code being a zero-padded two-character code -- confirm.
  where state_fips_code >'25'


In [None]:
# Show the state lookup table returned by the query above.
display(df_state_cds)

### Get Lithology from Earth Engine assets
[Global ALOS mTPI (Multi-Scale Topographic Position Index)](https://developers.google.com/earth-engine/datasets/catalog/CSP_ERGo_1_0_Global_ALOS_mTPI#description)
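An Earth Engine image layer is a raster: a grid of pixels, each carrying a value per "band". The lat/long point will fall within one pixel, and the value of the band at that pixel will be returned. Note that the link above and the table below describe the ALOS mTPI layer (band `AVE`, relative elevation), whereas the code in this section samples the US Lithology image `CSP/ERGo/1_0/US/lithology` and returns its `b1` band.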

  <p></p>

```
Resolution
270 meters

Bands

Name	    Units	  Min	    Max	
AVE	      Meters	-3758*	10963*	
Description: ALOS-derived mTPI ranging from negative (valleys) to positive (ridges) values
```


In [None]:
from time import sleep
import pandas as pd

# Check a known point
# Point (-75.7778, 40.4375) at 76m/px
# Pixels
# Lithology: Image (1 band)
# b1: 3
# ALOS mTPI: Image (1 band)
# AVE: -5

# Lithology image sampled by get_lith. It has its own name so that re-running
# the Mount Everest test cell (which rebinds `dem` to an elevation image) cannot
# change what get_lith samples.
lithology_image = ee.Image('CSP/ERGo/1_0/US/lithology')
dem = lithology_image  # legacy name, kept for any code that still reads `dem`

def get_lith(long: float, lat: float):
  """Return the `b1` band value of the lithology image at a point.

  Args:
    long: Longitude of the point (passed first to ee.Geometry.Point).
    lat: Latitude of the point.

  Returns:
    The value of property `b1` on the first sampled feature, fetched to the
    client with getInfo().

  Raises:
    ee.EEException: raised by the Earth Engine call; earlier runs of this
      notebook logged "Element.get: Parameter 'object' is required." for some
      points (presumably when the sample at the point is empty).
  """
  xy = ee.Geometry.Point([long, lat])
  # Sample the image at the point with the second argument set to 10, then read
  # band b1 from the first resulting feature.
  return lithology_image.sample(xy, 10).first().get('b1').getInfo()

# Example of the columns read from each site row:
#   site_no        350160106324201
#   dec_lat_va           35.033358
#   dec_long_va        -106.545522

def _write_state_lithology(sites, filename, state_abbr, progress_every=1000, pause_seconds=10):
  """Look up the lithology value for each site and write one CSV line per site.

  Args:
    sites: DataFrame with `site_no`, `dec_long_va` and `dec_lat_va` columns.
    filename: Path of the CSV file to create (overwritten if it exists).
    state_abbr: State abbreviation, used only in progress messages.
    progress_every: Print a progress line and pause after this many rows.
    pause_seconds: Length of the pause after each progress line.

  Each line written is `<index label>,<site_no>,<value>`. Rows whose lookup
  fails are reported and skipped rather than written.
  """
  # `with` closes the file even if the loop is interrupted part-way through.
  with open(filename, 'w') as out:
    # `count` is the number of rows handled for this state; `i` is the row's
    # index label in the source frame and is what gets written to the CSV.
    for count, (i, site) in enumerate(sites.iterrows(), start=1):
      val = None  # reset so a failed lookup never reports the previous row's value
      try:
        val = get_lith(site["dec_long_va"], site["dec_lat_va"])
        out.write(f'{i},{site["site_no"]},{val}\n')
      except Exception as err:
        # Exception rather than BaseException, so a keyboard/kernel interrupt
        # still stops this long-running loop.
        print(f"   Unexpected {err}, {type(err)}")
        print('ERROR processed:', i, site.to_json(), " "*10, datetime.datetime.now())

      # Progress and pause are keyed to rows handled, so they fire regularly
      # regardless of index labels and are not skipped after a failed row.
      if count % progress_every == 0:
        print('processed:', i, site["site_no"], val, state_abbr, " "*10, datetime.datetime.now())
        sleep(pause_seconds)


print(datetime.datetime.now(), 'START')

for _, state in df_state_cds.iterrows():
  state_postal_abbreviation = state["state_postal_abbreviation"]
  print("state_cd:", state_postal_abbreviation)
  # Sites whose (zero-padded) state code matches this state's FIPS code.
  df_filtered = df[df.state_cd == state["state_fips_code"]]
  filename = f'lithology{state_postal_abbreviation}.csv'
  _write_state_lithology(df_filtered, filename, state_postal_abbreviation)
  upload_blob('msd8654-498-dev-usgs', filename, filename)
print(datetime.datetime.now(), 'END')


### Check One Point

In [None]:
#ERROR processed: 26637 {"site_no":"353545121072901","station_nm":"027S008E08R005M","site_tp_cd":"GW","lat_va":"353545","long_va":"1210729",
# "dec_lat_va":35.5958041,"dec_long_va":-121.1257494,"district_cd":"6","state_cd":"06","county_cd":"79","well_depth_va":145.0,"well_depth_hue":4} 
#None            2022-05-21 04:19:59.902246
# processed: 27000 335734118165601 None CA            2022-05-21 04:25:19.321003
#   Unexpected Element.get: Parameter 'object' is required., <class 'ee.ee_exception.EEException'>

# Lithology image sampled by get_lith. It has its own name so that re-running
# the Mount Everest test cell (which rebinds `dem` to an elevation image) cannot
# change what get_lith samples.
lithology_image = ee.Image('CSP/ERGo/1_0/US/lithology')
dem = lithology_image  # legacy name, kept for any code that still reads `dem`

# Redefined here so this check can be run without executing the long export loop.
def get_lith(long: float, lat: float):
  """Return the `b1` band value of the lithology image at a point.

  Args:
    long: Longitude of the point (passed first to ee.Geometry.Point).
    lat: Latitude of the point.

  Returns:
    The value of property `b1` on the first sampled feature, fetched to the
    client with getInfo().

  Raises:
    ee.EEException: raised by the Earth Engine call, e.g. the
      "Element.get: Parameter 'object' is required." error quoted in the
      notes at the top of this cell.
  """
  xy = ee.Geometry.Point([long, lat])
  # Sample the image at the point with the second argument set to 10, then read
  # band b1 from the first resulting feature.
  return lithology_image.sample(xy, 10).first().get('b1').getInfo()

# Coordinates of the site from the error log quoted above.
val = get_lith(-121.1257494, 35.5958041)
print(val)

-8
