In [1]:
import matplotlib.pyplot as plt
import numpy as np
import geohash2 as gh
import gmaps
import gmaps.datasets

from pyspark.sql.types import StructType, StructField, FloatType, LongType, StringType
from datetime import datetime

In [2]:
feats = []
f = open('../features.txt')
for line_num, line in enumerate(f):
    if line_num == 0:
        # Timestamp
        feats.append(StructField(line.strip(), LongType(), True))
    elif line_num == 1:
        # Geohash
        feats.append(StructField(line.strip(), StringType(), True))
    else:
        # Other features
        feats.append(StructField(line.strip(), FloatType(), True))
    
schema = StructType(feats)


In [3]:
%%time

df = spark.read.format('csv').option('sep', '\t').schema(schema).load('hdfs://orion11:13030/nam_s')
df.take(1)

CPU times: user 55.5 ms, sys: 7.67 ms, total: 63.2 ms
Wall time: 2min 38s


In [6]:
df.describe([
    'snow_cover_surface',
]).show()

+-------+------------------+
|summary|snow_cover_surface|
+-------+------------------+
|  count|         108000000|
|   mean|16.332520101851852|
| stddev| 36.90268917507707|
|    min|               0.0|
|    max|             100.0|
+-------+------------------+



In [10]:
%%time
df.createOrReplaceTempView("nam_small")
snowy_location = spark.sql(
    f'''SELECT * FROM(SELECT substr(Geohash,1,4) as geoloc,count(substr(Geohash,1,4)) as count
        FROM nam_small 
        WHERE snow_cover_surface > 52
        GROUP BY substr(Geohash,1,4))
    ORDER BY count 
    ''').collect()
print(f'Number of Locations = {len(snowy_location)}')

Number of Locations = 30699
CPU times: user 210 ms, sys: 7.27 ms, total: 218 ms
Wall time: 2min 38s


In [18]:
for row in snowy_location:
    print (row)
    


Row(geoloc='dq0g', count=1)
Row(geoloc='9ucq', count=1)
Row(geoloc='9szz', count=1)
Row(geoloc='9sxh', count=1)
Row(geoloc='9ubr', count=1)
Row(geoloc='dj2j', count=1)
Row(geoloc='9t9g', count=1)
Row(geoloc='9sqc', count=1)
Row(geoloc='9udw', count=1)
Row(geoloc='9ubs', count=1)
Row(geoloc='djud', count=1)
Row(geoloc='9q96', count=1)
Row(geoloc='9v11', count=1)
Row(geoloc='9qc0', count=1)
Row(geoloc='9qkx', count=1)
Row(geoloc='9mtm', count=1)
Row(geoloc='9g2v', count=1)
Row(geoloc='9t2g', count=1)
Row(geoloc='9uds', count=1)
Row(geoloc='9myw', count=1)
Row(geoloc='djz7', count=1)
Row(geoloc='dq09', count=1)
Row(geoloc='9qj9', count=1)
Row(geoloc='djtb', count=1)
Row(geoloc='djt9', count=1)
Row(geoloc='djm2', count=1)
Row(geoloc='9u8m', count=1)
Row(geoloc='dje9', count=1)
Row(geoloc='9vks', count=1)
Row(geoloc='9myu', count=1)
Row(geoloc='9snd', count=1)
Row(geoloc='dpf6', count=1)
Row(geoloc='9st6', count=1)
Row(geoloc='9ubg', count=1)
Row(geoloc='9swb', count=1)
Row(geoloc='9qp3', c

Row(geoloc='9vx6', count=19)
Row(geoloc='djfk', count=19)
Row(geoloc='9v6h', count=19)
Row(geoloc='djfr', count=19)
Row(geoloc='dnp0', count=19)
Row(geoloc='dn07', count=19)
Row(geoloc='9ssw', count=19)
Row(geoloc='9v92', count=19)
Row(geoloc='9tdb', count=19)
Row(geoloc='dn1b', count=19)
Row(geoloc='9tqc', count=19)
Row(geoloc='djbq', count=19)
Row(geoloc='dnp9', count=19)
Row(geoloc='9vy9', count=19)
Row(geoloc='9sz6', count=19)
Row(geoloc='9vs6', count=19)
Row(geoloc='9sut', count=19)
Row(geoloc='dn5b', count=19)
Row(geoloc='9tnt', count=19)
Row(geoloc='djc7', count=19)
Row(geoloc='9qsk', count=19)
Row(geoloc='9prb', count=19)
Row(geoloc='dxvw', count=19)
Row(geoloc='9yhb', count=19)
Row(geoloc='9sup', count=19)
Row(geoloc='9tse', count=19)
Row(geoloc='9vwf', count=19)
Row(geoloc='9vd2', count=19)
Row(geoloc='9tkt', count=19)
Row(geoloc='9mtr', count=19)
Row(geoloc='9trg', count=19)
Row(geoloc='9v67', count=19)
Row(geoloc='9ssg', count=19)
Row(geoloc='9tn9', count=19)
Row(geoloc='9t

Row(geoloc='9y70', count=69)
Row(geoloc='9yv1', count=69)
Row(geoloc='9tvd', count=69)
Row(geoloc='9qdv', count=69)
Row(geoloc='9sse', count=69)
Row(geoloc='9tw6', count=69)
Row(geoloc='9w9n', count=69)
Row(geoloc='9t51', count=69)
Row(geoloc='9y2f', count=69)
Row(geoloc='9qe3', count=69)
Row(geoloc='9ynj', count=69)
Row(geoloc='9tx1', count=69)
Row(geoloc='9y88', count=69)
Row(geoloc='9yk3', count=69)
Row(geoloc='9qsp', count=69)
Row(geoloc='9qsm', count=70)
Row(geoloc='9w34', count=70)
Row(geoloc='c19b', count=70)
Row(geoloc='9y6y', count=70)
Row(geoloc='9qtx', count=70)
Row(geoloc='9th5', count=70)
Row(geoloc='dnrz', count=70)
Row(geoloc='9ymq', count=70)
Row(geoloc='9tfs', count=70)
Row(geoloc='c0vq', count=70)
Row(geoloc='9v8h', count=70)
Row(geoloc='9ss7', count=70)
Row(geoloc='f8nb', count=70)
Row(geoloc='dq2r', count=70)
Row(geoloc='dp6p', count=70)
Row(geoloc='9vb8', count=70)
Row(geoloc='dq2k', count=70)
Row(geoloc='9vyr', count=70)
Row(geoloc='9txx', count=70)
Row(geoloc='9y

Row(geoloc='9y99', count=150)
Row(geoloc='9yst', count=150)
Row(geoloc='9r7f', count=150)
Row(geoloc='9y06', count=150)
Row(geoloc='9r1r', count=150)
Row(geoloc='9ys0', count=151)
Row(geoloc='9wc2', count=151)
Row(geoloc='9wj5', count=151)
Row(geoloc='9xnc', count=151)
Row(geoloc='dne1', count=151)
Row(geoloc='9r28', count=151)
Row(geoloc='dpvr', count=151)
Row(geoloc='9qrr', count=151)
Row(geoloc='dn6v', count=151)
Row(geoloc='9tz5', count=151)
Row(geoloc='9yyg', count=151)
Row(geoloc='c1cw', count=151)
Row(geoloc='9zjd', count=151)
Row(geoloc='9z26', count=151)
Row(geoloc='f8tr', count=151)
Row(geoloc='9w8s', count=151)
Row(geoloc='c25s', count=151)
Row(geoloc='f07j', count=151)
Row(geoloc='9yyb', count=151)
Row(geoloc='9ymm', count=151)
Row(geoloc='c1eq', count=151)
Row(geoloc='9yxn', count=151)
Row(geoloc='9y0u', count=151)
Row(geoloc='9wr3', count=151)
Row(geoloc='dqcf', count=151)
Row(geoloc='dphj', count=151)
Row(geoloc='9y6z', count=151)
Row(geoloc='9whs', count=152)
Row(geoloc

Row(geoloc='9rgb', count=242)
Row(geoloc='9zj9', count=242)
Row(geoloc='cbhv', count=242)
Row(geoloc='c21c', count=242)
Row(geoloc='cb46', count=242)
Row(geoloc='9xpe', count=242)
Row(geoloc='c2qj', count=242)
Row(geoloc='9zd6', count=242)
Row(geoloc='c8x9', count=242)
Row(geoloc='9rc1', count=242)
Row(geoloc='9w53', count=242)
Row(geoloc='9zwv', count=242)
Row(geoloc='c8r9', count=242)
Row(geoloc='dr54', count=242)
Row(geoloc='9yfb', count=242)
Row(geoloc='c216', count=242)
Row(geoloc='dnsx', count=243)
Row(geoloc='9r6s', count=243)
Row(geoloc='9qxn', count=243)
Row(geoloc='dpvh', count=243)
Row(geoloc='9yv7', count=243)
Row(geoloc='9rg5', count=243)
Row(geoloc='9w1f', count=243)
Row(geoloc='dr3e', count=243)
Row(geoloc='fbce', count=243)
Row(geoloc='9r4c', count=243)
Row(geoloc='c40z', count=243)
Row(geoloc='dr51', count=243)
Row(geoloc='dnwe', count=243)
Row(geoloc='9z75', count=243)
Row(geoloc='f06w', count=243)
Row(geoloc='c219', count=243)
Row(geoloc='f8t8', count=243)
Row(geoloc

Row(geoloc='c2ky', count=340)
Row(geoloc='9z1k', count=340)
Row(geoloc='9x3g', count=340)
Row(geoloc='c2e9', count=340)
Row(geoloc='9zxm', count=340)
Row(geoloc='f04p', count=340)
Row(geoloc='dr40', count=340)
Row(geoloc='9wdq', count=340)
Row(geoloc='c8ju', count=340)
Row(geoloc='f08s', count=340)
Row(geoloc='c9nb', count=340)
Row(geoloc='f2nm', count=340)
Row(geoloc='dp5r', count=341)
Row(geoloc='9z37', count=341)
Row(geoloc='dpju', count=341)
Row(geoloc='f8tc', count=341)
Row(geoloc='9w4f', count=341)
Row(geoloc='c8hr', count=341)
Row(geoloc='cc2s', count=341)
Row(geoloc='9qvn', count=341)
Row(geoloc='f0km', count=341)
Row(geoloc='f0n5', count=341)
Row(geoloc='c21y', count=341)
Row(geoloc='9w6q', count=341)
Row(geoloc='9x10', count=341)
Row(geoloc='dpus', count=341)
Row(geoloc='c0y9', count=341)
Row(geoloc='c8qk', count=341)
Row(geoloc='c1ck', count=341)
Row(geoloc='f2wf', count=341)
Row(geoloc='dp32', count=341)
Row(geoloc='9rxe', count=341)
Row(geoloc='cb5j', count=341)
Row(geoloc

Row(geoloc='cbj2', count=433)
Row(geoloc='9rnn', count=433)
Row(geoloc='f2f1', count=433)
Row(geoloc='9rf2', count=433)
Row(geoloc='c944', count=433)
Row(geoloc='9zwc', count=433)
Row(geoloc='9rxg', count=433)
Row(geoloc='9z7t', count=433)
Row(geoloc='cb7v', count=433)
Row(geoloc='c6dq', count=433)
Row(geoloc='ccr4', count=433)
Row(geoloc='fcc1', count=433)
Row(geoloc='drfe', count=433)
Row(geoloc='ccqc', count=433)
Row(geoloc='drkz', count=433)
Row(geoloc='drbk', count=433)
Row(geoloc='9rn5', count=433)
Row(geoloc='fc3z', count=433)
Row(geoloc='dpxn', count=433)
Row(geoloc='9zjr', count=433)
Row(geoloc='dpfy', count=433)
Row(geoloc='c8kd', count=433)
Row(geoloc='9zfs', count=433)
Row(geoloc='dp8w', count=433)
Row(geoloc='9zty', count=434)
Row(geoloc='9zft', count=434)
Row(geoloc='9zz5', count=434)
Row(geoloc='dpe6', count=434)
Row(geoloc='f0nk', count=434)
Row(geoloc='c8kp', count=434)
Row(geoloc='9rv2', count=434)
Row(geoloc='dp79', count=434)
Row(geoloc='c88c', count=434)
Row(geoloc

Row(geoloc='f81w', count=537)
Row(geoloc='dpe9', count=537)
Row(geoloc='c886', count=537)
Row(geoloc='c2wv', count=537)
Row(geoloc='c993', count=537)
Row(geoloc='f85s', count=537)
Row(geoloc='9r9u', count=537)
Row(geoloc='c2tp', count=538)
Row(geoloc='9xx7', count=538)
Row(geoloc='cb89', count=538)
Row(geoloc='9wf7', count=538)
Row(geoloc='c9pe', count=538)
Row(geoloc='9wgp', count=538)
Row(geoloc='cb9b', count=538)
Row(geoloc='f08p', count=538)
Row(geoloc='c3u2', count=538)
Row(geoloc='9wte', count=538)
Row(geoloc='c9qj', count=538)
Row(geoloc='drt0', count=538)
Row(geoloc='f3yv', count=538)
Row(geoloc='c93k', count=538)
Row(geoloc='cbnx', count=538)
Row(geoloc='fdme', count=538)
Row(geoloc='f21c', count=539)
Row(geoloc='c843', count=539)
Row(geoloc='drzq', count=539)
Row(geoloc='c3qt', count=539)
Row(geoloc='9x89', count=539)
Row(geoloc='c8mg', count=539)
Row(geoloc='c8t7', count=539)
Row(geoloc='f2np', count=539)
Row(geoloc='c9pk', count=539)
Row(geoloc='9xqs', count=539)
Row(geoloc

Row(geoloc='cbgg', count=633)
Row(geoloc='dr89', count=633)
Row(geoloc='c2de', count=633)
Row(geoloc='dpq6', count=633)
Row(geoloc='f00p', count=633)
Row(geoloc='9xvt', count=633)
Row(geoloc='c29z', count=633)
Row(geoloc='f8jm', count=633)
Row(geoloc='cb2y', count=633)
Row(geoloc='9zte', count=633)
Row(geoloc='f1k9', count=633)
Row(geoloc='dpcj', count=633)
Row(geoloc='cbc0', count=633)
Row(geoloc='cfwd', count=633)
Row(geoloc='dr9u', count=633)
Row(geoloc='c8q7', count=633)
Row(geoloc='f2b2', count=633)
Row(geoloc='9xum', count=633)
Row(geoloc='9zw5', count=634)
Row(geoloc='c31p', count=634)
Row(geoloc='9zx9', count=634)
Row(geoloc='f321', count=634)
Row(geoloc='c35m', count=634)
Row(geoloc='f2h6', count=634)
Row(geoloc='f03k', count=634)
Row(geoloc='c313', count=634)
Row(geoloc='c8ex', count=634)
Row(geoloc='9xxx', count=634)
Row(geoloc='c31z', count=634)
Row(geoloc='c2su', count=634)
Row(geoloc='9x49', count=634)
Row(geoloc='fdhs', count=634)
Row(geoloc='c8kx', count=634)
Row(geoloc

Row(geoloc='c3vu', count=725)
Row(geoloc='c9fu', count=725)
Row(geoloc='f236', count=725)
Row(geoloc='c9fm', count=725)
Row(geoloc='cfxj', count=725)
Row(geoloc='c9cz', count=725)
Row(geoloc='9zgf', count=725)
Row(geoloc='drer', count=725)
Row(geoloc='c8rs', count=725)
Row(geoloc='c67e', count=725)
Row(geoloc='f1zn', count=725)
Row(geoloc='c9gv', count=725)
Row(geoloc='c9qn', count=725)
Row(geoloc='c1wb', count=725)
Row(geoloc='cbvh', count=725)
Row(geoloc='f822', count=725)
Row(geoloc='c9r4', count=725)
Row(geoloc='c3ue', count=725)
Row(geoloc='cc79', count=725)
Row(geoloc='c2nm', count=725)
Row(geoloc='c9re', count=725)
Row(geoloc='c9q4', count=725)
Row(geoloc='c3vp', count=725)
Row(geoloc='f9t9', count=725)
Row(geoloc='cbjh', count=725)
Row(geoloc='c381', count=726)
Row(geoloc='cfz3', count=726)
Row(geoloc='cc5y', count=726)
Row(geoloc='f48n', count=726)
Row(geoloc='c3fp', count=726)
Row(geoloc='c6pd', count=726)
Row(geoloc='f1mk', count=726)
Row(geoloc='f08z', count=726)
Row(geoloc

Row(geoloc='c3t4', count=782)
Row(geoloc='c30c', count=782)
Row(geoloc='f26p', count=782)
Row(geoloc='cfv6', count=782)
Row(geoloc='c65g', count=782)
Row(geoloc='cd6e', count=783)
Row(geoloc='f0t8', count=783)
Row(geoloc='cby8', count=783)
Row(geoloc='c6dd', count=783)
Row(geoloc='f1jf', count=783)
Row(geoloc='c3td', count=783)
Row(geoloc='c6xk', count=783)
Row(geoloc='c9sw', count=783)
Row(geoloc='c2p3', count=783)
Row(geoloc='c3tp', count=783)
Row(geoloc='f9he', count=783)
Row(geoloc='c6xb', count=783)
Row(geoloc='c3sx', count=783)
Row(geoloc='f0nj', count=783)
Row(geoloc='f0t7', count=783)
Row(geoloc='c99r', count=783)
Row(geoloc='cbe0', count=783)
Row(geoloc='f28s', count=783)
Row(geoloc='ccsc', count=783)
Row(geoloc='9rzj', count=783)
Row(geoloc='cdjg', count=783)
Row(geoloc='f955', count=783)
Row(geoloc='c46f', count=783)
Row(geoloc='f38v', count=783)
Row(geoloc='c3sg', count=783)
Row(geoloc='c6h6', count=783)
Row(geoloc='cfzk', count=783)
Row(geoloc='f0cc', count=783)
Row(geoloc

Row(geoloc='cdf0', count=851)
Row(geoloc='f2xd', count=851)
Row(geoloc='cbr1', count=851)
Row(geoloc='c3bb', count=851)
Row(geoloc='cds0', count=852)
Row(geoloc='f2v1', count=852)
Row(geoloc='f3t4', count=852)
Row(geoloc='ccq4', count=852)
Row(geoloc='fb9x', count=852)
Row(geoloc='f3bz', count=852)
Row(geoloc='c60r', count=852)
Row(geoloc='f2sj', count=852)
Row(geoloc='fd2h', count=852)
Row(geoloc='c42f', count=852)
Row(geoloc='cdjx', count=852)
Row(geoloc='ccg4', count=852)
Row(geoloc='f9uq', count=852)
Row(geoloc='f8xs', count=852)
Row(geoloc='f4x6', count=852)
Row(geoloc='cd0f', count=852)
Row(geoloc='f4bg', count=852)
Row(geoloc='fdkw', count=852)
Row(geoloc='f14g', count=852)
Row(geoloc='f0r7', count=852)
Row(geoloc='cd73', count=852)
Row(geoloc='c3rx', count=852)
Row(geoloc='cbsd', count=852)
Row(geoloc='f1zt', count=852)
Row(geoloc='cb7u', count=852)
Row(geoloc='c2nr', count=852)
Row(geoloc='fd75', count=853)
Row(geoloc='f85f', count=853)
Row(geoloc='c9h1', count=853)
Row(geoloc

Row(geoloc='c3tn', count=944)
Row(geoloc='ccgj', count=944)
Row(geoloc='cdds', count=944)
Row(geoloc='f4zc', count=944)
Row(geoloc='c6d2', count=944)
Row(geoloc='c2fm', count=944)
Row(geoloc='f950', count=944)
Row(geoloc='drvs', count=944)
Row(geoloc='f17u', count=944)
Row(geoloc='f4pd', count=944)
Row(geoloc='f2fh', count=944)
Row(geoloc='ccvd', count=944)
Row(geoloc='ccy5', count=944)
Row(geoloc='f2zx', count=944)
Row(geoloc='c3v3', count=944)
Row(geoloc='ccz4', count=944)
Row(geoloc='cdr4', count=945)
Row(geoloc='c361', count=945)
Row(geoloc='f38c', count=945)
Row(geoloc='cds9', count=945)
Row(geoloc='c6kv', count=945)
Row(geoloc='fc0s', count=945)
Row(geoloc='cf1q', count=945)
Row(geoloc='cdes', count=945)
Row(geoloc='c9dm', count=945)
Row(geoloc='f621', count=945)
Row(geoloc='fd85', count=945)
Row(geoloc='cf5s', count=945)
Row(geoloc='f26g', count=945)
Row(geoloc='f9h9', count=945)
Row(geoloc='ccuu', count=945)
Row(geoloc='f17z', count=945)
Row(geoloc='f6wt', count=945)
Row(geoloc

Row(geoloc='f4w6', count=1036)
Row(geoloc='cdxt', count=1036)
Row(geoloc='f0wz', count=1036)
Row(geoloc='f1ye', count=1036)
Row(geoloc='f1ud', count=1036)
Row(geoloc='c3n3', count=1036)
Row(geoloc='cfuc', count=1036)
Row(geoloc='cbrr', count=1037)
Row(geoloc='f3f7', count=1037)
Row(geoloc='f36x', count=1037)
Row(geoloc='c2yp', count=1037)
Row(geoloc='cfm4', count=1037)
Row(geoloc='f1g3', count=1037)
Row(geoloc='f3q3', count=1037)
Row(geoloc='f4dq', count=1037)
Row(geoloc='f3js', count=1037)
Row(geoloc='f97f', count=1037)
Row(geoloc='f6rt', count=1037)
Row(geoloc='cf3y', count=1037)
Row(geoloc='cdg9', count=1037)
Row(geoloc='f9dj', count=1037)
Row(geoloc='c9hs', count=1037)
Row(geoloc='f4gf', count=1037)
Row(geoloc='f90u', count=1037)
Row(geoloc='f9ru', count=1037)
Row(geoloc='f9ke', count=1037)
Row(geoloc='f6qw', count=1037)
Row(geoloc='cdyf', count=1037)
Row(geoloc='f4db', count=1037)
Row(geoloc='f3md', count=1037)
Row(geoloc='f3ed', count=1037)
Row(geoloc='f925', count=1038)
Row(geol

Row(geoloc='f69c', count=1123)
Row(geoloc='f3wg', count=1123)
Row(geoloc='cc7k', count=1123)
Row(geoloc='c3dk', count=1123)
Row(geoloc='f4vf', count=1123)
Row(geoloc='ccwh', count=1123)
Row(geoloc='c4m4', count=1123)
Row(geoloc='cf01', count=1123)
Row(geoloc='f4ef', count=1123)
Row(geoloc='f94e', count=1123)
Row(geoloc='f98p', count=1123)
Row(geoloc='f6wu', count=1123)
Row(geoloc='c452', count=1123)
Row(geoloc='f89r', count=1123)
Row(geoloc='f9tw', count=1123)
Row(geoloc='c9tm', count=1123)
Row(geoloc='f4t7', count=1123)
Row(geoloc='cfb9', count=1123)
Row(geoloc='f3z7', count=1123)
Row(geoloc='f3rw', count=1123)
Row(geoloc='cddp', count=1123)
Row(geoloc='f9wc', count=1123)
Row(geoloc='f69x', count=1123)
Row(geoloc='f4qe', count=1123)
Row(geoloc='fd2d', count=1123)
Row(geoloc='cff5', count=1123)
Row(geoloc='cc5w', count=1123)
Row(geoloc='f6es', count=1123)
Row(geoloc='f6ee', count=1123)
Row(geoloc='f4wg', count=1124)
Row(geoloc='f9fn', count=1124)
Row(geoloc='fd2b', count=1124)
Row(geol

Row(geoloc='fd01', count=1381)
Row(geoloc='f67c', count=1381)
Row(geoloc='f6jh', count=1381)
Row(geoloc='f6w6', count=1381)
Row(geoloc='fd77', count=1382)
Row(geoloc='f1b5', count=1382)
Row(geoloc='f9cg', count=1382)
Row(geoloc='f9cv', count=1382)
Row(geoloc='f3xt', count=1382)
Row(geoloc='f3re', count=1382)
Row(geoloc='f9gk', count=1382)
Row(geoloc='fd0y', count=1382)
Row(geoloc='f6pb', count=1382)
Row(geoloc='f3yp', count=1383)
Row(geoloc='f98z', count=1383)
Row(geoloc='f18v', count=1383)
Row(geoloc='f99m', count=1383)
Row(geoloc='f4e8', count=1383)
Row(geoloc='f318', count=1384)
Row(geoloc='cdzg', count=1385)
Row(geoloc='f9mx', count=1385)
Row(geoloc='f3th', count=1385)
Row(geoloc='ccud', count=1386)
Row(geoloc='f3xj', count=1386)
Row(geoloc='9xgg', count=1386)
Row(geoloc='c3c8', count=1386)
Row(geoloc='cf47', count=1387)
Row(geoloc='f9cp', count=1387)
Row(geoloc='f4k6', count=1387)
Row(geoloc='f6s8', count=1388)
Row(geoloc='cf1z', count=1388)
Row(geoloc='f3ty', count=1388)
Row(geol

In [12]:
lat_long_list = [gh.decode(row) for row in snowy_location]
lat_long_list = [(float(row[0]), float(row[1])) for row in lat_long_list]
lat_long_list

[(59.0, -60.0),
 (59.0, -60.0),
 (59.0, -61.0),
 (59.0, -61.0),
 (59.0, -61.0),
 (59.0, -61.0),
 (59.0, -62.0),
 (59.0, -62.0),
 (56.0, -57.0),
 (58.0, -59.0),
 (57.0, -59.0),
 (57.0, -59.0),
 (57.0, -59.0),
 (57.0, -59.0),
 (57.0, -59.0),
 (57.0, -58.0),
 (56.0, -58.0),
 (57.0, -58.0),
 (56.0, -58.0),
 (57.0, -59.0),
 (57.0, -59.0),
 (57.0, -59.0),
 (57.0, -59.0),
 (57.0, -59.0),
 (56.0, -59.0),
 (57.0, -59.0),
 (56.0, -59.0),
 (59.0, -59.0),
 (59.0, -60.0),
 (59.0, -60.0),
 (59.0, -59.0),
 (58.0, -59.0),
 (59.0, -60.0),
 (58.0, -60.0),
 (59.0, -60.0),
 (59.0, -60.0),
 (59.0, -60.0),
 (59.0, -60.0),
 (59.0, -60.0),
 (58.0, -60.0),
 (59.0, -60.0),
 (58.0, -60.0),
 (58.0, -59.0),
 (58.0, -59.0),
 (58.0, -60.0),
 (58.0, -60.0),
 (58.0, -59.0),
 (58.0, -59.0),
 (58.0, -60.0),
 (58.0, -60.0),
 (58.0, -60.0),
 (58.0, -60.0),
 (58.0, -60.0),
 (58.0, -60.0),
 (58.0, -60.0),
 (58.0, -60.0),
 (58.0, -60.0),
 (58.0, -60.0),
 (59.0, -61.0),
 (59.0, -61.0),
 (59.0, -61.0),
 (59.0, -61.0),
 (59.0, 

In [None]:
for row in lat_long_list:
    


In [17]:
%%time
df.createOrReplaceTempView("nam_small")
snowy_location = spark.sql(
    f'''SELECT count(*)
        FROM nam_small 
        WHERE substr(Geohash,1,4) = 'c6ub'
    ''').collect()
print(f'Number of Locations = {len(snowy_location)}')

Number of Locations = 1
CPU times: user 72.3 ms, sys: 22 ms, total: 94.2 ms
Wall time: 4min 39s
