# Spark with GIS Visualization in Kaggle Notebooks

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input tree and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
required_packages = {"keplergl","geopandas","h3","geocoder"}

import pkg_resources
for lib in required_packages - {pkg.key for pkg in pkg_resources.working_set}:
  print(f"installing {lib}")
  %pip install -q --upgrade pip
  %pip install --use-feature=2020-resolver -q $lib

In [None]:
%%sh
# Report disk space usage for the mounted filesystems.
df

In [None]:
# List the contents of the current working directory via a shell.
!sh -c ls

In [None]:
!pip install pyspark

In [None]:
import pyspark

# Memory setting applied to both the Spark driver and the executors.
MAX_MEMORY = "8g"  # 24 gives OOM here.

# Session settings: pull in the Delta Lake package, register its SQL extension
# and catalog, and apply the memory setting above.
spark_conf = {
    "spark.jars.packages": "io.delta:delta-core_2.12:0.8.0",
    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    "spark.executor.memory": MAX_MEMORY,
    "spark.driver.memory": MAX_MEMORY,
}

session_builder = pyspark.sql.SparkSession.builder.appName("MyApp")
for conf_key, conf_value in spark_conf.items():
    session_builder = session_builder.config(conf_key, conf_value)

# Enable Hive support, then get the existing session or create a new one.
spark = session_builder.enableHiveSupport().getOrCreate()
spark

In [None]:
# Build a one-row DataFrame with string columns "hi" and "w" as sample data.
sample_rows = [{'hi': 'hello', 'w': 'world'}]
df = spark.createDataFrame(sample_rows)

In [None]:
 df.write.format('delta').mode('overwrite').option("mergeSchema", "true").save('/tmp/delta1')

In [None]:
# Load the Delta data back from /tmp/delta1 and register it as the temporary
# view "d1" so it can be queried with SQL.
delta_reader = spark.read.format('delta')
df = delta_reader.load('/tmp/delta1')
df.createOrReplaceTempView('d1')

# Query the view and convert the result to a pandas DataFrame for display.
query_result = spark.sql('select * from d1')
query_result.toPandas()

In [None]:
import keplergl

In [None]:
# Create a Kepler.gl map with height=600 and show it as the cell output.
map_1 = keplergl.KeplerGl(height=600)
map_1

In [None]:
import base64

# Height in pixels shared by the resize script and the wrapping iframe; falls
# back to 600 when the map does not report a height.
map_height_px = getattr(map_1, 'height', None) or 600

# Script appended to the map's HTML: every 2.5 seconds it re-applies the target
# height to the document body if it has changed.
# This must be an f-string: `{map_height_px}` is interpolated and the doubled
# braces become the literal braces of the JavaScript blocks. As a plain string
# the placeholder text itself was emitted, so the height was never applied.
resize_for_databricks = f'''
  <script>
    var targetHeight = "{map_height_px}px";
    window.setInterval(function() {{
      if (document.body && document.body.style && document.body.style.height !== targetHeight) {{
        document.body.style.height = targetHeight;
      }}
    }}, 2500)
    </script>
    '''

# Render the map to standalone HTML. The result is decoded as UTF-8 when it is
# bytes, and used as-is when it is already text.
orig_html = map_1._repr_html_()
if isinstance(orig_html, bytes):
    better_html = orig_html.decode('utf-8') + resize_for_databricks
else:
    better_html = orig_html + resize_for_databricks

# Embed the whole page in an iframe through a base64 data URI.
b64 = base64.b64encode(better_html.encode('utf-8'))
src = f"data:text/html;base64,{b64.decode('utf-8')}"

# The iframe is closed explicitly (an <iframe> needs its end tag) and uses the
# same height as the script above.
# NOTE(review): IPython's HTML() may respond to iframe-only markup with a
# harmless "Consider using IPython.display.IFrame" warning - confirm.
base64d_html = (
    f'<iframe src="{src}" '
    f'style="width:100%; height: {map_height_px}px"></iframe>'
)


In [None]:

# Import HTML from the public IPython.display module rather than the internal
# IPython.core.display location.
from IPython.display import HTML

# Render the iframe markup as the cell's rich output.
HTML(base64d_html)