# Getting Started with RasterFrames Notebook

## Set Up the Spark Environment

In [1]:
# Import the package itself so its helpers (used below) are in scope.
import pyrasterframes
import pyrasterframes.rf_ipython  # enables nicer visualizations
# Obtain the Spark session through the pyrasterframes helper; presumably it is
# already configured for RasterFrames -- confirm against the pyrasterframes docs.
spark = pyrasterframes.get_spark_session()
# Bare last expression: the notebook displays the session object.
spark

### Get a PySpark DataFrame from [open data](https://landsatonaws.com/)

In [2]:
# Address of a single Landsat 8 GeoTIFF on the landsat-pds S3 endpoint.
# Adjacent string literals inside the parentheses are joined at compile time,
# so the long URL can be split by path segment without backslashes.
uri = (
    'http://landsat-pds.s3.amazonaws.com/c1/L8/015/041/'
    'LC08_L1TP_015041_20190305_20190309_01_T1/'
    'LC08_L1TP_015041_20190305_20190309_01_T1_B2.TIF'
)

# Load the raster at `uri` into a DataFrame via the `rastersource` reader.
df = spark.read.rastersource(uri)

In [3]:
# Number of rows in the DataFrame read above.
df.count()

930

In [4]:
# Print the DataFrame's column names, types and nullability as a tree.
df.printSchema()

root
 |-- tile_path: string (nullable = false)
 |-- tile: struct (nullable = true)
 |    |-- tile_context: struct (nullable = false)
 |    |    |-- extent: struct (nullable = false)
 |    |    |    |-- xmin: double (nullable = false)
 |    |    |    |-- ymin: double (nullable = false)
 |    |    |    |-- xmax: double (nullable = false)
 |    |    |    |-- ymax: double (nullable = false)
 |    |    |-- crs: struct (nullable = false)
 |    |    |    |-- crsProj4: string (nullable = false)
 |    |-- tile: tile (nullable = false)



In [5]:
# Look at a sample: keep only the `tile` column, draw roughly 5% of the rows,
# and collect them to the driver as a pandas DataFrame.
# The fixed seed makes the draw repeatable, so a Restart & Run All against the
# same data shows the same rows (assuming the partitioning is unchanged).
# Note that `fraction` is a per-row probability, not an exact row count.
pandas_df = df.select(df.tile).sample(fraction=0.05, seed=42).toPandas()
# Bare last expression: the notebook renders the sampled frame.
pandas_df

Unnamed: 0,tile
0,"(((504825.0, 3142635.0, 512505.0, 3150315.0), (+proj=utm +zone=17 +datum=WGS84 +units=m +no_defs ,)), Tile(dimensions=[256, 256], cell_type=CellType(uint16raw, None), cells=[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n ...\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]]))"
1,"(((497145.0, 3127275.0, 504825.0, 3134955.0), (+proj=utm +zone=17 +datum=WGS84 +units=m +no_defs ,)), Tile(dimensions=[256, 256], cell_type=CellType(uint16raw, None), cells=[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n ...\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]]))"
2,"(((643065.0, 3127275.0, 650745.0, 3134955.0), (+proj=utm +zone=17 +datum=WGS84 +units=m +no_defs ,)), Tile(dimensions=[256, 256], cell_type=CellType(uint16raw, None), cells=[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n ...\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]]))"
3,"(((650745.0, 3127275.0, 658425.0, 3134955.0), (+proj=utm +zone=17 +datum=WGS84 +units=m +no_defs ,)), Tile(dimensions=[256, 256], cell_type=CellType(uint16raw, None), cells=[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n ...\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]]))"
4,"(((489465.0, 3119595.0, 497145.0, 3127275.0), (+proj=utm +zone=17 +datum=WGS84 +units=m +no_defs ,)), Tile(dimensions=[256, 256], cell_type=CellType(uint16raw, None), cells=[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n ...\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]]))"
5,"(((512505.0, 3119595.0, 520185.0, 3127275.0), (+proj=utm +zone=17 +datum=WGS84 +units=m +no_defs ,)), Tile(dimensions=[256, 256], cell_type=CellType(uint16raw, None), cells=[[ 0 0 0 ... 34835 35040 34929]\n [ 0 0 0 ... 34438 33838 34300]\n [ 0 0 0 ... 34169 34047 35019]\n ...\n [ 0 0 0 ... 28880 28858 28860]\n [ 0 0 0 ... 28868 28882 28825]\n [ 0 0 0 ... 28892 28830 28751]]))"
6,"(((566265.0, 3088875.0, 573945.0, 3096555.0), (+proj=utm +zone=17 +datum=WGS84 +units=m +no_defs ,)), Tile(dimensions=[256, 256], cell_type=CellType(uint16raw, None), cells=[[25072 24691 24206 ... 14174 14055 13722]\n [24987 24851 24668 ... 13488 13960 14160]\n [25287 25327 25125 ... 14932 14992 15673]\n ...\n [18261 18773 18356 ... 28507 29489 29209]\n [18898 19475 18998 ... 27742 28559 28831]\n [20193 20706 19194 ... 27191 27340 27728]]))"
7,"(((650745.0, 3081195.0, 658425.0, 3088875.0), (+proj=utm +zone=17 +datum=WGS84 +units=m +no_defs ,)), Tile(dimensions=[256, 256], cell_type=CellType(uint16raw, None), cells=[[37009 36664 36718 ... 30211 31565 32545]\n [37220 36770 36653 ... 26997 30338 32433]\n [37509 36985 36639 ... 26815 29523 31915]\n ...\n [21496 22198 22629 ... 50496 49952 49069]\n [21319 21920 22650 ... 44582 47035 48996]\n [21015 21923 23135 ... 40391 41619 46114]]))"
8,"(((481785.0, 3073515.0, 489465.0, 3081195.0), (+proj=utm +zone=17 +datum=WGS84 +units=m +no_defs ,)), Tile(dimensions=[256, 256], cell_type=CellType(uint16raw, None), cells=[[0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n ...\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]\n [0 0 0 ... 0 0 0]]))"
9,"(((643065.0, 3073515.0, 650745.0, 3081195.0), (+proj=utm +zone=17 +datum=WGS84 +units=m +no_defs ,)), Tile(dimensions=[256, 256], cell_type=CellType(uint16raw, None), cells=[[41963 41787 41288 ... 19905 19831 20434]\n [41612 41503 41209 ... 20569 20559 20786]\n [41520 41262 41159 ... 20901 21337 21310]\n ...\n [35591 34598 33741 ... 28094 28141 28347]\n [35111 34607 34556 ... 28344 28428 28404]\n [35020 35115 35496 ... 28928 28943 28659]]))"
