# Title


Text

## Initialization
This section initializes the notebook.

### Dependencies
Here, all necessary libraries are imported.

In [7]:
#Add all dependencies to PYTHON_PATH
import sys
sys.path.append("/usr/lib/spark/python")
sys.path.append("/usr/lib/spark/python/lib/py4j-0.10.4-src.zip")
sys.path.append("/usr/lib/python3/dist-packages")
sys.path.append("/data/local/jupyterhub/modules/python")

#Define environment variables
import os
os.environ["HADOOP_CONF_DIR"] = "/etc/hadoop/conf"
os.environ["PYSPARK_PYTHON"] = "python3"
os.environ["PYSPARK_DRIVER_PYTHON"] = "ipython"

import subprocess

#Load PySpark to connect to a Spark cluster
from pyspark import SparkConf, SparkContext
from hdfs import InsecureClient
from tempfile import TemporaryFile

#from osgeo import gdal
#To read GeoTiffs as a ByteArray
from io import BytesIO
from rasterio.io import MemoryFile

import numpy
import numpy as np
import pandas
import datetime
import matplotlib.pyplot as plt
import rasterio
from rasterio import plot
from os import listdir
from os.path import isfile, join
import scipy.linalg

### Configuration
This configuration determines whether functions print logs during the execution.

In [8]:
debugMode = True

### Connect to Spark
Here, the Spark context is loaded, which allows for a connection to HDFS.

In [9]:
appName = "plot_GeoTiff"
masterURL = "spark://emma0.emma.nlesc.nl:7077"

#A context needs to be created if it does not already exist
try:
    sc.stop()
except NameError:
    print("A new Spark Context will be created.")

sc = SparkContext(conf = SparkConf().setAppName(appName).setMaster(masterURL))
conf = sc.getConf()

## Subtitle

In [10]:
def getModeAsArray(filePath):
    data = sc.binaryFiles(filePath).take(1)
    byteArray = bytearray(data[0][1])
    memfile = MemoryFile(byteArray)
    dataset = memfile.open()
    array = np.array(dataset.read()[0])
    memfile.close()
    array = array.flatten()
    array = array[~np.isnan(array)]
    return array

In [11]:
def detemineNorm(array1, array2):
    if array1.shape != array2.shape:
        print("Error: shapes are not the same: (" + str(array1.shape) + " vs " + str(array2.shape) + ")")
        return 0
    value = scipy.linalg.norm(array1 - array2)
    if value > 1:
        value = scipy.linalg.norm(array1 + array2)
    return value

In [26]:
textFile1 = sc.textFile("hdfs:///user/pheno/svd/spark/BloomGridmetLeafGridmet3/U.csv").map(lambda line: (line.split(','))).map(lambda m: [ float(i) for i in m]).collect()

In [79]:
textFile2 = sc.textFile("hdfs:///user/emma/svd/BloomGridmetLeafGridmet/U.csv").map(lambda line: (line.split(','))).map(lambda m: [ float(i) for i in m]).collect()

In [80]:
array1 = numpy.array(textFile1, dtype=float)
vector11 = array1.T[0]
vector12 = array1.T[1]
vector13 = array1.T[2]

In [81]:
array2 = numpy.array(textFile2, dtype=float).reshape(483850, 37)
vector21 = array2.T[0]
vector22 = array2.T[1]
vector23 = array2.T[2]

In [82]:
print(detemineNorm(vector11, vector21))
print(detemineNorm(vector12, vector22))
print(detemineNorm(vector13, vector23))

1.4313286176935427
1.4087166733898626
1.4221226112304557


In [90]:
array1 = getModeAsArray("hdfs:///user/pheno/svd/spark/BloomGridmetLeafGridmet/svd_u_0_3.tif")
array2 = getModeAsArray("hdfs:///user/emma/svd/BloomGridmetLeafGridmet/ModeU01.tif")
detemineNorm(array1, array2)

0.007051987027367063

In [91]:
print(detemineNorm(array1, vector11))
print(detemineNorm(array1, vector21))
print(detemineNorm(array2, vector11))
print(detemineNorm(array2, vector21))

0.18399346566069125
1.4324139409648913
0.18350224830364414
1.4324954144749305


### BloomFinalLowPR and LeafFinalLowPR

In [11]:
array1 = getModeAsArray("hdfs:///user/emma/svd/BloomFinalLowPRLeafFinalLowPR/ModeU01.tif")
array2 = getModeAsArray("hdfs:///user/pheno/svd/spark/BloomLowPRLeafLowPR/svd_u_0_3.tif")
detemineNorm(array1, array2)

9.28098946087584e-16

In [12]:
array1 = getModeAsArray("hdfs:///user/emma/svd/BloomFinalLowPRLeafFinalLowPR/ModeU02.tif")
array2 = getModeAsArray("hdfs:///user/pheno/svd/spark/BloomLowPRLeafLowPR/svd_u_1_3.tif")
detemineNorm(array1, array2)

8.816536190170412e-13

In [13]:
array1 = getModeAsArray("hdfs:///user/emma/svd/BloomFinalLowPRLeafFinalLowPR/ModeU01.tif")
array2 = getModeAsArray("hdfs:///user/pheno/svd/spark/BloomLowPRLeafLowPR/svd_u_0_3.tif")
detemineNorm(array1, array2)

9.28098946087584e-16

### BloomGridmet and LeafGridmet

In [87]:
array1 = getModeAsArray("hdfs:///user/emma/svd/BloomGridmetLeafGridmet/ModeU01.tif")
array2 = getModeAsArray("hdfs:///user/pheno/svd/spark/BloomGridmetLeafGridmet/svd_u_0_3.tif")
detemineNorm(array1, array2)

0.007051987027367063

In [88]:
array1 = getModeAsArray("hdfs:///user/emma/svd/BloomGridmetLeafGridmet/ModeU02.tif")
array2 = getModeAsArray("hdfs:///user/pheno/svd/spark/BloomGridmetLeafGridmet/svd_u_1_3.tif")
detemineNorm(array1, array2)

0.5077786643397328

In [89]:
array1 = getModeAsArray("hdfs:///user/emma/svd/BloomGridmetLeafGridmet/ModeU03.tif")
array2 = getModeAsArray("hdfs:///user/pheno/svd/spark/BloomGridmetLeafGridmet/svd_u_2_3.tif")
detemineNorm(array1, array2)

0.496526838642191

End of Notebook