## GA - RunPythonScript Demo

### Mit GIS verbinden:

In [None]:
# Connect to the GIS; every later cell goes through this `gis` handle.
# NOTE(review): "home" presumably selects the portal the notebook is hosted in — confirm.
from arcgis.gis import GIS
gis = GIS("home")

### Layer für Analyse auswählen

In [None]:
# Layers collected here are handed to run_python_script via its `layers` argument.
# Re-running this cell resets the selection to empty.
input_layers = []

In [None]:
# Look up the portal item by name and take the first hit.
# An empty result is reported explicitly instead of as a bare "list index out of range".
bdfs_matches = gis.content.search('thpa_bdfs')
if not bdfs_matches:
    raise IndexError("No portal item found for search term 'thpa_bdfs'")
bdfs = bdfs_matches[0]
bdfs

In [None]:
# Show the layers exposed by the item so the index used in the next cell can be checked.
bdfs.layers

In [None]:
# Queue the item's first layer for the analysis.
# Guarded by layer URL so that re-running this cell does not queue the same layer twice.
selected_layer = bdfs.layers[0]
if all(getattr(queued, "url", None) != selected_layer.url for queued in input_layers):
    input_layers.append(selected_layer)

### Script als Datei zwischenspeichern

In [None]:
%%writefile tmp.py
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.clustering import KMeans
from datetime import datetime

# Specify the URL to the input layer
#bdfs_url = 'https://azure-portal-01.eggits.net/ga/rest/services/DataStoreCatalogs/bigDataFileShares_thpa_bdfs/BigDataCatalogServer/VBB'

# Load the BDFS layer into a DataFrame
#vbb_positions = spark.read.format("webgis").load(bdfs_url)

# Run Detect Incidents to find all train locations where delay status has increased
exp = "(!IsEmpty(TrackFieldWindow(\"rt\",-1,0)[0]) && ($feature[\"rt\"] - TrackFieldWindow(\"rt\",-1,0)[0] > 1))"
delay_incidents = geoanalytics.detect_incidents(input_layer = layers[0], track_fields = ["i"], start_condition_expression = exp, output_mode = "Incidents")

# Combine the x and y columns in the DataFrame into a single column called "features"
assembler = VectorAssembler(inputCols=["x", "y"], outputCol="features")
delay_incidents = assembler.transform(delay_incidents)

# Fit a k-means model with 20 clusters using the "features" column of the cdetected incidents
kmeans = KMeans(k=20)
model = kmeans.fit(delay_incidents.select("features"))

# Add the cluster labels from the k-means model to the original DataFrame
delay_incidents_clusters = model.transform(delay_incidents)

# Write the result DataFrame to the spatiotemporal big data store
delay_incidents_clusters.write.format("webgis").save("VBB_Delay_Cluster_{0}".format(datetime.now().strftime("%m_%d_%Y_%H_%M_%S")))

### Datei als Text einlesen und in Variable speichern

In [None]:
from pathlib import Path

# Read the script back as a single string so it can be passed to run_python_script.
# %%writefile stores the file as UTF-8, so read it as UTF-8 rather than relying on the
# platform's locale encoding.
code = Path('tmp.py').read_text(encoding='utf-8')
code

### Variable an RunPythonScript schicken

In [None]:
%time
from arcgis.geoanalytics.manage_data import run_python_script

run_python_script(code, layers=input_layers)