This turtorial can be found at this link:
    
https://www.kinetica.com/docs/udf/python/examples/dist_noncuda_sum_of_squares/dist_noncuda_sum_of_squares.html

This turtorial uses two of numeric data colomns $x_1$ and $x_2$ from input table, UDF is used to computer a column $y={x_1}^2+{x_2}^2 $ and save it in to output table

## udf_sos_init.py: creates the input & output tables and loads test data

#### Connect to Kinetica

In [1]:
import sys
sys.path.append("/opt/gpudb/udf/api/python") # to access kinetica_proc.py file
import collections
import gpudb
import random
KINETICA_HOST = 'p4.rewreu.org'
KINETICA_PORT = '9191'
INPUT_TABLE = 'udf_sos_in_table'
OUTPUT_TABLE = 'udf_sos_out_table'
h_db = gpudb.GPUdb(encoding = 'BINARY', host = KINETICA_HOST, port = KINETICA_PORT)

#### Create input data table

Creat a table named with "udf_sos_in_table", generated 1000 rows of random numbers into x1 and x2 colomns. Then create an output table named "udf_sos_out_table". 

Run the script below, and check "udf_sos_in_table" in gadmin.

In [2]:
input_type = """
{
   "type": "record",
   "name": "input_type",
   "fields": [
      {"type":"float","name":"x1"},
      {"type":"int","name":"id"},
      {"type":"float","name":"x2"}
   ]
}  """.replace(' ', '').replace('\n', '')
output_type = """
{
   "type": "record",
   "name": "out_type",
   "fields": [
    {"type":"int","name":"id"},
      {"name":"y","type":"float"}
   ]
}  """.replace(' ', '').replace('\n', '')
type_properties = {"id":["primary_key"]}
type_id = h_db.create_type(type_definition=input_type, label=INPUT_TABLE + '_lbl', properties=type_properties)['type_id']
if h_db.has_table(table_name=INPUT_TABLE)['table_exists']:
    h_db.clear_table(table_name=INPUT_TABLE)
h_db.create_table(table_name=INPUT_TABLE, type_id=type_id)
## Insert input data
encoded_obj_list = []
for val in range(1000):
    datum = collections.OrderedDict()
    datum["x1"] = random.gauss(1, 1)
    datum["x2"] = random.gauss(1, 2)
    datum["id"] = val
    encoded_obj_list.append(h_db.encode_datum(input_type, datum))
h_db.insert_records(table_name=INPUT_TABLE, data=encoded_obj_list, list_encoding='binary', options={})
## Create output data table

type_id = h_db.create_type(type_definition=output_type, label=OUTPUT_TABLE + '_lbl', properties=type_properties)['type_id']
if h_db.has_table(table_name=OUTPUT_TABLE)['table_exists']:
    h_db.clear_table(table_name=OUTPUT_TABLE)
h_db.create_table(table_name=OUTPUT_TABLE, type_id=type_id)

{u'is_collection': False,
 'status_info': {u'data_type': u'create_table_response_avro',
  u'message': u'',
  'response_time': 0.00435,
  u'status': u'OK'},
 u'table_name': u'udf_sos_out_table',
 u'type_id': u'2564890278364254450'}

## A look at the input table

In [3]:
%%bash 
./kisql/kisql -h p4.rewreu.org -sql \
"""
SELECT id,x1,x2
FROM udf_sos_in_table
LIMIT 10
"""

Connection successful
Catalog [KINETICA]
Time 0.269
+------+---------------+--------------+
|   id |            x1 |           x2 |
+------+---------------+--------------+
|    0 |     1.2324717 |   -2.1719012 |
|    1 |     1.1998428 |     -3.97436 |
|    2 |     0.7730517 |    2.4205842 |
|    3 |   -0.22777738 |    -2.601644 |
|    4 |    0.41525286 |   -0.9269873 |
|    5 |     1.7911818 |    2.0021868 |
|    6 |    0.58970404 |     3.326359 |
|    7 |     1.2972246 |    2.9697058 |
|    8 |    0.01205742 |    0.5122146 |
|    9 |      1.574601 |   -1.2934016 |
+------+---------------+--------------+
Rows read = 10
Exec time 0.046 Fetch time 0.018


## Below is the proc file passed into UDF

run the script below, a python file named udf_sos_proc.py will be saved in the current folder

In [4]:
%%writefile udf_sos_proc.py
################################################################################
#                                                                              #
# Kinetica UDF Sum of Squares Example UDF                                      #
# ---------------------------------------------------------------------------- #
# This UDF takes pairs of input & output tables, computing the sum of the      #
# squares of all the columns for each input table and saving the resulting     #
# sums to the first column of the corresponding output table.                  #
#                                                                              #
################################################################################
import sys
import math
from kinetica_proc import ProcData

proc_data = ProcData()
in_table, out_table = proc_data.input_data[0], proc_data.output_data[0]
# Extend the output table by the number of record entries in the input table
out_table.size = in_table.size

# Use the first column in the output table as the output column
# Loop through all the input table columns
for i in xrange(0, in_table.size):
    out_table["y"][i] = (in_table["x1"][i]) ** 2 + (in_table["x2"][i]) ** 2
    out_table["id"][i] = in_table["id"][i]

proc_data.complete()

Overwriting udf_sos_proc.py


## Execute the UDF

run the script while watch 

In [5]:
import sys
import gpudb

KINETICA_HOST = 'p4.rewreu.org'
KINETICA_PORT = '9191'
proc_name = 'udf_sos_proc'
file_name = proc_name + '.py'
# Read proc code in as bytes and add to a file data array
files = {}
with open(file_name, 'rb') as file:
    files[file_name] = file.read()
# Connect to Kinetica
h_db = gpudb.GPUdb(encoding='BINARY', host=KINETICA_HOST, port=KINETICA_PORT)
# Remove proc if it exists from a prior registration
if h_db.has_proc(proc_name)['proc_exists']:
    h_db.delete_proc(proc_name)
print "Registering proc..."
response = h_db.create_proc(proc_name, 'distributed', files, 'python', [file_name], {})
print response

print "Executing proc..."
response = h_db.execute_proc(proc_name, {}, {}, [INPUT_TABLE], {}, [OUTPUT_TABLE], {})
print response


Registering proc...
{'status_info': {u'status': u'OK', u'data_type': u'create_proc_response', u'message': u'', 'response_time': 0.00444}, u'proc_name': u'udf_sos_proc'}
Executing proc...
{'status_info': {u'status': u'OK', u'data_type': u'execute_proc_response', u'message': u'', 'response_time': 0.00158}, u'run_id': u'19'}


## Look at the result

In [6]:
%%bash 
./kisql/kisql -h p4.rewreu.org -sql \
"""
select in_t.x1, in_t.x2, out_t.y as UDF_result, 
FLOAT(in_t.x1 * in_t.x1 + in_t.x2 * in_t.x2) as Query_result,
FLOAT(in_t.x1 * in_t.x1 + in_t.x2 * in_t.x2) - out_t.y as diff
from udf_sos_out_table as out_t
inner join udf_sos_in_table as in_t on in_t.id = out_t.id
limit 10
"""

Connection successful
Catalog [KINETICA]
Time 0.193
Rows read = 0
Exec time 0.067 Fetch time 0.001
