# Connecting to Myria

In [None]:
from myria import *
import numpy

# Load the Myria IPython extension. The %connect and %%query magics used in
# this notebook presumably come from it.
%load_ext myria

# Point the notebook at a Myria deployment. The two URLs match the rest_url
# (port 8753) and execution_url (port 8080) passed to MyriaConnection later in
# this notebook -- presumably in that order; confirm against the extension docs.
%connect http://localhost:8753 http://localhost:8080

In [None]:
# Plain-Python route: build the connection object by hand instead of relying
# on the %connect magic. Only the REST endpoint is supplied here.
connection = MyriaConnection(
    rest_url='http://localhost:8753'
)

# Register it as MyriaRelation's default connection.
MyriaRelation.DefaultConnection = connection

In [None]:
# How many datasets are there on the server?
# print(...) with a single argument behaves identically on Python 2 and 3, so
# the cell no longer depends on the Python 2-only print statement.
print(len(connection.datasets()))

In [None]:
# Let's look at the first dataset...
# (indexing with [0] raises IndexError if the server has no datasets)
dataset = connection.datasets()[0]

# print(...) with a single argument works on both Python 2 and Python 3.
print(dataset['relationKey']['relationName'])
print(dataset['created'])

In [None]:
# View data stored in this relation.
# `dataset` is the entry picked in the previous cell; the MyriaRelation is the
# cell's last expression, so the notebook displays it.
MyriaRelation(dataset['relationKey'])

## Uploading data

In [None]:
%%query

-- Load the Florida insurance sample CSV from S3 with an explicit schema.
-- skip=1 presumably skips the CSV header row (TODO confirm).
-- NOTE(review): the column name `deductable` is spelled this way in the
-- schema; renaming it would change the column name of the stored relation.
florida = load("https://s3-us-west-2.amazonaws.com/myria-demo-data/fl_insurance_sample_2.csv",
csv(schema(
            id:int,
            geo:string,
            granularity:int,
            deductable:float,
            policyID:int, 
            construction:string,
            line:string,
            county:string,
            state:string,
            longitude:float,
            latitude:float,
            fl_site_deductible:float,
            hu_site_deductible:float,
            eq_site_deductible:float,
            tiv_2012:float,
            tiv_2011:float,
            fr_site_limit:float,
            fl_site_limit:float,
            hu_site_limit:float,
            eq_site_limit:float), skip=1));


-- Keep only the rows whose county is 'CLAY COUNTY', with all columns.
clay_county = [from florida where county = 'CLAY COUNTY' emit *];

-- Store the filtered rows as the relation `insurance`, which later cells read
-- back with MyriaRelation("insurance") and scan(insurance).
store(clay_county, insurance);

In [None]:
# Alternatively, upload a relation straight from a Python string.

# Fully-qualified key of the relation to create.
name = {
    'userName': 'Brandon',
    'programName': 'Demo',
    'relationName': 'Books',
}

# Column names and their types, position by position.
schema = {
    "columnNames": ["name", "pages"],
    "columnTypes": ["STRING_TYPE", "LONG_TYPE"],
}

# One "title,pages" record per line; no header row and no trailing newline.
data = "\n".join([
    "Brave New World,288",
    "Nineteen Eighty-Four,376",
    "We,256",
])

result = connection.upload_file(name, schema, data,
                                delimiter=',', overwrite=True)

# Display the relation identified by the key returned from the upload.
MyriaRelation(result['relationKey'], connection=connection)

## Working with relations

In [None]:
# Using the previously-stored insurance relation
# (written by store(clay_county, insurance) in the "Uploading data" section).
# No connection is passed here; presumably MyriaRelation.DefaultConnection,
# set near the top of the notebook, is used -- confirm.
MyriaRelation("insurance")

In [None]:
# View details about this relation
relation = MyriaRelation("insurance")

# print(...) with a single argument works on both Python 2 and Python 3,
# unlike the Python 2-only print statement.
print(len(relation))
print(relation.created_date)
print(relation.schema.names)

In [None]:
# 1: Download as a Python dictionary
# (the result is indexed by row position first, then by column name)
d = MyriaRelation("insurance").to_dict()

# print(...) with a single argument works on both Python 2 and Python 3.
print('First entry returned: %s' % d[0]['county'])

In [None]:
# 2: Download as a Pandas DataFrame
df = MyriaRelation("insurance").to_dataframe()

# Count the rows whose eq_site_deductible is strictly positive.
# print(...) with a single argument works on both Python 2 and Python 3.
print('%d entries with nonzero deductable' % len(df[df.eq_site_deductible > 0]))

In [None]:
# 3: Download as a DataFrame and convert to a numpy array
insurance_df = MyriaRelation("insurance").to_dataframe()

# DataFrame.as_matrix() was deprecated and later removed from pandas;
# .values returns the same ndarray and is available in old and new versions.
array = insurance_df.values

# Look the column up by name instead of a hard-coded position: position 4 in
# the schema this relation was loaded with is policyID, not a site limit.
# NOTE(review): eq_site_limit is chosen to match the eq_site_deductible column
# used elsewhere in this notebook; use fr_/fl_/hu_site_limit if another limit
# was intended.
site_limit_col = insurance_df.columns.get_loc('eq_site_limit')

# print(...) with a single argument works on both Python 2 and Python 3.
print('Mean site limit = %d' % array[:, site_limit_col].mean())

## Working with queries

In [None]:
%%query --Embed MyriaL in Jupyter notebook by using the "%%query" prefix 

-- Read the stored `insurance` relation.
insurance = scan(insurance);

-- Summarise eq_site_deductible: minimum, maximum, mean and standard deviation.
descriptives = [from insurance emit min(eq_site_deductible) as min_deductible, 
                                    max(eq_site_deductible) as max_deductible, 
                                    avg(eq_site_deductible) as mean_deductible, 
                                    stdev(eq_site_deductible) as stdev_deductible];

-- Store the summary as the relation `descriptives`.
store(descriptives, descriptives);

In [None]:
# Grab the results of the most recent execution
# NOTE(review): `_` is IPython's "last displayed output" variable, so this only
# captures the query above if that cell produced the most recent displayed
# result. Running any other value-displaying cell in between changes what gets
# bound here, and a fresh "Run All" depends on cell order staying as it is.
query = _


In [None]:
# Display the object captured from `_` in the previous cell.
query

In [None]:
# Values referenced as @low, @high and @destination by the next query cell.
low = 543
high = 550
destination = 'BoundRelation'

In [None]:
%%query
-- @low, @high and @destination are presumably substituted from the Python
-- variables of the same names defined in the previous cell (TODO confirm).
T1 = scan(TwitterK);
-- Keep rows whose column $0 lies strictly between @low and @high, emitting
-- column $1 under the name x.
T2 = [from T1 where $0 > @low and $0 < @high emit $1 as x];
-- Store the result under the relation name held in @destination.
store(T2, @destination);

In [None]:
# View details about this relation
# ("BoundRelation" is the value of `destination` used by the query above)
relation = MyriaRelation("BoundRelation")

# print(...) with a single argument works on both Python 2 and Python 3,
# unlike the Python 2-only print statement.
print(len(relation))
print(relation.created_date)
print(relation.schema.names)

## Python functions

In [None]:
from myria import *
import numpy
import json
from myria.connection import MyriaConnection
from myria.relation import MyriaRelation
from myria.udf import MyriaPythonFunction
from raco.types import STRING_TYPE, BOOLEAN_TYPE, LONG_TYPE, BLOB_TYPE

# Create the connection, this time supplying both the REST endpoint and the
# execution endpoint.
connection = MyriaConnection(
    rest_url='http://localhost:8753',
    execution_url='http://localhost:8080',
)

In [None]:
# Register Python functions.

# The imports used so far (`from myria import *`, `from myria.udf import ...`)
# do not necessarily bind the bare name `myria`, so import the submodule
# explicitly before using the dotted path below; otherwise this cell can fail
# with NameError on a fresh kernel.
import myria.udf

py = myria.udf.FunctionTypes.PYTHON

# Output type that pyIsPrime is registered with.
outType = "LONG_TYPE"
def pyIsPrime(dt):
    """Return 1 if the integer carried in ``dt`` is prime, otherwise 0.

    The value under test is read from ``dt[0][0]``.

    The result is always a plain int (never a bool), so every code path agrees
    with the LONG_TYPE output type the function is registered with. Numbers
    below 2 (including negatives) are reported as not prime, and 2 is reported
    as prime.
    """
    import math  # kept local, as in the original, so the function carries its own import

    n = dt[0][0]
    if n < 2:
        # 0, 1 and negative numbers are not prime; this also keeps sqrt()
        # below from being called on a negative value.
        return 0
    if n % 2 == 0:
        # 2 is the only even prime.
        return 1 if n == 2 else 0
    # Odd n >= 3: trial division by odd candidates up to sqrt(n).
    for i in range(3, int(math.sqrt(n)) + 1, 2):
        if n % i == 0:
            return 0
    return 1

# Register pyIsPrime, passing outType ("LONG_TYPE") as its second argument.
# Later queries in this notebook call it from MyriaL as pyIsPrime(...).
MyriaPythonFunction(pyIsPrime, outType).register()


In [None]:
# Python function used by the UDA below.
# It is defined as `udfAgg` because both the registration call and the MyriaL
# `uda` definition later in this cell refer to it by that name; under its old
# name (`udfSum`) the cell failed with NameError.
def udfAgg(dt):
    """Stack the image carried by each tuple in ``dt`` into one array.

    For every tuple ``t`` in ``dt`` the image ``t[3]`` is converted with
    ``numpy.asarray`` and written into slot ``t[2]`` (the image id) along a new
    trailing axis of length 5. The ``[:, :, :, imgid]`` indexing means each
    image has to be 3-dimensional and each image id has to be a valid index
    into an axis of length 5.

    NOTE(review): positions 2 and 3 are taken as-is from the original code;
    what sits at positions 0 and 1 of each tuple should be confirmed against
    how Myria invokes the function.

    Returns the stacked array, or ``None`` when ``dt`` is empty. Slots that
    receive no image are zero.
    """
    import numpy as np

    slots = 5  # length of the trailing axis, one slot per image id
    state = None
    for row in dt:
        imgid = row[2]
        img = np.asarray(row[3])
        if state is None:
            # Allocate on the first tuple, once the image shape is known.
            # zeros() rather than empty(): empty() leaves unwritten slots
            # holding arbitrary memory, which made the result non-deterministic.
            state = np.zeros(img.shape + (slots,))
        state[:, :, :, imgid] = img
    return state


# Backward-compatible alias for the name this function was previously defined under.
udfSum = udfAgg


# Register the Python function used by the aggregate. The aggregate function
# defined in this cell returns a numpy array rather than an integer, so it is
# declared with BLOB_TYPE (imported from raco.types above) instead of reusing
# the LONG_TYPE `outType` that belongs to pyIsPrime. This also agrees with the
# b'' value that the UDA below apparently uses as its initial state.
MyriaPythonFunction(udfAgg, BLOB_TYPE).register()

# Define a UDA named `foo` that calls udfAgg, apply it to public:adhoc:raw and
# store the output as `results`.
q = MyriaQuery.submit("""uda foo(subjid,imgid, img) {
    [ b'' as tm];
   [udfAgg(subjid ,imgid, img )];
   [tm];
};
t = scan(public:adhoc:raw);
results = [from t emit t.subjid, t.imgid, foo(t.subjid, t.imgid,t.img) as vox];
STORE(results, results);""", connection=connection)

# Show the status of the submitted query.
q.status

In [None]:
# Ask the connection for its functions; the functions registered above
# (pyIsPrime and the UDA helper) are presumably expected to appear here.
connection.get_functions()

In [None]:
# Look up a single function by name. No function called 'py' is registered
# anywhere in this notebook, so query one that is: pyIsPrime.
connection.get_function('pyIsPrime')

In [None]:
# Apply the registered pyIsPrime function to TwitterK.src and store the flag
# next to the original src/dst columns as TwitterK_isPrime.
q = MyriaQuery.submit(
    """ T1 = scan(TwitterK);
isPrime = [from T1 emit pyIsPrime(T1.src) as isPrime, T1.src, T1.dst];
store( isPrime, TwitterK_isPrime);""",
    connection=connection,
)

# Show the status of the submitted query.
q.status

In [None]:
# Sum the isPrime flags of TwitterK_isPrime and store the total as primeCount.
q = MyriaQuery.submit(
    """ T1 = scan(public:adhoc:TwitterK_isPrime);
primeCount = [from T1 emit  sum(T1.isPrime) as prime];
store( primeCount, primeCount);""",
    connection=connection,
)

# Show the status of the submitted query.
q.status

In [None]:
# 1: Download as a Python dictionary
# (the result is indexed by row position first, then by column name)
d = MyriaRelation("primeCount").to_dict()

# print(...) with a single argument works on both Python 2 and Python 3,
# unlike the Python 2-only print statement.
print('Number of Users with id that is prime: %s' % d[0]['prime'])