In [1]:
import aframe as af
import pandas as pd
import findspark
findspark.init()
import pyspark
from pyspark.sql import Row
from pyspark.sql import SQLContext
from pyspark import SparkContext
from pyspark.sql.functions import *
import numpy as np

In [2]:
# Messages: bind an AFrame handle to the dataset, then convert it to pandas.
message_af = af.AFrame(dataverse='TinySocial', dataset='GleambookMessages')
message_df = message_af.toPandas()

# Users: same two steps for the second dataset in the TinySocial dataverse.
user_af = af.AFrame(dataverse='TinySocial', dataset='GleambookUsers')
user_df = user_af.toPandas()

In [None]:
# Reuse the kernel's SparkContext when this cell is executed more than once:
# constructing a second SparkContext in the same process raises a ValueError,
# whereas getOrCreate returns the one that is already running. The SparkConf
# only carries the application name, matching the original appName="app".
sc = SparkContext.getOrCreate(pyspark.SparkConf().setAppName("app"))

# SQLContext wraps the context and provides createDataFrame for the cells below.
sqlContext = SQLContext(sc)

In [None]:
# Build Spark DataFrames from the pandas copies, passing the pandas column
# names as the schema.
message_spark = sqlContext.createDataFrame(
    message_df,
    schema=list(message_df.columns),
)

# Missing values in the users table are replaced by the string 'None' before
# the frame is handed to Spark.
user_spark = sqlContext.createDataFrame(
    user_df.fillna('None'),
    schema=list(user_df.columns),
)

## Pandas DataFrame Functions

In [3]:
# Preview the first rows of the users table held in pandas.
user_df.head()

Unnamed: 0,alias,employment,friendIds,gender,id,name,nickname,userSince
0,Margarita,"[{'organizationName': 'Codetechno', 'startDate...","[2, 3, 6, 10]",F,1,MargaritaStoddard,Mags,2012-08-20T10:10:00.000Z
1,Isbel,"[{'organizationName': 'Hexviafind', 'startDate...","[1, 4]",,2,IsbelDull,Izzy,2011-01-22T10:10:00.000Z
2,Emory,"[{'organizationName': 'geomedia', 'startDate':...","[1, 5, 8, 9]",,3,EmoryUnk,,2012-07-10T10:10:00.000Z
3,Nicholas,"[{'organizationName': 'Zamcorporation', 'start...",[2],,4,NicholasStroh,,2010-12-27T10:10:00.000Z
4,Von,"[{'organizationName': 'Kongreen', 'startDate':...","[3, 6, 10]",,5,VonKemble,,2010-01-05T10:10:00.000Z


### Elementwise Functions

In [None]:
def f(x):
    """Return the number of characters in the string form of ``x``.

    The argument is converted with ``str`` first, so non-string values
    (numbers, lists, ``None``) are measured by their printed representation.
    """
    return len(str(x))
# Series.map calls f once per value of the 'name' column.
user_df['name'].map(f)

In [None]:
# Same elementwise length computation, applied to the 'alias' column.
user_df['alias'].map(f)

### Tablewise Functions

In [None]:
# DataFrame.apply hands f a whole column (a Series) at a time, so each result
# is the length of that column's string representation, not a per-value length.
user_df.apply(f)

## AFrame Functions

### Elementwise Functions

In [4]:
# Elementwise map on an AFrame column: 'length' names the function applied to
# each user's name; the generated query is inspected in the next cell.
name_len = user_af['name'].map('length')

In [5]:
# Show the query text that backs name_len.
name_len.query

'SELECT VALUE length(t.name) FROM TinySocial.GleambookUsers t;'

In [6]:
# Fetch the computed name lengths.
name_len.collect()

Unnamed: 0,0
0,17
1,9
2,8
3,13
4,9
5,11
6,14
7,12
8,14
9,9


### Functions with Arguments

In [None]:
# map with an extra argument after the function name.
# NOTE(review): presumably this builds contains(t.name, 'Suzan') -- confirm
# against the .query output in the next cell.
name_contain = user_af['name'].map('contains', 'Suzan')

In [None]:
# Show the query text that backs name_contain.
name_contain.query

In [None]:
# Fetch the result of the 'contains' mapping.
name_contain.collect()

### Tablewise Functions

In [7]:
# Tablewise apply: the named function receives each whole record (t) rather
# than a single field, as the generated query in the next cell shows.
fields = message_af.apply('get_object_fields')

In [8]:
# Show the query text that backs fields.
fields.query

'SELECT VALUE get_object_fields(t) FROM TinySocial.GleambookMessages t;'

In [9]:
# Preview only the first result row.
fields.head(1)

Unnamed: 0,0,1,2,3,4
0,"{'field-name': 'messageId', 'field-type': 'big...","{'field-name': 'authorId', 'field-type': 'bigi...","{'field-name': 'inResponseTo', 'field-type': '...","{'field-name': 'senderLocation', 'field-type':...","{'field-name': 'message', 'field-type': 'strin..."


### PySpark

In [None]:
# Print the Spark copy of the users table.
user_spark.show()

In [None]:
# explode (from the pyspark.sql.functions wildcard import) yields one output
# row per element of the friendIds array; select keeps only that column.
user_spark.select(explode(user_spark['friendIds'])).show()

In [None]:
# withColumn keeps the existing columns and adds the exploded value as
# 'friend'; show(2) prints two rows.
user_spark.withColumn('friend', explode('friendIds')).show(2)

### AFrame Unnest Data

In [None]:
# AFrame counterpart of the explode example: unnest the friendIds array and
# preview the result.
user_af.unnest(user_af['friendIds']).head()

In [None]:
# Show the query generated for an appended unnest.
# NOTE(review): appended=True with name='friend' presumably keeps the original
# fields and adds the unnested value under that name, mirroring the PySpark
# withColumn example -- confirm against the printed query.
user_af.unnest(user_af['friendIds'], appended=True, name='friend').query

In [None]:
# Same appended unnest, this time naming the unnested value 'friendID' and
# previewing rows instead of the query.
user_af.unnest(user_af['friendIds'], appended=True, name='friendID').head()

### Arithmetic Operations

In [10]:
# The + operator on an AFrame column builds a query expression; .query
# displays the generated query text.
(user_af['id'] + 3).query

'SELECT VALUE t.id + 3 FROM TinySocial.GleambookUsers t;'

In [11]:
# Method form of the addition; head() previews the resulting values.
user_af['id'].add(3).head()

Unnamed: 0,0
0,4
1,5
2,6
3,7
4,8


In [12]:
# Multiply each id by 3 and preview the resulting values.
user_af['id'].mul(3).head()

Unnamed: 0,0
0,3
1,6
2,9
3,12
4,15


## Persisting Data in AFrame

In [None]:
# Re-bind the AFrame handles with the same constructor calls used in the load
# cell at the top of the notebook.
message_af = af.AFrame(dataverse='TinySocial', dataset='GleambookMessages')
user_af = af.AFrame(dataverse='TinySocial', dataset='GleambookUsers')

In [None]:
# Convert the users dataset to pandas and show its first two rows.
# NOTE(review): other cells call .head(n) on the AFrame directly; that may
# avoid converting the whole dataset first -- confirm the output is equivalent.
user_af.toPandas().head(2)

In [None]:
# Derive a frame with an extra 'id_3' column computed as id + 3, then preview it.
tmp = user_af.withColumn('id_3', user_af['id']+3)
tmp.head()

In [None]:
# Show the query text that backs the derived frame.
tmp.query

In [None]:
# Persist the derived frame under the name 'UserCopy'; the returned handle is
# inspected in the following cells.
# NOTE(review): presumably this creates a stored dataset on the server -- confirm.
new_af = tmp.persist('UserCopy')

In [None]:
# Check what kind of object persist returned.
type(new_af)

In [None]:
# Inspect which dataverse the persisted frame points at. The leading
# underscore marks this as a private attribute; it is only read here.
new_af._dataverse

In [None]:
# Inspect which dataset the persisted frame points at (private attribute,
# read-only use).
new_af._dataset

In [None]:
# Read the persisted copy back as a pandas DataFrame and display it in full.
new_af.toPandas()

In [None]:
# Clean up: drop the persisted copy.
# NOTE(review): a later cell persists under the same name 'UserCopy', so this
# presumably has to run first -- confirm.
af.AFrame.drop(new_af)

In [None]:
# Rebind tmp (previously the id_3 frame) to the users frame with friendIds
# unnested as 'friendID', then preview two rows.
tmp = user_af.unnest(user_af['friendIds'], appended=True, name='friendID')
tmp.head(2)

In [None]:
# Persist the unnested frame, reusing the name 'UserCopy', and preview the
# stored result through pandas.
copy_af = tmp.persist('UserCopy')
copy_af.toPandas().head()

In [None]:
# Clean up: drop the second persisted copy.
af.AFrame.drop(copy_af)