In [2]:
import aframe as af
import pandas as pd
import numpy as np
import math
# Column names of the GleambookUsers frame; test_apply iterates over this list.
columns = [
    "alias",
    "employment",
    "friendIds",
    "gender",
    "id",
    "name",
    "nickname",
    "userSince",
]

In [4]:
# Open both TinySocial datasets as AFrames (messages first, then users) and
# pull each one into a pandas DataFrame. The tests in this notebook compare
# AFrame results against these pandas copies.
message_af, user_af = (
    af.AFrame(dataverse='TinySocial', dataset=dataset_name)
    for dataset_name in ('GleambookMessages', 'GleambookUsers')
)
message_df, user_df = message_af.toPandas(), user_af.toPandas()

In [5]:
def test_head():
    for i in range(5):
        head = user_df.head()
        assert (head.shape[0] == 5)
        for col in ["alias", "employment", "friendIds", "id", "name", "userSince"]:
            assert (head.iloc[i][col] == user_df.iloc[i][col])
        for col in ["gender", "nickname"]:
            if (type(head.iloc[i][col])==float) and (type(user_df.iloc[i][col])==float): 
                x = float(head.iloc[i][col])
                y = float(user_df.iloc[i][col])
                if (not math.isnan(x)) and (not math.isnan(y)):
                    assert (head.iloc[i][col] == user_df.iloc[i][col])
            else:
                assert (head.iloc[i][col] == user_df.iloc[i][col])

In [6]:
def test_map():
    f = lambda x: len(str(x))
    names = user_df["name"]
    l_map = user_df['name'].map(f)
    count = l_map.shape[0]
    for i in range(count):
        assert (len(names[i]) == l_map[i])

In [7]:
def test_apply():
    f = lambda x: len(str(x))
    apply = user_df.apply(f)
    for (i,col) in enumerate(columns):
        count = sum([f(a) for a in user_df[col]])
        #print(apply[i], count)
        assert (apply[i] == count)

In [8]:
def test_elementwiseMap():
    real_len = [len(i) for i in user_df['name']]
    name_len = user_af['name'].map('length')
    collect = name_len.collect()
    size = collect.shape[0]
    for row in range(size):
        a = collect[0][row]
        assert (a == real_len[row])

In [9]:
def test_functionsWithArgumentsMap():
    name_contain = user_af['name'].map('contains', 'Suzan')
    collect = name_contain.collect()
    names = user_df['name']
    for (i,name) in enumerate(names):
        truth = 'Suzan' in name
        assert (truth == collect[0][i])

In [10]:
def test_tablewiseApply():
    message_af = af.AFrame(dataverse='TinySocial', dataset='GleambookMessages')
    fields = message_af.apply('get_object_fields')
    first_head = fields.head(1)
    real_fields = message_af.columns
    for i in first_head:
        for f in first_head[i]:
            assert (f['field-name'] == list(real_fields[i].keys())[0])
            if f['field-type'] == 'bigint':
                assert (list(real_fields[i].values())[0] == 'int64')
            else:
                assert (f['field-type'] == list(real_fields[i].values())[0])    

In [11]:
def test_unnest():
    output = user_af.unnest(user_af['friendIds']).head()#user_af.unnest(user_af['friendIds'], appended=True, name='friendID').head()
    real = pd.DataFrame({'alias':user_df.alias.repeat(user_df.friendIds.str.len()),'friendID':np.concatenate(user_df.friendIds.values)})
    real_head = list(real['friendID'].loc[[4,5]].head())
    for (i, el) in enumerate(output[0]):
        assert(el == real_head[i])

In [12]:
def test_add():
    real = user_af['id'].head()[0]
    output1 = user_af['id'].add(3).head()[0]
    output2 = (user_af['id']+3).head()[0]
    for i in range(5):
        assert(real[i]+3 == output1[i])
        assert(real[i]+3 == output2[i])

In [13]:
def test_mul():
    real = user_af['id'].head()[0]
    output = user_af['id'].mul(3).head()[0]
    for i in range(5):
        assert(real[i]*3 == output[i])

In [14]:
def test_toPandas():
    output_df = user_af.toPandas().head()
    real_df = user_df.head()
    output_columns = list(user_af.toPandas().columns)
    real_columns = list(user_df.columns)
    testing_columns = ['alias', 'gender', 'id', 'name', 'nickname']
    for i in range(len(real_columns)):
        assert(output_columns[i] == real_columns[i])
    for col in testing_columns:
        for a in range(5):
            out = output_df[col][a]
            real = real_df[col][a]
            if type(out) == float or type(real) == float:
                if math.isnan(out):
                    out = 0.0
                if math.isnan(real):
                    real = 0.0
            assert(out == real)

In [25]:
def test_withColumn():
    output = user_af.withColumn('id_3', user_af['id']+3).head()['id_3']
    real = user_df.sort_values(by=['id']).head()['id']
    for (i,el) in enumerate(list(output)):
        assert(output.iloc[i] == real.iloc[i]+3)

In [26]:
def test_persisting():
    message_af = af.AFrame(dataverse='TinySocial', dataset='GleambookMessages')
    user_af = af.AFrame(dataverse='TinySocial', dataset='GleambookUsers')
    test_toPandas()
    test_withColumn()

In [43]:
# Disabled experiment: the code below sits inside a bare string literal, so
# it is never executed; the cell only echoes the string as its output.
'''tmp = user_af.withColumn('id_3', user_af['id']+3)
new_af = tmp.persist('UserCopy')
def test_persist():
    print(new_af.head())
    print(tmp.head())
    #for col in tmp.columns:
    #    print(tmp[col], new_af[col])
    #    print('-----------------------------------------------------------------------------------------------------------')
    
test_persist()'''

"tmp = user_af.withColumn('id_3', user_af['id']+3)\nnew_af = tmp.persist('UserCopy')\ndef test_persist():\n    print(new_af.head())\n    print(tmp.head())\n    #for col in tmp.columns:\n    #    print(tmp[col], new_af[col])\n    #    print('-----------------------------------------------------------------------------------------------------------')\n    \ntest_persist()"

In [27]:
def test():
    test_head()
    test_map()
    #test_apply()
    test_elementwiseMap()
    test_functionsWithArgumentsMap()
    test_tablewiseApply()
    test_unnest()
    test_add()
    test_mul()
    test_persisting()

In [28]:
# Entry point: run the whole suite. The success line is only printed when no
# assertion failed, because a failing assert raises before print is reached.
if __name__ == "__main__":
    test()
    print("Every test passed!")

Every test passed!
