In [1]:
import unittest
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf, col, array, lit
from sources import *

In [3]:
class CapstoneTest(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.spark = SparkSession.builder.master('local[2]').appName('my-local-testing-pyspark-context').getOrCreate()
   
    def test_load_clickstream(self):
        clickstream_df = self.spark.read.csv(sep=r'\t', 
                                        path='input_csv_datasets/mobile-app-clickstream_sample.tsv', 
                                        header=True)
        assert clickstream_df.count() == 39
        
    def test_load_purchases(self):
        purchases_df = self.spark.read.csv('input_csv_datasets/purchases_sample.csv', header=True)
        assert purchases_df.count() == 6
    
    def test_udf(self):
        clickstream_df = self.spark.read.csv(sep=r'\t', 
                                        path='input_csv_datasets/mobile-app-clickstream_sample.tsv', 
                                        header=True)
        df = parse_attributes(clickstream_df)
        
        rows = df.select('campaignId', 'channelId').collect()
        campaign_ids = [row.campaignId for row in rows]
        channel_ids = [row.channelId for row in rows]
        
        assert 'cmp1' in campaign_ids
        assert 'cmp2' in campaign_ids
        assert 'Google Ads' in channel_ids
        
    def test_check_users_in_sessions(self):
        sessions_df = get_session_df(self.spark)
        rows = sessions_df.select('userId').collect()
        ids = list(set([row.userId for row in rows]))
        assert 'u2' in ids 
        assert 'u1' in ids
        assert 'u3' in ids
    
    def test_check_session_in_purchases(self):
        df = provide_sessions_for_purchases(self.spark)
        assert df.count() == 6
    

In [6]:
# Run the suite inside the notebook kernel: argv=[''] keeps unittest from parsing the
# kernel's own command line, and exit=False stops it from calling sys.exit() afterwards.
# The trailing semicolon keeps the returned TestProgram's repr out of the cell output.
unittest.main(argv=[''], verbosity=2, exit=False);

test_check_session_in_purchases (__main__.CapstoneTest) ... ok
  self._sock = None
ok
test_load_clickstream (__main__.CapstoneTest) ... ok
test_load_purchases (__main__.CapstoneTest) ... ok
  self._sock = None
ok

----------------------------------------------------------------------
Ran 5 tests in 2.361s

OK


<unittest.main.TestProgram at 0x1105958e0>