In [1]:
#import 
import os
import sys
import unittest
import pandas as pd

#import customised modules
#add the parent of the current working directory to the module search path
#so the `scripts` package can be imported; the relative '..' is resolved
#against the working directory at the moment this line runs
sys.path.append(os.path.abspath(os.path.join('..')))

#import the function under test
#NOTE(review): on ImportError only a message is printed, so
#`process_df_for_db` stays undefined and any later call to it raises
#NameError - confirm this best-effort behaviour is intended
try:
    from scripts.oracle_db import process_df_for_db
except ImportError:
    print('Could not import module from `scripts/`.')


class TestProcessDfForDb(unittest.TestCase):
    """Unit tests for `process_df_for_db`.

    Each test builds a small in-memory DataFrame and temporarily replaces
    `pd.read_csv` so that the function under test receives that frame
    instead of reading a CSV file.
    """

    def setUp(self):
        #set a dummy df_path for each test
        self.dummy_df_path = "dummy/path/to/file.csv"

    def _process(self, df):
        """Call `process_df_for_db` with `pd.read_csv` stubbed to return `df`.

        The stub accepts any positional or keyword arguments, so the tests
        keep working if the function under test passes extra options to
        `read_csv`. The real `pd.read_csv` is restored when the `with`
        block exits, including when the call raises.

        Args:
            df: DataFrame the stubbed `pd.read_csv` should return.

        Returns:
            Whatever `process_df_for_db(self.dummy_df_path)` returns.
        """
        #imported locally so this helper does not depend on a file-level
        #`unittest.mock` import
        from unittest import mock

        with mock.patch.object(pd, 'read_csv', return_value=df):
            return process_df_for_db(self.dummy_df_path)

    def test_column_rename(self):
        """The 'date' column is replaced by 'review_date'."""
        #create a sample df with all expected columns, including 'date'
        data = {'date': ['2023-01-01'],
                'bank_name': ['New Bank'],
                'rating': [5],
                'review_text': ['good review'],
                'source': ['web']}
        processed_df = self._process(pd.DataFrame(data))

        self.assertIn('review_date', processed_df.columns)
        self.assertNotIn('date', processed_df.columns)

    def test_bank_id_assignment(self):
        """Each of the three bank names below gets its expected 'bank_id'."""
        #create a sample df with all expected columns
        data = {'date': ['2023-01-01', '2023-01-02', '2023-01-03'],
                'bank_name': ['Abyssinia', 'Commercial', 'Dashen'], # Use bank names from the function
                'rating': [5, 3, 1],
                'review_text': ['review 1', 'review 2', 'review 3'],
                'source': ['web', 'app', 'web']}
        processed_df = self._process(pd.DataFrame(data))

        #the ids are compared as strings, exactly as the original test did
        expected_ids = {'Abyssinia': '1', 'Commercial': '2', 'Dashen': '3'}
        for bank_name, bank_id in expected_ids.items():
            with self.subTest(bank_name=bank_name):
                matches = processed_df.loc[processed_df['bank_name'] == bank_name, 'bank_id']
                self.assertEqual(matches.iloc[0], bank_id)

    def test_review_id_generation(self):
        """A 'review_id' column exists and holds one distinct value per row."""
        #create a sample df with all expected columns
        data = {'date': ['2023-01-01', '2023-01-02', '2023-01-03'],
                'bank_name': ['New Bank', 'National Bank', 'District Bank'],
                'rating': [5, 4, 3],
                'review_text': ['review A', 'review B', 'review C'],
                'source': ['web', 'app', 'web']}
        processed_df = self._process(pd.DataFrame(data))

        self.assertIn('review_id', processed_df.columns)
        self.assertEqual(processed_df['review_id'].nunique(), len(processed_df))

    def test_column_order(self):
        """The output columns come back in the expected order."""
        #create a sample df with all expected columns in a different initial order
        data = {'source': ['web'], 'review_text': ['review A'], 'rating': [5],
                'review_date': ['2023-01-01'], 'bank_name': ['New Bank'],
                'bank_id': ['1'], 'review_id': ['123456']}
        processed_df = self._process(pd.DataFrame(data))

        expected_order = ['review_id', 'bank_id', 'bank_name', 'rating',
                          'review_date', 'review_text', 'source']
        self.assertListEqual(list(processed_df.columns), expected_order)


#run the tests
if __name__ == '__main__':
    #the placeholder argv keeps unittest from parsing the host process's
    #command line; exit=False keeps unittest.main from calling sys.exit()
    unittest.main(exit=False, argv=['first-arg-is-ignored'])

....
----------------------------------------------------------------------
Ran 4 tests in 0.018s

OK
