In [1]:
import os
import unittest

import pandas as pd

In [6]:
def data_extraction(file_path):
    """Load the CSV at *file_path* into a DataFrame.

    Args:
        file_path: Anything ``pandas.read_csv`` accepts as its first
            argument (the tests in this file pass a path string).

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with default options.
    """
    return pd.read_csv(file_path)

In [7]:
def data_transformation(data):
    """Deduplicate billing rows and derive the ``total_charges`` column.

    Steps:
      1. Drop fully duplicated rows.
      2. Convert ``billing_amount`` to float, removing a literal ``$`` when
         the column holds strings.
      3. Add ``total_charges = billing_amount + tax_amount``.

    Args:
        data: DataFrame containing at least the ``billing_amount`` and
            ``tax_amount`` columns.

    Returns:
        A new DataFrame; the caller's frame is not modified.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If a billing value cannot be parsed as a float once the
            ``$`` has been removed.
    """
    # Work on an explicit copy so the column assignments below never write
    # through to (or warn about) the caller's frame.
    data = data.drop_duplicates().copy()

    billing = data['billing_amount']
    # A CSV without any '$' is parsed as a numeric column, which has no
    # ``.str`` accessor; only strip the symbol from non-numeric columns.
    if not pd.api.types.is_numeric_dtype(billing):
        # regex=False: '$' must be matched literally. As a regex it is the
        # end-of-string anchor and would leave the symbol in place.
        billing = billing.str.replace('$', '', regex=False)
    data['billing_amount'] = billing.astype(float)

    data['total_charges'] = data['billing_amount'] + data['tax_amount']
    return data

In [8]:
def data_loading(data, output_file):
    """Write *data* to *output_file* as CSV, omitting the index column.

    Args:
        data: DataFrame to serialise.
        output_file: Destination handed to ``DataFrame.to_csv`` (the tests
            in this file pass a path string).
    """
    data.to_csv(path_or_buf=output_file, index=False)

In [25]:
# Input CSV that every test passes to data_extraction; point this at your
# own test data file.
TEST_DATA_FILE = "/content/billing_data.csv"
# Destination that the loading test passes to data_loading.
OUTPUT_FILE = "output_file.csv"

class TestDataPipeline(unittest.TestCase):
    """Checks for the extract / transform / load helpers.

    Every test reads ``TEST_DATA_FILE``, so that CSV must exist when the
    suite runs. ``test_data_loading`` additionally writes ``OUTPUT_FILE``.
    """

    def test_data_extraction(self):
        """Extraction returns a non-empty DataFrame with the raw columns."""
        # Test case 1: Check if data_extraction function returns a DataFrame
        df = data_extraction(TEST_DATA_FILE)
        self.assertIsInstance(df, pd.DataFrame, "data_extraction didn't return a DataFrame")

        # Test case 2: Check if the DataFrame has the expected columns
        expected_columns = ['customer_id', 'billing_amount', 'tax_amount']
        self.assertListEqual(df.columns.tolist(), expected_columns, "Unexpected columns in the extracted DataFrame")

        # Test case 3: Check if the DataFrame is not empty
        self.assertFalse(df.empty, "Extracted DataFrame is empty")

    def test_data_transformation(self):
        """Transformation adds a float ``total_charges`` column."""
        # Test case 1: Check if data_transformation function returns a DataFrame
        df = data_extraction(TEST_DATA_FILE)
        transformed_df = data_transformation(df)
        self.assertIsInstance(transformed_df, pd.DataFrame, "data_transformation didn't return a DataFrame")

        # Test case 2: Check if the transformed DataFrame has the expected columns and data types
        expected_columns = ['customer_id', 'billing_amount', 'tax_amount', 'total_charges']
        self.assertListEqual(transformed_df.columns.tolist(), expected_columns, "Unexpected columns in the transformed DataFrame")
        # A dtype comparison yields a single bool, which all() cannot
        # iterate; ask pandas directly whether the column is a float dtype.
        self.assertTrue(
            pd.api.types.is_float_dtype(transformed_df['total_charges']),
            "Total charges column should be of float type",
        )

        # Test case 3: Check if the transformed DataFrame is not empty
        self.assertFalse(transformed_df.empty, "Transformed DataFrame is empty")

    def test_data_loading(self):
        """Loading writes a CSV that round-trips to the transformed data."""
        df = data_extraction(TEST_DATA_FILE)
        transformed_df = data_transformation(df)

        # Remove any file left by an earlier run so the existence check
        # below actually proves that data_loading wrote it.
        if os.path.exists(OUTPUT_FILE):
            os.remove(OUTPUT_FILE)

        # Test case 1: Check if data_loading function successfully saves the DataFrame to a CSV file
        data_loading(transformed_df, OUTPUT_FILE)
        self.assertTrue(os.path.exists(OUTPUT_FILE), "Output file not found after data_loading")

        # Test case 2: Check if the loaded CSV file contains the same data as the DataFrame
        loaded_df = pd.read_csv(OUTPUT_FILE)
        # The CSV is written without its index, so the reloaded frame has a
        # fresh 0..n-1 index, while drop_duplicates may have left gaps in
        # the original one. Renumber before comparing.
        # assert_frame_equal compares floats with a tolerance by default,
        # so text round-tripping of float values does not cause spurious
        # failures.
        expected_df = transformed_df.reset_index(drop=True)
        try:
            pd.testing.assert_frame_equal(expected_df, loaded_df)
        except AssertionError as err:
            self.fail(f"Loaded DataFrame is different from the original DataFrame: {err}")

        # Test case 3: Check if the loaded CSV file is not empty
        self.assertFalse(loaded_df.empty, "Loaded DataFrame is empty")

# Run the test suite when this file is executed directly.
# NOTE(review): the `In [n]:` cell markers suggest this came from a notebook.
# Inside a running kernel, unittest.main() typically needs
# argv=['ignored'] and exit=False to avoid parsing the kernel's own
# arguments and calling sys.exit — confirm how this file is executed.
if __name__ == '__main__':
    unittest.main()










