In [64]:
import nbimporter # Module to import Jupyter Notebook File
import Application as ap # File containing main code

import unittest
from unittest.mock import patch, MagicMock
import pandas as pd

In [65]:
class TestRestaurantDataProcessor(unittest.TestCase):
    def setUp(self):
        """
        Prepare resources for tests.
        """
        self.url = "https://raw.githubusercontent.com/Papagoat/brain-assessment/main/restaurant_data.json"
        self.country_code_file = "country_code.csv"
        self.output_file_path = "processed_restaurants.csv"
        self.processor = ap.RestaurantDataProcessor(self.url, self.country_code_file, self.output_file_path)

    def test_fetch_json_data(self):
        """
        Test fetching JSON data from URL
        """
        with patch('requests.get') as mock_get:
            # Mocking a successful API response
            mock_response = MagicMock()
            mock_response.ok = True
            mock_response.json.return_value = {"restaurants": []}
            mock_get.return_value = mock_response

            # Testing successful data fetch
            result = self.processor.fetch_json_data()
            self.assertIsNotNone(result)
            mock_get.assert_called_once_with(self.url)

            # Mocking an unsuccessful response and testing failure handling
            mock_response.ok = False
            result = self.processor.fetch_json_data()
            self.assertIsNone(result)

    def test_read_country_code_data(self):
        """
        Test reading country code data from a CSV file.
        """
        with patch('pandas.read_csv') as mock_read_csv:
            # Mocking successful CSV file read
            mock_read_csv.return_value = pd.DataFrame(data={"Country Code": [1], "Country": ["India"]})
            result = self.processor.read_country_code_data()
            self.assertIsNotNone(result)
            mock_read_csv.assert_called_once_with(self.country_code_file)

            # Mocking FileNotFoundError to test error handling
            mock_read_csv.side_effect = FileNotFoundError
            result = self.processor.read_country_code_data()
            self.assertIsNone(result)

    def test_merge_data(self):
        """
        Test merging JSON data with country code data.
        """
        # Mock parsed data
        parsed_data = {
            "restaurants": [
                {"name": "Restaurant A", "restaurant.location.country_id": 1, "cuisine": "Italian"},
                {"name": "Restaurant B", "restaurant.location.country_id": 2, "cuisine": "French"}
            ]
        }
        # Mock country code df
        country_code_df = pd.DataFrame({
            "Country Code": [1, 2],
            "Country Name": ["Italy", "France"]
        })

        merged_df = self.processor.merge_data(parsed_data, country_code_df)

        # Check if the merged DataFrame has the expected columns
        expected_columns = ["name", "restaurant.location.country_id", "cuisine", 
                            "Country Code", "Country Name"
                            ]
        # Check if merged df missing one or more expected columns
        self.assertTrue(all(column in merged_df.columns for column in expected_columns))

        # Verify that the merge operation is correct by checking individual rows
        self.assertEqual(merged_df.loc[0, "Country Name"], "Italy")
        self.assertEqual(merged_df.loc[1, "Country Name"], "France")

        # Check the size of df 
        self.assertEqual(len(merged_df), 2)

    def test_process_merged_data(self):
        """
        Test processing of the merged DataFrame.
        """
        merged_df = pd.DataFrame({
            'restaurant.R.res_id': [1, 2, None],
            'restaurant.name': ['Restaurant A', 'Restaurant B', None],
            'Country': ['Country A', 'Country B', 'Country C'],
            'restaurant.location.city': ['City A', 'City B', 'City C'],
            'restaurant.user_rating.votes': [100, 200, 300],
            'restaurant.user_rating.aggregate_rating': ['4.5', '3.5', '4'],
            'restaurant.cuisines': ['Cuisine A', 'Cuisine B', 'Cuisine C']
        })
        result = self.processor.process_merged_data(merged_df)

        # Expected columns after processing
        expected_columns = ['Restaurant Id', 'Restaurant Name', 'Country', 'City', 
                            'User Rating Votes', 'User Aggregate Rating', 'Cuisines']

        # Check that the processed DataFrame has the correct columns
        self.assertListEqual(list(result.columns), expected_columns)
        # Check that there are no missing values in the DataFrame
        self.assertFalse(result.isnull().values.any())
        # Checking data type conversion
        self.assertTrue(result["User Aggregate Rating"].dtype == float)  

    def test_export_df_to_csv(self):
        """
        Test exporting df to a CSV file
        """
        with patch('pandas.DataFrame.to_csv') as mock_to_csv:
            df = pd.DataFrame()
            self.processor.export_df_to_csv(df)
            mock_to_csv.assert_called_once_with(self.output_file_path, index=False)

    def test_run(self):
        """
        Test the complete data processing workflow
        """
        # Mocking all methods called within the run method
        with patch.object(self.processor, 'fetch_json_data') as mock_fetch, \
             patch.object(self.processor, 'read_country_code_data') as mock_read, \
             patch.object(self.processor, 'merge_data') as mock_merge, \
             patch.object(self.processor, 'process_merged_data') as mock_process, \
             patch.object(self.processor, 'export_df_to_csv') as mock_export:
                
            # Setting return values for mocked methods
            mock_fetch.return_value = {"restaurants": []}
            mock_read.return_value = pd.DataFrame()
            mock_merge.return_value = pd.DataFrame()
            mock_process.return_value = pd.DataFrame()

            self.processor.run()

            # Verifying that each method was called once
            mock_fetch.assert_called_once()
            mock_read.assert_called_once()
            mock_merge.assert_called_once()
            mock_process.assert_called_once()
            mock_export.assert_called_once()

In [66]:
class TestEventDataProcessor(unittest.TestCase):
    def setUp(self):
        self.dummy_df = pd.DataFrame({
            "restaurant.zomato_events": [[{"event": {"event_id": 1}}]]
        })
        # Initialize class
        self.processor = ap.EventDataProcessor(self.dummy_df, 2021, 12, "output.csv")

    def test_expand_and_normalize_events(self):
        """
        Test expanding and normalizing events data.
        """
        # Patch the DataFrame's explode method and the json_normalize function
        with patch('pandas.DataFrame.explode') as mock_explode, \
             patch('pandas.json_normalize') as mock_json_normalize:
            mock_explode.return_value = self.dummy_df  # Mock the result of explode
            mock_json_normalize.return_value = pd.DataFrame({"event.event_id": [1]})  # Mock the result of json_normalize

            result_df = self.processor.expand_and_normalize_events()

            # Verify that explode and json_normalize were called as expected
            mock_explode.assert_called_once_with("restaurant.zomato_events")
            mock_json_normalize.assert_called_once()
            # Ensure the resulting DataFrame has the expected column from normalization
            self.assertIn("event.event_id", result_df.columns)

    def test_filter_events_by_date(self):
        """
        Test filtering events by date to include events active in April 2019.
        """
        
        # Setup a mock DataFrame to filter
        events_df = pd.DataFrame({
            "event.start_date": ["2019-03-01", "2019-05-01"],
            "event.end_date": ["2019-05-31", "2020-01-01"]
        })

        filtered_df = self.processor.filter_events_by_date(events_df, 2019, 4)

        # Adjust the expected DataFrame to match the correct filtering outcome
        expected_df = pd.DataFrame({
            "event.start_date": pd.to_datetime(["2019-03-01"]),
            "event.end_date": pd.to_datetime(["2019-05-31"])
        })

        # Compare the filtered result with the expected DataFrame
        pd.testing.assert_frame_equal(filtered_df.reset_index(drop=True), expected_df.reset_index(drop=True), check_dtype=True)

    def test_process_events_data(self):
        """
        Test processing of events data.
        """
        # Patch the internal methods used in the process_events_data method
        filtered_df = pd.DataFrame({
            "event.event_id": [1], 
            "restaurant.id": [101], 
            "restaurant.name": ["Test Restaurant"], 
            "restaurant.photos_url": ["http://example.com/photo.jpg"], 
            "event.title": ["Test Event"], 
            "event.start_date": ["2021-01-01"], 
            "event.end_date": ["2021-01-02"]
        })

        # Simulate the final expected DataFrame structure after processing
        expected_df = pd.DataFrame({
            "Event Id": [1], 
            "Restaurant Id": [101], 
            "Restaurant Name": ["Test Restaurant"], 
            "Photo URL": ["http://example.com/photo.jpg"], 
            "Event Title": ["Test Event"],  
            "Event Start Date": ["2021-01-01"],
            "Event End Date": ["2021-01-02"]
        })

        with patch.object(self.processor, 'expand_and_normalize_events', return_value=pd.DataFrame()) as mock_expand, \
             patch.object(self.processor, 'filter_events_by_date', return_value=filtered_df) as mock_filter:

            result_df = self.processor.process_events_data()

            # Verify that each method was called once as part of the data processing workflow
            mock_expand.assert_called_once()
            mock_filter.assert_called_once()
            
            # Compare dataframes to check if renaming of columns have worked
            pd.testing.assert_frame_equal(result_df, expected_df)



    def test_export_events_to_csv(self):
        """
        Test exporting events data to CSV.
        """
        # Patch the to_csv method of DataFrame
        with patch('pandas.DataFrame.to_csv') as mock_to_csv:
            df_to_export = pd.DataFrame({"event.event_id": [1]})

            self.processor.export_events_to_csv(df_to_export, "output.csv")

            # Verify that to_csv was called with the correct arguments
            mock_to_csv.assert_called_once_with("output.csv", index=False)

    def test_run(self):
        """
        Test the full run method orchestrating the events data processing.
        """
        # Patch the methods called within the run method
        with patch.object(self.processor, 'process_events_data', return_value=pd.DataFrame()) as mock_process, \
             patch.object(self.processor, 'export_events_to_csv') as mock_export:

            self.processor.run()

            # Verify that the process and export methods are each called once
            mock_process.assert_called_once()
            mock_export.assert_called_once()

In [67]:
class TestRatingStatisticsProcessor(unittest.TestCase):
    def setUp(self):
        """
        Initialize RatingStatisticsProcessor instance with a dummy DataFrame and a test output path.
        """
        self.dummy_df = pd.DataFrame({
            'restaurant.user_rating.rating_text': ['Excellent', 'Good', 'Average', 'Poor', 'Excellent', 'Good', "WRONG"],
            'restaurant.user_rating.aggregate_rating': ["4.5", '4.0', '3.0', '2.0',' 5.0', '4.2', '1.0']
            })
        self.output_file_path = "test_output.json"
        self.processor = ap.RatingStatisticsProcessor(self.dummy_df, self.output_file_path)

    def test_filter_and_convert_ratings(self):
        """
        Test filtering by specified rating texts and conversion of rating to float.
        """

        specified_texts =  ["Excellent","Good", "Average"]
        result_df = self.processor.filter_and_convert_ratings(specified_texts)

        # Verify that only rows with specified rating texts are present
        self.assertTrue(all(
            result_df['restaurant.user_rating.rating_text'].isin(specified_texts)
            ))

    def test_analyze_rating_distribution(self):
        """
        Test analysis of rating distribution.
        
        Checks that the method returns a DataFrame with correct aggregation of ratings.
        """
        filtered_df = self.processor.filter_and_convert_ratings(["Excellent", "Good"])
        result_statistics = self.processor.analyze_rating_distribution(filtered_df)

        # Verify the structure and content of the result DataFrame
        self.assertIn("min", result_statistics.columns)
        self.assertIn("max", result_statistics.columns)

    def test_export_to_json(self):
        """
        Test exporting rating statistics to a JSON file.
        
        Verifies that the method attempts to write to the specified file path.
        """
        # Mock DataFrame to test export
        rating_statistics = pd.DataFrame({
            "User Rating": ["Excellent","Very Good", "Good", "Average", "Poor"],
            "min": [4.0, 3.0,2.0,1.0,0.0],
            "max": [5.0, 4.0, 3.0,2.0,1.0]
        })

        with patch('pandas.DataFrame.to_json') as mock_to_json:
            self.processor.export_to_json(rating_statistics)

            # Verify that to_json was called with the correct file path
            mock_to_json.assert_called_once_with(self.output_file_path, orient='records')

    def test_run(self):
        """
        Test the full run method orchestrating the rating statistics processing.
        
        Verifies that the process correctly filters, analyzes, and exports rating data.
        """
        # Patch the internal methods to isolate the run method's workflow
        with patch.object(self.processor, 'filter_and_convert_ratings', return_value=pd.DataFrame()) as mock_filter, \
             patch.object(self.processor, 'analyze_rating_distribution', return_value=pd.DataFrame()) as mock_analyze, \
             patch.object(self.processor, 'export_to_json') as mock_export:

            self.processor.run(["Excellent","Very Good", "Good", "Average", "Poor"])

            # Verify that each method in the workflow is called once
            mock_filter.assert_called_once()
            mock_analyze.assert_called_once()
            mock_export.assert_called_once()


In [68]:
# Run the tests that unittest finds in the __main__ module (the notebook namespace).
# argv is a placeholder so unittest does not try to parse the kernel's own
# command-line arguments; exit=False stops unittest from calling sys.exit()
# once the run finishes, so the kernel keeps running.
unittest.main(argv=['first-arg-is-ignored'], exit=False)

......
----------------------------------------------------------------------
Ran 15 tests in 0.034s

OK


Events data exported successfully to output.csv
Rating statistics exported successfully
Restaurant Data exported successfully to processed_restaurants.csv
Error reading country code file: 


<unittest.main.TestProgram at 0x236472c6790>