In [56]:
import unittest
import pandas as pd
import numpy as np
import hvplot.pandas
import holoviews as hv
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from unittest.mock import patch
import datetime

# Importing custom defined functions that need to be tested
from myutils import perform_regression, \
    get_important_features_with_lasso, \
    get_param_grid, get_selected_features, \
    get_model_visualization_results, \
    extract_reply_body, \
    get_cleaned_data, \
    classify_sia_polarity, \
    classify_textblob_polarity, \
    predict_sentiment, \
    get_sentiment, \
    convert_quarter_to_dt, \
    cast_to_float

In [59]:
class RegressionTest(unittest.TestCase):
    def setUp(self):
        # Prepare a sample dataset for testing
        self.df = pd.DataFrame({
            'feature1': np.random.rand(100),
            'feature2': np.random.rand(100),
            'feature3': np.random.rand(100),
            'feature4': np.random.rand(100),
            'feature5': np.random.rand(100),
            'target': np.random.rand(100)
        })
        self.X = self.df.iloc[:, :-1]
        self.y = self.df['target']
    
    def test_perform_regression(self):
        # Create an instance of the model
        model = LinearRegression()
        
        # Perform regression. It takes 3 args, and returns list of 3 values: model, train_score and test_score
        result = perform_regression(model, self.X, self.y)
        
        # Assert that the result is a list with three elements
        self.assertIsInstance(result, list)
        self.assertEqual(len(result), 3)
        
        # Assert that the first element is the model object
        self.assertIsInstance(result[0], LinearRegression)
        
        # Assert that the second and third elements are floating-point values between 0 and 1
        self.assertIsInstance(result[1], float)
        self.assertIsInstance(result[2], float)
        self.assertGreaterEqual(result[1], 0)
        self.assertLessEqual(result[1], 1)
#         self.assertGreaterEqual(result[2], 0)  # Negative Test score was returned occasionally
        self.assertLessEqual(result[2], 1)
        
    def test_get_important_features_with_lasso(self):
        # Call the function to get the ordered features
        ordered_features_length = len(get_important_features_with_lasso(self.X, self.y))      
        expected_features_length = 5
        
        # Assert the number ordered features match the length of expected features
        self.assertEqual(ordered_features_length, expected_features_length)
        
    def test_get_param_grid(self):
        param_grid = get_param_grid(RandomForestClassifier())
        
        # Assert that the param_grid is a dict
        self.assertIsInstance(param_grid, dict)
        
    def test_get_selected_features(self):
        ordered_features = get_important_features_with_lasso(self.X, self.y)
        df = get_selected_features(self.X, ordered_features, 'Top 5')
        
        # Assert that the returned variable is Pandas DF, and has same no. of columns as requested
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(df.shape[1], 5)
        
    def test_get_model_visualization_results(self):
        # Adding 5 more columns because the function expects the passed DF has 5 dependent variables
        quarters = []
        for year in np.arange(1998, 2023):
            for q in ['Q1', 'Q2', 'Q3', 'Q4']:
                quarters.append(f'{year}{q}')
        self.df['Quarter'] = quarters
        for i in range(5):
            self.df[f'target{i}'] = np.random.rand(100)
            
        # Call the function with all required arguments
        hvplot_fig = get_model_visualization_results(self.df, LinearRegression(), None, False, 'All')
        
        # Assert that the returned value is a Holoviews layout
        self.assertIsInstance(hvplot_fig, hv.Layout)
        
    # For function extract_reply_body(), single comment
    def test_extract_reply_body_single_comment(self):
        reply = {
            'kind': 't1',
            'data': {
                'created': '2023-05-23',
                'author': 'sajjan',
                'link_id': 't3_postid',
                'subreddit': 'ireland',
                'body': 'First comment'
            }
        }
        expected_result = {
            'created_utc': '2023-05-23',
            'author': 'sajjan',
            'post_id': 'postid',
            'subreddit': 'ireland',
            'text': 'First comment'
        }
        result = extract_reply_body(reply)
        self.assertEqual(result, expected_result)
    
    # For function extract_reply_body(), listing of replies
    def test_extract_reply_body_listing(self):
        reply = {
            'kind': 'Listing',
            'data': {
                'children': [
                    {
                        'kind': 't1',
                        'data': {
                            'created': '2023-05-24',
                            'author': 'john',
                            'link_id': 't3_postid',
                            'subreddit': 'unitedkingdom',
                            'body': 'Reply number 1'
                        }
                    },
                    {
                        'kind': 't1',
                        'data': {
                            'created': '2023-05-24',
                            'author': 'joe',
                            'link_id': 't3_postid',
                            'subreddit': 'netherlands',
                            'body': 'Reply number 2'
                        }
                    }
                ]
            }
        }
        expected_result = [
            {
                'created_utc': '2023-05-24',
                'author': 'john',
                'post_id': 'postid',
                'subreddit': 'unitedkingdom',
                'text': 'Reply number 1'
            },
            {
                'created_utc': '2023-05-24',
                'author': 'joe',
                'post_id': 'postid',
                'subreddit': 'netherlands',
                'text': 'Reply number 2'
            }
        ]
        result = extract_reply_body(reply)
        self.assertEqual(result, expected_result)
        
    def test_get_cleaned_data(self):
        cleaned_data = get_cleaned_data(pd.Series("Hello! Python unittest is great!"))
        expected_data = 'hello python unittest great'
        
        # Assert that the returned variable is Pandas Series
        self.assertIsInstance(cleaned_data, pd.Series)
        # Assert that the first item of array is same as expected value
        self.assertEqual(cleaned_data.values[0], expected_data)
        
    def test_classify_textblob_polarity(self):
        sentiment = classify_textblob_polarity(-0.3)
        self.assertEqual(sentiment, 'negative')
        
    def test_classify_sia_polarity(self):
        polarity = classify_sia_polarity({'compound': 0.75})
        self.assertEqual(polarity, 'positive')
        
        
    @patch('myutils.SentimentIntensityAnalyzer')
    def test_predict_sentiment(self, mock_analyzer):
        mock_analyzer.return_value.polarity_scores.return_value = {'compound': 0.3}
        result = predict_sentiment('hello python unittest great')
        self.assertEqual(result, 'positive')
        
    def test_get_sentiment(self):
        sentiment = get_sentiment('hello python unittest great')
        self.assertEqual(sentiment, 'positive')
        
    def test_convert_quarter_to_dt(self):
        result_dt = convert_quarter_to_dt('2022Q4')
        self.assertIsInstance(result_dt, datetime.datetime)
        self.assertEqual(result_dt, datetime.datetime(2022, 12, 30, 0, 0))
        
    def test_cast_to_float(self):
        result = cast_to_float('100.5 s')
        self.assertIsInstance(result, float)
        self.assertEqual(result, 100.5)
            

In [60]:
# Run every TestCase defined in this notebook.
# argv=[''] stops unittest from parsing the kernel's own command-line
# arguments, and exit=False stops it from calling sys.exit() when done.
# The trailing semicolon keeps the returned TestProgram object from being
# echoed as the cell's output.
unittest.main(argv=[''], exit=False, verbosity=2);

test_cast_to_float (__main__.RegressionTest) ... ok
test_classify_sia_polarity (__main__.RegressionTest) ... ok
test_classify_textblob_polarity (__main__.RegressionTest) ... ok
test_convert_quarter_to_dt (__main__.RegressionTest) ... ok
test_extract_reply_body_listing (__main__.RegressionTest) ... ok
test_extract_reply_body_single_comment (__main__.RegressionTest) ... ok
test_get_cleaned_data (__main__.RegressionTest) ... ok
test_get_important_features_with_lasso (__main__.RegressionTest) ... ok
test_get_model_visualization_results (__main__.RegressionTest) ... ok
test_get_param_grid (__main__.RegressionTest) ... ok
test_get_selected_features (__main__.RegressionTest) ... ok
test_get_sentiment (__main__.RegressionTest) ... ok
test_perform_regression (__main__.RegressionTest) ... ok
test_predict_sentiment (__main__.RegressionTest) ... ok

----------------------------------------------------------------------
Ran 14 tests in 0.173s

OK


<unittest.main.TestProgram at 0x16aaa03a0>