In [191]:
cd "D:\University\FIT3162\Project\Fake-News-Detection\Data Preprocessing"

D:\University\FIT3162\Project\Fake-News-Detection\Data Preprocessing


In [192]:
import sys
import os

# Resolve the kernel's current working directory to an absolute path and make
# sure it is on the import search path exactly once, so re-running this cell
# never appends a duplicate entry.
module_path = os.path.abspath(os.getcwd())
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [204]:
import unittest
import pytest
import pandas as pd
import numpy as np
import import_ipynb
import data_preprocessing1
import datetime as dt
from pandas.testing import assert_frame_equal

In [1]:
class TestDataPreprocessing(unittest.TestCase):

    def test_remove_unwanted_columns(self):
        remove_col = ['authors', 'sub_id']
        
        df = pd.DataFrame({
        'authors': ['daniel6','george33','mylife'],
        'user_id': ['200','215','345'],
        'title': ['brain','','this is hard'],
        'utc': ['1514784048','1490750889','1490751569'],
        'sub_reddit': ['pic','work','tough-calls'],
        'sub_id': ['3456','122345','283485504'],
        'body': ['nice one','cool','elaborate please']
        })
        
        expected = pd.DataFrame({
        'user_id': ['200','215','345'],
        'title': ['brain','','this is hard'],
        'utc': ['1514784048','1490750889','1490751569'],
        'sub_reddit': ['pic','work','tough-calls'],
        'body': ['nice one','cool','elaborate please']
        })

        result = data_preprocessing1.remove_unwanted_columns(df, remove_col)
        
        columns_expected = len(expected.columns)
        columns_result = len(result.columns)

        self.assertEqual(columns_expected,columns_result)
    
    def test_delete_duplicated_posts(self):
        
        df = pd.DataFrame({
        'id': ['200','215','345','200'],
        'title': ['brain','','this is hard','milk is great'],
        'utc': ['1514784048','1490750889','1490751569','1490750889'],
        'sub_reddit': ['pic','work','tough-calls','dairy'],
        'body': ['nice one','cool','elaborate please','its always']
        })
        
        expected = pd.DataFrame({
        'id': ['215','345'],
        'title': ['','this is hard'],
        'utc': ['1490750889','1490751569'],
        'sub_reddit': ['work','tough-calls'],
        'body': ['cool','elaborate please']
        }) 
            
        result = data_preprocessing1.delete_duplicated_posts(df)
        
        
        rows_expected = expected['id'].count()
        rows_result = result['id'].count()

        assert rows_expected == rows_result, 'Delete duplicated posts test failed'
        
    def test_delete_empty_posts(self):
        
        df = pd.DataFrame({
        'id': ['200','215','345','200'],
        'title': ['brain',np.NaN,'this is hard','milk is great'],
        'utc': ['1514784048','1490750889','1490751569','1490750889'],
        'sub_reddit': ['pic','work','tough-calls','dairy'],
        'body': ['nice one','cool','elaborate please','its always']
        })
        
        expected = pd.DataFrame({
        'id': ['200','345','200'],
        'title': ['brain','this is hard','milk is great'],
        'utc': ['1514784048','1490751569','1490750889'],
        'sub_reddit': ['pic','tough-calls','dairy'],
        'body': ['nice one','elaborate please','its always']
        }) 
    
        result = data_preprocessing1.delete_empty_posts(df,subset_list=['title'])

        
        rows_expected = expected['id'].count()
        rows_result = result['id'].count()
        
        assert rows_expected == rows_result, 'Delete empty posts test failed'
    
    def test_remove_subreddits(self):
        df = pd.DataFrame({
        'id': ['200','215','345','200'],
        'title': ['brain','','this is hard','milk is great'],
        'utc': ['1514784048','1490750889','1490751569','1490750889'],
        'subreddit': ['pic','work','tough-calls','dairy'],
        'body': ['nice one','cool','elaborate please','its always']
        })
        
        expected = pd.DataFrame({
        'id': ['215','345','200'],
        'title': ['','this is hard','milk is great'],
        'utc': ['1490750889','1490751569','1490750889'],
        'subreddit': ['work','tough-calls','dairy'],
        'body': ['cool','elaborate please','its always']
        }) 
        

        
        result = data_preprocessing1.remove_subreddits(df)
        
        rows_expected = expected['id'].count()
        rows_result = result['id'].count()
        
        assert rows_expected == rows_result, 'Remove subreddits test failed'
        
    def test_delete_removed_comments(self):
        df = pd.DataFrame({
        'id': ['200','215','345','200'],
        'title': ['brain','','this is hard','milk is great'],
        'utc': ['1514784048','1490750889','1490751569','1490750889'],
        'subreddit': ['pic','work','tough-calls','dairy'],
        'body': ['nice one','[deleted]','[removed]','its always']
        })
        
        expected = pd.DataFrame({
        'id': ['200','200'],
        'title': ['brain','milk is great'],
        'utc': ['1514784048','1490750889'],
        'subreddit': ['pic','dairy'],
        'body': ['nice one','its always']
        }) 
        
    
        result = data_preprocessing1.delete_removed_comments(df)
    
        
        rows_expected = expected['id'].count()
        rows_result = result['id'].count()
        
        assert rows_expected == rows_result, 'Delete removed comments test failed'
        
    def test_change_date(self):
        df = pd.DataFrame({
        'id': ['200','220'],
        'title': ['brain','milk is great'],
        'created_utc': [1514784048,1490750889],
        'subreddit': ['pic','dairy'],
        'body': ['nice one','its always']
        })
        
        expected = pd.DataFrame({
        'id': ['200','220'],
        'title': ['brain','milk is great'],
        'created_utc': ['2018-01-01 13:20:48','2017-03-29 09:28:09'],
        'subreddit': ['pic','dairy'],
        'body': ['nice one','its always']
        }) 
        
        result = data_preprocessing1.change_date(df)

        assert result.iloc[0]['created_utc'].strftime('%Y-%m-%d %H:%M:%S') == expected.iloc[0]['created_utc'], 'Change UTC test Failed'
   

SyntaxError: invalid syntax (<ipython-input-1-d2029f2b24c0>, line 154)

In [2]:
if __name__ == '__main__':
    # Run every TestCase defined in this notebook. A placeholder argv is
    # supplied so unittest does not try to parse the kernel's own command-line
    # arguments, and exit=False stops unittest from calling sys.exit(), which
    # would otherwise end the cell with SystemExit.
    unittest.main(
        exit=False,
        argv=['first-arg-is-ignored for jupyter'],
    )


NameError: name 'unittest' is not defined