diff --git a/rosie/chamber_of_deputies/classifiers/election_expenses_classifier.py b/rosie/chamber_of_deputies/classifiers/election_expenses_classifier.py
index d6c3299..6ab6225 100644
--- a/rosie/chamber_of_deputies/classifiers/election_expenses_classifier.py
+++ b/rosie/chamber_of_deputies/classifiers/election_expenses_classifier.py
@@ -1,6 +1,9 @@
 from sklearn.base import TransformerMixin
 
+# Value of the `legal_entity` column that predict() flags as an election expense.
+ELECTION_LEGAL_ENTITY = '409-0 - CANDIDATO A CARGO POLITICO ELETIVO'
+
 
 class ElectionExpensesClassifier(TransformerMixin):
     """
     Election Expenses classifier.
@@ -14,11 +17,14 @@ class ElectionExpensesClassifier(TransformerMixin):
     Brazilian Federal Revenue category of companies, preceded by its code.
     """
 
-    def fit(self, X):
+    def fit(self, dataframe):
+        # Nothing to learn; return self so TransformerMixin.fit_transform can chain.
         return self
 
-    def transform(self, X=None):
+    def transform(self, dataframe=None):
+        # Nothing to transform; keep returning self as before this change.
         return self
 
-    def predict(self, X):
-        return X['legal_entity'] == '409-0 - CANDIDATO A CARGO POLITICO ELETIVO'
+    def predict(self, dataframe):
+        # Compare the `legal_entity` column against the election label.
+        return dataframe['legal_entity'] == ELECTION_LEGAL_ENTITY
diff --git a/rosie/chamber_of_deputies/tests/fixtures/election_expenses_classifier.csv b/rosie/chamber_of_deputies/tests/fixtures/election_expenses_classifier.csv
deleted file mode 100644
index 77a594f..0000000
--- a/rosie/chamber_of_deputies/tests/fixtures/election_expenses_classifier.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-congressperson_name,name,legal_entity
-CARLOS ALBERTO DA SILVA, ELEICAO 2006 CARLOS ALBERTO DA SILVA DEPUTADO,409-0 - CANDIDATO A CARGO POLITICO ELETIVO
-PAULO ROGERIO ROSSETO DE MELO,POSTO ROTA 116 DERIVADOS DE PETROLEO LTDA,401-4 - EMPRESA INDIVIDUAL IMOBILIARIA
diff --git a/rosie/chamber_of_deputies/tests/test_election_expenses_classifier.py b/rosie/chamber_of_deputies/tests/test_election_expenses_classifier.py
index 69eea6c..31c2590 100644
--- a/rosie/chamber_of_deputies/tests/test_election_expenses_classifier.py
+++ b/rosie/chamber_of_deputies/tests/test_election_expenses_classifier.py
@@ -9,18 +9,38 @@
 class TestElectionExpensesClassifier(TestCase):
 
     def setUp(self):
-        self.dataset = pd.read_csv('rosie/chamber_of_deputies/tests/fixtures/election_expenses_classifier.csv',
-                                   dtype={'name': np.str, 'legal_entity': np.str})
-        self.subject = ElectionExpensesClassifier()
+        self.election_expenses_classifier = ElectionExpensesClassifier()
 
-    def test_is_election_company(self):
-        self.assertEqual(self.subject.predict(self.dataset)[0], True)
+    def test_legal_entity_is_an_election_company(self):
+        self.dataframe = self._create_dataframe([[
+            'CARLOS ALBERTO DA SILVA',
+            'ELEICAO 2006 CARLOS ALBERTO DA SILVA DEPUTADO',
+            '409-0 - CANDIDATO A CARGO POLITICO ELETIVO'
+        ]])
 
-    def test_is_not_election_company(self):
-        self.assertEqual(self.subject.predict(self.dataset)[1], False)
+        prediction_result = self.election_expenses_classifier.predict(self.dataframe)
 
-    def test_fit(self):
-        self.assertEqual(self.subject.fit(self.dataset), self.subject)
+        self.assertEqual(prediction_result[0], True)
 
-    def test_tranform(self):
-        self.assertEqual(self.subject.transform(), self.subject)
+    def test_legal_entity_is_not_election_company(self):
+        self.dataframe = self._create_dataframe([[
+            'PAULO ROGERIO ROSSETO DE MELO',
+            'POSTO ROTA 116 DERIVADOS DE PETROLEO LTDA',
+            '401-4 - EMPRESA INDIVIDUAL IMOBILIARIA'
+        ]])
+
+        prediction_result = self.election_expenses_classifier.predict(self.dataframe)
+
+        self.assertEqual(prediction_result[0], False)
+
+    def test_fit_returns_the_classifier_itself(self):
+        classifier = self.election_expenses_classifier
+        self.assertIs(classifier.fit(pd.DataFrame()), classifier)
+
+    def test_transform_returns_the_classifier_itself(self):
+        classifier = self.election_expenses_classifier
+        self.assertIs(classifier.transform(), classifier)
+
+    def _create_dataframe(self, dataframe_data):
+        """Build a DataFrame with the columns of the deleted CSV fixture."""
+        return pd.DataFrame(data=dataframe_data, columns=['congressperson_name', 'name', 'legal_entity'])