Skip to content

Commit

Permalink
Pass type test even with categorical
Browse files Browse the repository at this point in the history
  • Loading branch information
benjello committed Aug 14, 2019
1 parent 2239250 commit fa6cb8d
Show file tree
Hide file tree
Showing 9 changed files with 23 additions and 21 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## 0.34 [#XXX](https://github.com/openfisca/openfisca-survey-manager/pull/XXX)

* Better handling of categorical variables

## 0.33 [#145](https://github.com/openfisca/openfisca-survey-manager/pull/145)

* Convert string-like columns to category and save to HDF files in table mode
Expand Down
5 changes: 3 additions & 2 deletions openfisca_survey_manager/calmar.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# -*- coding: utf-8 -*-


from __future__ import division

import logging
import operator

Expand Down Expand Up @@ -228,6 +226,9 @@ def constraint_prime(l):
# rebuilding a weight vector with the same size of the initial one
pondfin_out = array(data_in[initial_weight], dtype = float64)
pondfin_out[is_non_zero_weight] = pondfin

del infodict, mesg # TODO better exploit this information

return pondfin_out, lambdasol, margins_new_dict


Expand Down
2 changes: 1 addition & 1 deletion openfisca_survey_manager/input_dataframe_generator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

from __future__ import division

from builtins import range

import configparser
Expand Down
13 changes: 7 additions & 6 deletions openfisca_survey_manager/scenarios.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-

from __future__ import division

from typing import Dict, List

Expand Down Expand Up @@ -29,17 +28,18 @@


class AbstractSurveyScenario(object):
baseline_simulation = None
baseline_tax_benefit_system = None
cache_blacklist = None
collection = None
debug = False
filtering_variable_by_entity = None
id_variable_by_entity_key = None
inflator_by_variable = None # factor used to inflate variable total
input_data_frame = None
input_data_table_by_period = None
input_data_table_by_entity_by_period = None
input_data_table_by_period = None
non_neutralizable_variables = None
cache_blacklist = None
baseline_simulation = None
baseline_tax_benefit_system = None
role_variable_by_entity_key = None
simulation = None
target_by_variable = None # variable total target to inflate to
Expand Down Expand Up @@ -1419,7 +1419,8 @@ def init_variable_in_entity(simulation, entity, variable_name, series, period):
'Converting {} from dtype {} to {}'.format(
variable_name, series.values.dtype, variable.dtype)
)
if np.issubdtype(series.values.dtype, np.floating):
# np.issubdtype cannot handle categorical variables
if (not pd.api.types.is_categorical_dtype(series)) and np.issubdtype(series.values.dtype, np.floating):
if series.isnull().any():
log.debug('There are {} NaN values for {} non NaN values in variable {}'.format(
series.isnull().sum(), series.notnull().sum(), variable_name))
Expand Down
2 changes: 0 additions & 2 deletions openfisca_survey_manager/statshelpers.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# -*- coding: utf-8 -*-


from __future__ import division

from numpy import argsort, asarray, cumsum, linspace, logical_and as and_, ones, repeat, zeros
import pandas as pd
import weighted
Expand Down
6 changes: 4 additions & 2 deletions openfisca_survey_manager/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,9 @@ def fill_hdf(self, **kwargs):
if not overwrite:
store = pandas.HDFStore(self.survey.hdf5_file_path)
if self.name in store:
log.info('Exiting without overwriting {} in '.format(self.name, self.survey.hdf5_file_path))
log.info(
'Exiting without overwriting {} in {}'.format(
self.name, self.survey.hdf5_file_path))
else:
self._check_and_log(data_file)
try:
Expand All @@ -139,8 +141,8 @@ def fill_hdf(self, **kwargs):
log.info("File {} has been processed in {}".format(
data_file, datetime.datetime.now() - start_table_time))
except ValueError as e:
raise e
log.info('Skipping file {} because of following error \n {}'.format(data_file, e))
raise e

def save_data_frame(self, data_frame, **kwargs):
data_frame.to_hdf(self.survey.hdf5_file_path, self.name, append = False, **kwargs)
Expand Down
2 changes: 0 additions & 2 deletions openfisca_survey_manager/tests/test_quantile.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-

from __future__ import division


import numpy as np
import pandas as pd
Expand Down
8 changes: 3 additions & 5 deletions openfisca_survey_manager/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-

from __future__ import division


import logging
import os
Expand All @@ -27,7 +25,7 @@ def inflate_parameters(parameters, inflator, base_year, last_year = None, ignore

assert last_year == base_year + 1

for name, sub_parameter in parameters.children.items():
for sub_parameter in parameters.children.values():
if isinstance(sub_parameter, ParameterNode):
inflate_parameters(sub_parameter, inflator, base_year, last_year, ignore_missing_units = ignore_missing_units)
else:
Expand Down Expand Up @@ -55,7 +53,7 @@ def inflate_parameters(parameters, inflator, base_year, last_year = None, ignore
(unit_type, sub_parameter.metadata[unit_type]) for unit_type in unit_types
])

for unit_type, unit in unit_by_type.items():
for unit_type in unit_by_type.keys():
if sub_parameter.metadata[unit_type].startswith("currency"):
inflate_parameter_leaf(sub_parameter, base_year, inflator, unit_type = unit_type)

Expand Down Expand Up @@ -150,7 +148,7 @@ def parameters_asof(parameters, instant):
instant = periods.instant(instant)
assert isinstance(instant, periods.Instant)

for name, sub_parameter in parameters.children.items():
for sub_parameter in parameters.children.values():
if isinstance(sub_parameter, ParameterNode):
parameters_asof(sub_parameter, instant)
else:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

setup(
name = 'OpenFisca-Survey-Manager',
version = '0.33.0',
version = '0.34.0',
author = 'OpenFisca Team',
author_email = 'contact@openfisca.fr',
classifiers = [classifier for classifier in classifiers.split('\n') if classifier],
Expand Down

0 comments on commit fa6cb8d

Please sign in to comment.