Skip to content

Commit

Permalink
Merge 3aae32f into 5171e28
Browse files Browse the repository at this point in the history
  • Loading branch information
jseabold committed Apr 10, 2014
2 parents 5171e28 + 3aae32f commit 72ae5be
Show file tree
Hide file tree
Showing 3 changed files with 371 additions and 0 deletions.
5 changes: 5 additions & 0 deletions statsmodels/tools/sm_exceptions.py
Expand Up @@ -17,6 +17,11 @@ class PerfectSeparationError(Exception):

class MissingDataError(Exception):
pass


class X12NotFoundError(Exception):
pass

# Warning
class ModuleUnavailableWarning(Warning):
pass
Expand Down
1 change: 1 addition & 0 deletions statsmodels/tsa/api.py
Expand Up @@ -13,3 +13,4 @@
from .base import datetools
from .seasonal import seasonal_decompose
from ..graphics import tsaplots as graphics
import x12
365 changes: 365 additions & 0 deletions statsmodels/tsa/x12.py
@@ -0,0 +1,365 @@
"""
Run x12/x13-arima specs in a subprocess from Python and curry results back
into python.
"""

import os
import subprocess
import tempfile
import re

import pandas as pd

from statsmodels.tools.tools import Bunch
from statsmodels.tools.sm_exceptions import X12NotFoundError

__all__ = ["select_arima_order"]

class _freq_to_period:
def __getitem__(self, key):
if key.startswith('M'):
return 12
elif key.startswith('Q'):
return 4


_freq_to_period = _freq_to_period()

_period_to_freq = {12 : 'M', 4 : 'Q'}
_log_to_x12 = {True : 'log', False : 'none', None : 'auto'}
_bool_to_yes_no = lambda x : 'yes' if x else 'no'


def _find_x12(x12path=None):
if x12path is None:
x12path = os.getenv("X12PATH", "")
if x12path and not x12path.endswith("/"):
x12path += "/"

try:
subprocess.check_call("{}x12a".format(x12path),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
return True
except OSError, err:
try:
subprocess.check_call("{}x13a".format(x12path),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
return True
except OSError, err:
if err.errno == os.errno.ENOENT:
return False
#raise X12NotFoundError("Neither x12a nor x13a found "
# "on PATH")
return False


def _check_x12(x12path=None):
if not _find_x12(x12path):
raise X12NotFoundError("x12a and x13a not found on path. Give the "
"path, put them on the path, or set the "
"X12PATH environmental variable.")


def _clean_order(order):
"""
Takes something like (1 1 0)(0 1 1) and returns a arma order, sarma
order tuple. Also accepts (1 1 0) and return arma order and (0, 0, 0)
"""
order = re.findall("\([0-9 ]*?\)", order)
clean = lambda x : tuple(map(int, re.sub("[()]", "", x).split(" ")))
if len(order) > 1:
order, sorder = map(clean, order)
else:
order = clean(order[0])
sorder = "(0,0,0)"

return order, sorder


def run_spec(x12path, specpath, outname=None, meta=False, datameta=False):

if meta and datameta:
raise ValueError("Cannot specify both meta and datameta.")
if meta:
args = [x12path, "-m " + specpath]
elif datameta:
args = [x12path, "-d " + specpath]
else:
args = [x12path, specpath]

if outname:
args += [outname]

return subprocess.Popen(args, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)


def _make_automdl_options(maxorder, maxdiff):
options = "\n"
options += "maxorder = ({} {})\n".format(maxorder[0], maxorder[1])
options += "maxdiff = ({} {})\n".format(maxdiff[0], maxdiff[1])
return options


def _make_var_names(X):
if hasattr(X, "name"):
var_names = X.name
elif hasattr(X, "columns"):
var_names = X.columns
else:
raise ValueError("X is not a Series or DataFrame or is unnamed.")
return " ".join(var_names)


def _make_regression_options(trading, X):
if not trading and X is None: # start regression spec
return ""

reg_spec = "regression{\n"
if trading:
reg_spec += " variables = (td)\n"
if X is not None:
var_names = _make_var_names(X)
reg_spec += " user = ({})\n".format(var_names)
reg_spec += " data = ({})\n".format("\n".join(map(str,
X.values.ravel().tolist())))

reg_spec += "}\n" # close out regression spec
return reg_spec


def _check_errors(errors):
errors = errors[errors.find("spc:")+4:].strip()
if errors and 'ERROR' in errors:
raise ValueError(errors)
elif errors and 'WARNING' in errors:
from warnings import warn
warn(errors, UserWarning)


def _convert_out_to_series(x, dates, name):
"""
Convert x to a DataFrame where x is a string in the format given by
x-13arima-seats output.
"""
from StringIO import StringIO
from pandas import read_table
out = read_table(StringIO(x), skiprows=2, header=None)
return out.set_index(dates).rename(columns={1 : name})[name]


class Spec(object):
@property
def spec_name(self):
return self.__class__.__name__.replace("Spec", "")

def create_spec(self, **kwargs):
spec = """{name} {{
{options}
}}
"""
return spec.format(name=self.spec_name,
options=self.options)

def set_options(self, **kwargs):
options = ""
for key, value in kwargs.iteritems():
options += "{}={}\n".format(key, value)
self.__dict__.update({key : value})
self.options = options


class SeriesSpec(Spec):
"""
Parameters
----------
data
appendbcst : bool
appendfcst : bool
comptype
compwt
decimals
modelspan
name
period
precision
to_print
to_save
span
start
title
type
Notes
-----
Rarely used arguments
divpower
missingcode
missingval
saveprecision
trimzero
"""
def __init__(self, data, name='Unnamed Series', appendbcst=False,
appendfcst=False,
comptype=None, compwt=1, decimals=0, modelspan=(),
period=12, precision=0, to_print=[], to_save=[], span=(),
start=(1, 1), title='', series_type=None, divpower=None,
missingcode=-99999, missingval=1000000000):

appendbcst, appendfcst = map(_bool_to_yes_no, [appendbcst,
appendfcst,
])

series_name = "\"{}\"".format(name[:64]) # trim to 64 characters
title = "\"{}\"".format(title[:79]) # trim to 79 characters
self.set_options(data=data, appendbcst=appendbcst,
appendfcst=appendfcst, period=period, start=start,
title=title, name=series_name,
)


def pandas_to_series_spec(x):
#from statsmodels.tools.data import _check_period_index
#_check_period_index(x)
data = "({})".format("\n".join(map(str, x.values.tolist())))

# get periodicity
# get start / first data
# give it a title
try:
period = _freq_to_period[x.index.freqstr]
except (AttributeError, ValueError):
from pandas.tseries.api import infer_freq
period = _freq_to_period[infer_freq(x.index)]
start_date = x.index[0]
if period == 12:
year, stperiod = start_date.year, start_date.month
elif period == 4:
year, stperiod = start_date.year, start_date.quarter
else: # pragma: no cover
raise ValueError("Only monthly and quarterly periods are supported."
" Please report or send a pull request if you want "
"this extended.")

name = x.name or "Unnamed Series"
series_spec = SeriesSpec(data=data, name=name, period=period,
title=name, start="{}.{}".format(year, stperiod))
return series_spec


def x13arima_analysis(x12path, y, X=None, log=None, outlier=True,
trading=False, retspec=False, speconly=False,
maxorder=(2, 1), maxdiff=(2, 1), print_stdout=False,
start=None, freq=None):
_check_x12(x12path)

if not isinstance(y, (pd.DataFrame, pd.Series)):
if start is None or freq is None:
raise ValueError("start and freq cannot be none if y is not "
"a pandas object")
y = pd.Series(y, index=pd.DatetimeIndex(start=start, periods=len(y),
freq=freq))
spec_obj = pandas_to_series_spec(y)
spec = spec_obj.create_spec()
if log is not None:
spec += "transform{{function={}}}\n".format(_log_to_x12[log])
if outlier:
spec += "outlier{}\n"
options = _make_automdl_options(maxorder, maxdiff)
spec += "automdl{{{}}}\n".format(options)
spec += _make_regression_options(trading, X)
spec += "x11{ save=(d11 d12 d13) }"
if speconly:
return spec
# write it to a tempfile
#TODO: make this more robust - give the user some control?
ftempin = tempfile.NamedTemporaryFile(delete=False, suffix='.spc')
ftempout = tempfile.NamedTemporaryFile(delete=False)
try:
ftempin.write(spec)
ftempin.close()
ftempout.close()
# call x12 arima
p = run_spec(x12path, ftempin.name[:-4], ftempout.name)
p.wait()
stdout = p.stdout.read()
if print_stdout:
print p.stdout.read()
# check for errors
#TODO:
errors = open(ftempout.name + '.err', 'r').read()
_check_errors(errors)

# read in results
results = open(ftempout.name + '.out', 'r').read()
seasadj = open(ftempout.name + '.d11', 'r').read()
trend = open(ftempout.name + '.d12', 'r').read()
irregular = open(ftempout.name + '.d13', 'r').read()
finally:
os.remove(ftempin.name)
os.remove(ftempout.name)

seasadj = _convert_out_to_series(seasadj, y.index, 'seasadj')
trend = _convert_out_to_series(trend, y.index, 'trend')
irregular = _convert_out_to_series(irregular, y.index, 'irregular')

#NOTE: there isn't likely anything in stdout that's not in results
# so may be safe to just suppress and remove it
if not retspec:
return results, seasadj, trend, irregular, stdout
else:
return results, seasadj, trend, irregular, stdout, spec


def select_arima_order(y, x12path=None, *args, **kwargs):
_check_x12(x12path)
(results,
seasadj,
trend,
irregular,
stdout) = x13arima_analysis(x12path, y, *args, **kwargs)
model = re.search("(?<=Final automatic model choice : ).*", results)
order = model.group()
if re.search("Mean is not significant", results):
include_mean = False
elif re.search("Constant", results):
include_mean = True
else:
include_mean = False
order, sorder = _clean_order(order)
res = Bunch(order=order, sorder=sorder, include_mean=include_mean,
results=results, stdout=stdout)
return res


if __name__ == "__main__":
import pandas as pd
import numpy as np
from statsmodels.tsa.arima_process import ArmaProcess
np.random.seed(123)
ar = [1, .35, .8]
ma = [1, .8]
arma = ArmaProcess(ar, ma, nobs=100)
assert arma.isstationary()
assert arma.isinvertible()
y = arma.generate_sample()
dates = pd.date_range("1/1/1990", periods=len(y), freq='M')
ts = pd.TimeSeries(y, index=dates)

xpath = "/home/skipper/src/x12arima/x12a"

try:
results = x13arima_analysis(xpath, ts)
except:
print "Caught exception"

results = x13arima_analysis(xpath, ts, log=False)

#seas_y = pd.read_csv("usmelec.csv")
#seas_y = pd.TimeSeries(seas_y["usmelec"].values,
# index=pd.DatetimeIndex(seas_y["date"], freq="MS"))
#results = x13arima_analysis(xpath, seas_y)

0 comments on commit 72ae5be

Please sign in to comment.