Skip to content

Commit

Permalink
Add functionality to flatten/unflatten weights
Browse files Browse the repository at this point in the history
Resolves: #5
  • Loading branch information
matthewgilbert committed Apr 14, 2018
1 parent 13bfaca commit ed3dac2
Show file tree
Hide file tree
Showing 2 changed files with 189 additions and 1 deletion.
92 changes: 91 additions & 1 deletion mapping/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@ def setUp(self):
def tearDown(self):
    """Per-test cleanup hook; these tests leave nothing behind to clean up."""
    return None

def test_read_price_data(self):
def assert_dict_of_frames(self, dict1, dict2):
    """
    Assert that two dicts of DataFrames are equal.

    The key sets are compared first, then each pair of frames sharing a key
    is compared with assert_frame_equal.

    Parameters
    ----------
    dict1: dict
        Dictionary of pandas.DataFrame
    dict2: dict
        Dictionary of pandas.DataFrame to compare against dict1
    """
    # assertEquals is a deprecated alias that was removed in Python 3.12,
    # assertEqual is the supported spelling
    self.assertEqual(dict1.keys(), dict2.keys())
    for key in dict1:
        assert_frame_equal(dict1[key], dict2[key])

def test_read_price_data(self):
# using default name_func in read_price_data()
df = util.read_price_data(self.prices)
dt1 = TS("2014-09-30")
Expand Down Expand Up @@ -581,3 +585,89 @@ def test_weighted_expiration_two_generics(self):
TS('2015-01-05')],
columns=["CL1", "CL2"])
assert_frame_equal(wexp, exp_wexp)

def test_flatten(self):
    """flatten() of a single weights frame gives one row per index/column pair."""
    day1 = TS('2015-01-03')
    day2 = TS('2015-01-04')
    index = pd.MultiIndex.from_tuples([
        (day1, 'CLF5'),
        (day1, 'CLG5'),
        (day2, 'CLG5'),
        (day2, 'CLH5'),
    ])
    weights = pd.DataFrame([[1, 0], [0, 1], [1, 0], [0, 1]],
                           index=index, columns=["CL1", "CL2"])

    actual = util.flatten(weights)

    col_order = ["date", "contract", "generic", "weight"]
    expected_data = {
        "date": [day1] * 4 + [day2] * 4,
        "contract": ['CLF5'] * 2 + ['CLG5'] * 4 + ['CLH5'] * 2,
        "generic": ["CL1", "CL2"] * 4,
        "weight": [1, 0, 0, 1, 1, 0, 0, 1],
    }
    expected = pd.DataFrame(expected_data).loc[:, col_order]
    assert_frame_equal(actual, expected)

def test_flatten_dict(self):
    """flatten() of a dict of frames stacks them and tags rows with a "key" column."""
    day1 = TS('2015-01-03')
    day2 = TS('2015-01-04')

    cl_index = pd.MultiIndex.from_tuples([
        (day1, 'CLF5'),
        (day1, 'CLG5'),
        (day2, 'CLG5'),
        (day2, 'CLH5'),
    ])
    cl_weights = pd.DataFrame([[1, 0], [0, 1], [1, 0], [0, 1]],
                              index=cl_index, columns=["CL1", "CL2"])
    co_index = pd.MultiIndex.from_tuples([(day1, 'COF5')])
    co_weights = pd.DataFrame(1, index=co_index, columns=["CO1"])

    actual = util.flatten({"CL": cl_weights, "CO": co_weights})

    col_order = ["date", "contract", "generic", "weight", "key"]
    expected_data = {
        "date": [day1] * 4 + [day2] * 4 + [day1],
        "contract": ['CLF5'] * 2 + ['CLG5'] * 4 + ['CLH5'] * 2 + ["COF5"],
        "generic": ["CL1", "CL2"] * 4 + ["CO1"],
        "weight": [1, 0, 0, 1, 1, 0, 0, 1, 1],
        "key": ["CL"] * 8 + ["CO"],
    }
    expected = pd.DataFrame(expected_data).loc[:, col_order]
    assert_frame_equal(actual, expected)

def test_unflatten(self):
    """unflatten() pivots a long frame without "key" back into a weights matrix."""
    day1 = TS('2015-01-03')
    day2 = TS('2015-01-04')

    col_order = ["date", "contract", "generic", "weight"]
    long_data = {
        "date": [day1] * 4 + [day2] * 4,
        "contract": ['CLF5'] * 2 + ['CLG5'] * 4 + ['CLH5'] * 2,
        "generic": ["CL1", "CL2"] * 4,
        "weight": [1, 0, 0, 1, 1, 0, 0, 1],
    }
    flat_wts = pd.DataFrame(long_data).loc[:, col_order]

    actual = util.unflatten(flat_wts)

    # the pivoted result carries the long column names as index/column names
    exp_index = pd.MultiIndex.from_tuples(
        [(day1, 'CLF5'), (day1, 'CLG5'), (day2, 'CLG5'), (day2, 'CLH5')],
        names=("date", "contract"),
    )
    exp_columns = pd.Index(["CL1", "CL2"], name="generic")
    expected = pd.DataFrame([[1, 0], [0, 1], [1, 0], [0, 1]],
                            index=exp_index, columns=exp_columns)
    assert_frame_equal(actual, expected)

def test_unflatten_dict(self):
    """unflatten() of a long frame with "key" returns one weights matrix per key."""
    day1 = TS('2015-01-03')
    day2 = TS('2015-01-04')

    col_order = ["date", "contract", "generic", "weight", "key"]
    long_data = {
        "date": [day1] * 4 + [day2] * 4 + [day1],
        "contract": ['CLF5'] * 2 + ['CLG5'] * 4 + ['CLH5'] * 2 + ["COF5"],
        "generic": ["CL1", "CL2"] * 4 + ["CO1"],
        "weight": [1, 0, 0, 1, 1, 0, 0, 1, 1],
        "key": ["CL"] * 8 + ["CO"],
    }
    flat_wts = pd.DataFrame(long_data).loc[:, col_order]

    actual = util.unflatten(flat_wts)

    index_names = ("date", "contract")
    cl_index = pd.MultiIndex.from_tuples(
        [(day1, 'CLF5'), (day1, 'CLG5'), (day2, 'CLG5'), (day2, 'CLH5')],
        names=index_names,
    )
    cl_expected = pd.DataFrame([[1, 0], [0, 1], [1, 0], [0, 1]],
                               index=cl_index,
                               columns=pd.Index(["CL1", "CL2"], name="generic"))

    co_index = pd.MultiIndex.from_tuples([(day1, 'COF5')], names=index_names)
    co_expected = pd.DataFrame(1, index=co_index,
                               columns=pd.Index(["CO1"], name="generic"))

    expected = {"CL": cl_expected, "CO": co_expected}
    self.assert_dict_of_frames(actual, expected)
98 changes: 98 additions & 0 deletions mapping/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,104 @@ def name_func(x):
return pd.concat(dfs, axis=0).sort_index()


def flatten(weights):
    """
    Flatten weights into a long DataFrame.

    Parameters
    ----------
    weights: pandas.DataFrame or dict
        A DataFrame of instrument weights with a MultiIndex where the top level
        contains pandas.Timestamps and the second level is instrument names.
        The columns consist of generic names. If dict is given this should be
        a dict of pandas.DataFrame in the above format, with keys for different
        root generics, e.g. 'CL'

    Returns
    -------
    A long DataFrame of weights, where columns are "date", "contract",
    "generic" and "weight". If a dictionary is passed, DataFrame will contain
    additional column "key" containing the key value and be sorted according to
    this key value.

    Raises
    ------
    ValueError
        If weights is neither a pandas.DataFrame nor a dict.

    Example
    -------
    >>> vals = [[1, 0], [0, 1], [1, 0], [0, 1]]
    >>> widx = pd.MultiIndex.from_tuples([(pd.Timestamp('2015-01-03'), 'CLF5'),
    ...                                   (pd.Timestamp('2015-01-03'), 'CLG5'),
    ...                                   (pd.Timestamp('2015-01-04'), 'CLG5'),
    ...                                   (pd.Timestamp('2015-01-04'), 'CLH5')])
    >>> weights = pd.DataFrame(vals, index=widx, columns=["CL1", "CL2"])
    >>> util.flatten(weights)
    """  # NOQA
    if isinstance(weights, pd.DataFrame):
        # stack() moves the generic columns into a third index level, which
        # reset_index() then turns into ordinary columns
        wts = weights.stack().reset_index()
        wts.columns = ["date", "contract", "generic", "weight"]
    elif isinstance(weights, dict):
        wts = []
        # iterate keys in sorted order so the output row order is deterministic
        for key in sorted(weights.keys()):
            wt = weights[key].stack().reset_index()
            wt.columns = ["date", "contract", "generic", "weight"]
            wt.loc[:, "key"] = key
            wts.append(wt)
        wts = pd.concat(wts, axis=0).reset_index(drop=True)
    else:
        # the exception must be raised, otherwise execution falls through to
        # the return below and fails with an unrelated UnboundLocalError
        raise ValueError("weights must be pd.DataFrame or dict")

    return wts


def _pivot_flat_weights(flat_weights):
    # Pivot one long weights frame into a (date, contract) x generic matrix.
    wts = flat_weights.pivot_table(index=["date", "contract"],
                                   columns=["generic"],
                                   values=["weight"])
    # values was passed as a list, so the columns carry an extra outer level
    # holding "weight"; drop it to leave only the generic names
    wts.columns = wts.columns.droplevel(0)
    return wts


def unflatten(flat_weights):
    """
    Pivot weights from long DataFrame into weighting matrix.

    Parameters
    ----------
    flat_weights: pandas.DataFrame
        A long DataFrame of weights, where columns are "date", "contract",
        "generic", "weight" and optionally "key". If "key" column is
        present a dictionary of unflattened DataFrames is returned with the
        dictionary keys corresponding to the "key" column and each sub
        DataFrame containing rows for this key.

    Returns
    -------
    A DataFrame or dict of DataFrames of instrument weights with a MultiIndex
    where the top level contains pandas.Timestamps and the second level is
    instrument names. The columns consist of generic names. If dict is returned
    the dict keys correspond to the "key" column of the input.

    Notes
    -----
    The pivot is done with DataFrame.pivot_table using its default
    aggregation, so repeated ("date", "contract", "generic") rows are
    averaged rather than raising an error.

    Example
    -------
    >>> long_wts = pd.DataFrame(
    ...     {"date": [TS('2015-01-03')] * 4 + [TS('2015-01-04')] * 4,
    ...      "contract": ['CLF5'] * 2 + ['CLG5'] * 4 + ['CLH5'] * 2,
    ...      "generic": ["CL1", "CL2"] * 4,
    ...      "weight": [1, 0, 0, 1, 1, 0, 0, 1]}
    ... ).loc[:, ["date", "contract", "generic", "weight"]]
    >>> util.unflatten(long_wts)

    See also: calc_rets()
    """  # NOQA
    # Index.contains() was deprecated in pandas 0.25 and removed in 1.0; the
    # `in` operator is the supported membership test on column labels
    if "key" in flat_weights.columns:
        weights = {}
        for key in flat_weights.loc[:, "key"].unique():
            flt_wts = flat_weights.loc[flat_weights.loc[:, "key"] == key, :]
            flt_wts = flt_wts.drop(labels="key", axis=1)
            weights[key] = _pivot_flat_weights(flt_wts)
    else:
        weights = _pivot_flat_weights(flat_weights)

    return weights


def calc_rets(returns, weights):
"""
Calculate continuous return series for futures instruments. These consist
Expand Down

0 comments on commit ed3dac2

Please sign in to comment.