-
Notifications
You must be signed in to change notification settings - Fork 68
/
numpy2ri.py
288 lines (237 loc) · 8.47 KB
/
numpy2ri.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
import rpy2.robjects as ro
import rpy2.robjects.conversion as conversion
import rpy2.rinterface as rinterface
import rpy2.rlike.container as rlc
from rpy2.rinterface import (Sexp,
StrSexpVector, ByteSexpVector,
RTYPES)
import numpy # type: ignore
import warnings
# TODO: move this to rinterface.
# Width, in bits, of an R integer. unsignednumpyint_to_rint() uses it to
# reject unsigned numpy integers whose item size is too large.
RINT_SIZE = 32
# Converter that was active when activate() was called; None while this
# module is not activated. deactivate() restores it and resets this to None.
original_converter = None
# Maps a numpy dtype "kind" code to the rpy2 vector class that numpy2rpy()
# uses to build the corresponding R vector.
# The possible kind codes are listed at
# http://numpy.scipy.org/array_interface.shtml
# NOTE(review): this URL looks dated; the array interface description
# presumably lives in the numpy reference documentation now - confirm.
_kinds = {
    # "t" -> not really supported by numpy
    'b': ro.vectors.BoolVector,
    'i': ro.vectors.IntVector,
    # "u" -> special-cased below
    'f': ro.vectors.FloatVector,
    'c': ro.vectors.ComplexVector,
    # "O" -> special-cased below
    'S': ro.vectors.ByteVector,
    'U': ro.vectors.StrVector,
    # "V" -> special-cased below
    # TODO: datetime64 ?
    # "datetime64":
}
# R types for which rpy2py_sexp() builds a numpy array directly.
# TODO: CHARSXP is in fact a scalar.
_vectortypes = {
    RTYPES.CHARSXP,
    RTYPES.LGLSXP,
    RTYPES.INTSXP,
    RTYPES.REALSXP,
    RTYPES.CPLXSXP,
    RTYPES.STRSXP,
}
# Converter holding the numpy-specific conversion rules defined in this
# module.
converter = conversion.Converter('original numpy conversion')
# Shorthands, used below as `@py2rpy.register(...)` / `@rpy2py.register(...)`.
py2rpy = converter.py2rpy
rpy2py = converter.rpy2py
def numpy_O_py2rpy(o):
    """Convert a numpy array of Python objects (dtype kind 'O').

    When every item is a `str` the result is a `StrSexpVector`; when every
    item is a `bytes` it is a `ByteSexpVector`. Otherwise the items are
    collected in a list that is handed to the `py2rpy` of the conversion
    returned by `conversion.get_conversion()`.
    """
    if all(isinstance(item, str) for item in o):
        return StrSexpVector(o)
    if all(isinstance(item, bytes) for item in o):
        return ByteSexpVector(o)
    return conversion.get_conversion().py2rpy(list(o))
def _numpyarray_to_r(a, func):
    """Build an R array from the numpy array `a`.

    `func` is called with the flattened content of `a` to create the R
    vector; the shape of `a` is then passed as `dim` to the R function
    `array`.
    """
    # "F" means "use column-major order"
    flat = numpy.ravel(a, order='F')
    r_vector = func(flat)
    # TODO: no dimnames ?
    # TODO: optimize what is below needed/possible ?
    # (other ways to create R arrays ?)
    r_dim = ro.vectors.IntVector(a.shape)
    make_r_array = rinterface.baseenv['array']
    return make_r_array(r_vector, dim=r_dim)
def unsignednumpyint_to_rint(intarray):
    """Convert a numpy array of unsigned integers to an R array.

    Raises:
        ValueError: when the item size of `intarray` (in bytes) is
            `RINT_SIZE / 8` or more.
    """
    if intarray.itemsize < (RINT_SIZE / 8):
        # Items are strictly narrower than an R integer: build the array
        # with the vector class registered for signed integers.
        return _numpyarray_to_r(intarray, _kinds['i'])
    raise ValueError(
        'Cannot convert numpy array of {numpy_type!s} '
        '(R integers are signed {RINT_SIZE}-bit integers).'
        .format(numpy_type=intarray.dtype.type,
                RINT_SIZE=RINT_SIZE)
    )
@py2rpy.register(numpy.ndarray)
def numpy2rpy(o):
    """Augmented conversion function, converting numpy arrays into
    rpy2.rinterface-level R structures.

    The dtype kind of `o` selects the conversion: kinds listed in `_kinds`
    become R arrays, 'u' goes through `unsignednumpyint_to_rint`, 'O'
    through `numpy_O_py2rpy`, and 'V' arrays with named fields are passed
    field by field to the R function `data.frame`.

    Raises:
        ValueError: for a non-native byte order, for a 'V' array without
            field names, or for any other dtype kind.
    """
    if not o.dtype.isnative:
        raise ValueError('Cannot pass numpy arrays with non-native '
                         'byte orders at the moment.')

    kind = o.dtype.kind

    # Most types map onto R arrays:
    if kind in _kinds:
        return _numpyarray_to_r(o, _kinds[kind])

    # R does not support unsigned types:
    if kind == 'u':
        return unsignednumpyint_to_rint(o)

    # Array-of-PyObject is treated like a Python list:
    if kind == 'O':
        return numpy_O_py2rpy(o)

    # It should be impossible to get here:
    if kind != 'V':
        raise ValueError('Unknown numpy array type "%s".' % str(o.dtype))

    # Record arrays map onto R data frames:
    field_names = o.dtype.names
    if field_names is None:
        raise ValueError('Nothing can be done for this numpy array '
                         'type "%s" at the moment.' % (o.dtype,))
    cv = conversion.get_conversion()
    df_args = tuple(
        (field_name, cv.py2rpy(o[field_name]))
        for field_name in field_names
    )
    return ro.baseenv["data.frame"].rcall(df_args)
@py2rpy.register(numpy.integer)
def npint_py2rpy(obj):
    """Wrap a numpy integer scalar in an `IntSexpVector` of length one."""
    single_item = [obj]
    return rinterface.IntSexpVector(single_item)
@py2rpy.register(numpy.floating)
def npfloat_py2rpy(obj):
    """Wrap a numpy floating-point scalar in a `FloatSexpVector` of length one."""
    single_item = [obj]
    return rinterface.FloatSexpVector(single_item)
@py2rpy.register(object)
def nonnumpy2rpy(obj):
    """Fallback py2rpy conversion for objects without a dedicated rule.

    Objects that are not numpy arrays but expose `__array__` are turned
    into a numpy array, which is then converted with `numpy2rpy`. Any other
    object is delegated to `original_converter` when the module was
    activated, and to `ro.default_converter` otherwise.

    Args:
        obj: the Python object to convert.

    Returns:
        The result of the conversion that was selected.
    """
    # allow array-like objects to also function with this module.
    if not isinstance(obj, numpy.ndarray) and hasattr(obj, '__array__'):
        # The array obtained from `__array__()` has to go through the numpy
        # conversion of this module. Handing it to `ro.default_converter`
        # (as was done before) would defeat the purpose of this branch.
        # NOTE(review): this assumes `ro.default_converter` has no rule
        # for numpy arrays - confirm against rpy2.robjects.
        return numpy2rpy(obj.__array__())
    if original_converter is None:
        # This means that the conversion module was not "activated".
        # For now, go with the default_converter.
        # TODO: the conversion system needs an overhaul badly.
        return ro.default_converter.py2rpy(obj)
    # The conversion module was "activated"
    return original_converter.py2rpy(obj)
@py2rpy.register(rlc.OrdDict)
def orddict_py2rpy(obj):
    """Convert an `OrdDict` into an R list (`ListVector`).

    The keys of `obj` become the names of the list, and each value is
    converted with the `py2rpy` of the current conversion context.
    """
    n_items = len(obj)
    rlist = ro.vectors.ListVector.from_length(n_items)
    rlist.names = ro.vectors.StrVector(tuple(obj.keys()))
    with conversion.get_conversion().context() as cv:
        # TODO: OrdDict.values() is broken. Use .items() for now.
        for position, (_name, value) in enumerate(obj.items()):
            rlist[position] = cv.py2rpy(value)
    return rlist
# TODO: delete ?
# @py2ro.register(numpy.ndarray)
# def numpy2ro(obj):
# res = numpy2ri(obj)
# return ro.vectors.rtypeof2rotype[res.typeof](res)
def _factor_to_numpy_string_array(obj):
    """Convert an R factor into a numpy array of its level labels.

    Args:
        obj: an R integer vector with a 'levels' slot. This function is
            registered for the R class 'factor' on `IntSexpVector`.

    Returns:
        A numpy array in which each code of `obj` is replaced by the
        matching entry of the 'levels' slot, and missing codes by None.
    """
    levels = obj.do_slot('levels')
    # The items of a factor are integer codes, so a missing value is
    # NA_Integer (the same check is used in rpy2py_data_frame). Comparing
    # with NA_Character never matched and let NA codes reach the lookup.
    # Codes are 1-based positions in `levels`.
    return numpy.array(
        tuple(
            None if code is rinterface.NA_Integer else levels[code - 1]
            for code in obj
        )
    )
def rpy2py_data_frame(obj):
    """Convert an R data.frame into a numpy record array.

    Columns with the R class 'factor' are first replaced by a tuple of
    their level labels (None for NA_Integer codes). Every column is then
    converted with the `rpy2py` of the current conversion context. The
    'names' slot of `obj`, unless it is NULL, provides the field names.
    """
    # TODO: R "factor" vectors will not convert well by default
    # (will become integers), so we build a temporary list of columns
    # with the factors as strings. This fix might be good to have
    # as a default.
    columns = []
    # An added complication is that the conversion defined
    # in this module will make __getitem__ at the robjects
    # level return numpy arrays
    with conversion.get_conversion().context() as cv:
        for column in rinterface.ListSexpVector(obj):
            if 'factor' in column.rclass:
                levels = column.do_slot('levels')
                column = tuple(
                    None if code is rinterface.NA_Integer
                    else levels[code - 1] for code in column
                )
            columns.append(cv.rpy2py(column))
    names = obj.do_slot('names')
    if names == rinterface.NULL:
        return numpy.rec.fromarrays(columns)
    return numpy.rec.fromarrays(columns, names=tuple(names))
def rpy2py_list(obj: rinterface.ListSexpVector):
    """Convert an R list that is not a data.frame into an `OrdDict`.

    `obj` is wrapped in a `ListVector` when it is not one already, and
    the `OrdDict` is built from its items.
    """
    # not a data.frame, yet is it still possible to convert it
    if isinstance(obj, ro.vectors.ListVector):
        list_vector = obj
    else:
        list_vector = ro.vectors.ListVector(obj)
    return rlc.OrdDict(list_vector.items())
@rpy2py.register(rinterface.FloatSexpVector)
def rpy2py_floatvector(obj):
    """Convert an R vector of floats into a numpy array."""
    as_array = numpy.array(obj)
    return as_array
@rpy2py.register(rinterface.CharSexp)
def rpy2py_charvector(obj):
    """Return None when `obj` equals NA_Character, `obj` itself otherwise."""
    return None if obj == rinterface.NA_Character else obj
@rpy2py.register(rinterface.StrSexpVector)
def rpy2py_strvector(obj):
    """Convert an R vector of strings into a numpy array.

    The positions of the array comparing equal to NA_Character are
    assigned None.
    """
    as_array = numpy.array(obj)
    na_positions = as_array == rinterface.NA_Character
    as_array[na_positions] = None
    return as_array
@rpy2py.register(Sexp)
def rpy2py_sexp(obj):
    """Generic rpy2py conversion for R objects.

    Objects whose R type is in `_vectortypes` (and is not VECSXP) become
    numpy arrays; all others are delegated to `ro.default_converter`.
    """
    if (obj.typeof not in _vectortypes) or (obj.typeof == RTYPES.VECSXP):
        return ro.default_converter.rpy2py(obj)
    return numpy.array(obj)
# Conversion rules for integer vectors and lists that depend on the R class.
# NOTE(review): presumably the first argument of NameClassMap is the default
# conversion and the dict maps R class names to the conversion used for
# them - confirm against rpy2.robjects.conversion.NameClassMap.
converter._rpy2py_nc_map.update(
    {
        # Integer vectors become numpy arrays, except factors.
        rinterface.IntSexpVector: conversion.NameClassMap(
            numpy.array,
            {'factor': _factor_to_numpy_string_array}
        ),
        # Lists go through rpy2py_list, except data frames.
        rinterface.ListSexpVector: conversion.NameClassMap(
            rpy2py_list,
            {'data.frame': rpy2py_data_frame}
        )
    }
)
def activate():
    """Make the numpy conversion rules part of the global conversion.

    Deprecated: a DeprecationWarning is issued on every call. The current
    converter is saved in `original_converter`, and a new converter based
    on it, extended with the rules registered in this module (except the
    ones for `object`), is set as the conversion. Calling this while the
    module is already activated changes nothing.
    """
    global original_converter

    warnings.warn('The global conversion available with activate() '
                  'is deprecated and will be removed in the next major '
                  'release. Use a local converter.',
                  category=DeprecationWarning)

    # If module is already activated, there is nothing to do
    if original_converter is not None:
        return

    original_converter = conversion.converter_ctx.get()
    new_converter = conversion.Converter('numpy conversion',
                                         template=original_converter)

    # Copy the rules of both directions, leaving out the catch-all
    # registered for `object`.
    for source, target in ((py2rpy, new_converter.py2rpy),
                           (rpy2py, new_converter.rpy2py)):
        for cls, func in source.registry.items():
            if cls is not object:
                target.register(cls, func)

    conversion.set_conversion(new_converter)
def deactivate():
    """Restore the converter that was active before `activate()`.

    Nothing happens when the module has never been activated or has
    already been deactivated.
    """
    global original_converter

    if original_converter is not None:
        conversion.set_conversion(original_converter)
        original_converter = None