From 85df94fdec8f0900ad20303cb748452c25cd2c3e Mon Sep 17 00:00:00 2001 From: Christopher Whelan Date: Tue, 22 Jan 2019 16:02:54 -0800 Subject: [PATCH] PERF: use new to_records() argument in to_stata() --- pandas/io/stata.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 1b0660171ecacb..1c8b9c80943dc8 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -2385,8 +2385,7 @@ def _prepare_data(self): data = self._convert_strls(data) # 3. Convert bad string data to '' and pad to correct length - dtypes = [] - data_cols = [] + dtypes = {} has_strings = False native_byteorder = self._byteorder == _set_endianness(sys.byteorder) for i, col in enumerate(data): @@ -2395,22 +2394,16 @@ def _prepare_data(self): has_strings = True data[col] = data[col].fillna('').apply(_pad_bytes, args=(typ,)) stype = 'S{type}'.format(type=typ) - dtypes.append(('c' + str(i), stype)) - string = data[col].str.encode(self._encoding) - data_cols.append(string.values.astype(stype)) + dtypes[col] = stype + data[col] = data[col].str.encode(self._encoding).astype(stype) else: values = data[col].values dtype = data[col].dtype if not native_byteorder: dtype = dtype.newbyteorder(self._byteorder) - dtypes.append(('c' + str(i), dtype)) - data_cols.append(values) - dtypes = np.dtype(dtypes) + dtypes[col] = dtype - if has_strings or not native_byteorder: - self.data = np.fromiter(zip(*data_cols), dtype=dtypes) - else: - self.data = data.to_records(index=False) + self.data = data.to_records(index=False, column_dtypes=dtypes) def _write_data(self): data = self.data