Skip to content

Commit

Permalink
Merge pull request pandas-dev#11272 from jdeschenes/nogil_csv
Browse files Browse the repository at this point in the history
PERF: Removed the GIL from parts of the TextReader class
  • Loading branch information
jreback committed Nov 4, 2015
2 parents eb66bcc + 091df3e commit 774411c
Show file tree
Hide file tree
Showing 3 changed files with 239 additions and 84 deletions.
55 changes: 54 additions & 1 deletion asv_bench/benchmarks/gil.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
from .pandas_vb_common import *
from pandas.core import common as com

try:
from cStringIO import StringIO
except ImportError:
from io import StringIO

# Prefer the real threaded decorator from pandas' testing utilities; when it
# is unavailable, install a stand-in so this module can still be imported.
try:
    from pandas.util.testing import test_parallel
except ImportError:
    have_real_test_parallel = False

    def test_parallel(num_threads=1):
        """Fallback decorator factory that leaves the function untouched.

        ``num_threads`` is accepted so call sites look the same as with the
        real decorator, but it is ignored: no threads are started.
        """

        def wrapper(func):
            # Hand the decorated callable back unchanged.
            return func

        return wrapper
else:
    # Benchmarks consult this flag to know the threaded decorator is in use.
    have_real_test_parallel = True


Expand Down Expand Up @@ -321,6 +330,7 @@ def run(arr):
algos.kth_smallest(arr, self.k)
run()


class nogil_datetime_fields(object):
goal_time = 0.2

Expand Down Expand Up @@ -435,4 +445,47 @@ def time_nogil_rolling_std(self):
@test_parallel(num_threads=2)
def run(arr, win):
rolling_std(arr, win)
run(self.arr, self.win)
run(self.arr, self.win)


class nogil_read_csv(object):
    """Benchmarks that call ``read_csv`` under ``test_parallel``.

    ``setup`` writes three CSV fixtures into the current working directory
    (plain floats, floats with a date index, and string columns); each
    ``time_*`` method reads one of them through a helper decorated with
    ``test_parallel(num_threads=2)``. ``teardown`` removes the fixtures.
    """

    # Benchmark-runner timing attributes (see the asv documentation).
    number = 1
    repeat = 5

    def setup(self):
        """Create the CSV fixtures read by the timed methods.

        Raises
        ------
        NotImplementedError
            When only the fallback ``test_parallel`` is available, since the
            reads would then not be run through the real threaded decorator.
            # NOTE(review): asv is documented to treat this as "skip".
        """
        if not have_real_test_parallel:
            raise NotImplementedError

        # Float-only frame.
        self.df = DataFrame(np.random.randn(10000, 50))
        self.df.to_csv('__test__.csv')

        # Same shape, indexed by a date range.
        self.rng = date_range('1/1/2000', periods=10000)
        self.df_date_time = DataFrame(np.random.randn(10000, 50),
                                      index=self.rng)
        self.df_date_time.to_csv('__test_datetime__.csv')

        # Five columns filled with the string 'foo'.
        self.df_object = DataFrame('foo', index=self.df.index,
                                   columns=self.create_cols('object'))
        self.df_object.to_csv('__test_object__.csv')

    def teardown(self):
        """Remove the CSV fixtures written by ``setup`` (best effort)."""
        import os

        for path in ('__test__.csv', '__test_datetime__.csv',
                     '__test_object__.csv'):
            try:
                os.remove(path)
            except OSError:
                # The file is already gone or was never written (for
                # example when ``setup`` bailed out early); nothing to do.
                pass

    def create_cols(self, name):
        """Return five column labels: ``name`` plus a 3-digit index."""
        return [('%s%03d' % (name, i)) for i in range(5)]

    @test_parallel(num_threads=2)
    def pg_read_csv(self):
        read_csv('__test__.csv', sep=',', header=None, float_precision=None)

    def time_nogil_read_csv(self):
        """Time reading the float-only fixture."""
        self.pg_read_csv()

    @test_parallel(num_threads=2)
    def pg_read_csv_object(self):
        read_csv('__test_object__.csv', sep=',')

    def time_nogil_read_csv_object(self):
        """Time reading the string-column fixture."""
        self.pg_read_csv_object()

    @test_parallel(num_threads=2)
    def pg_read_csv_datetime(self):
        read_csv('__test_datetime__.csv', sep=',', header=None)

    def time_nogil_read_csv_datetime(self):
        """Time reading the date-indexed fixture."""
        self.pg_read_csv_datetime()
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.17.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ Performance Improvements

- Release the GIL on most datetime field operations (e.g. ``DatetimeIndex.year``, ``Series.dt.year``), normalization, and conversion to and from ``Period``, ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestamp`` (:issue:`11263`)
- Release the GIL on some rolling algos (``rolling_median``, ``rolling_mean``, ``rolling_max``, ``rolling_min``, ``rolling_var``, ``rolling_kurt``, ``rolling_skew``) (:issue:`11450`)
- Release the GIL when reading and parsing text files in ``read_csv``, ``read_table`` (:issue:`11272`)
- Improved performance of ``rolling_median`` (:issue:`11450`)

- Improved performance of ``to_excel`` (:issue:`11352`)
Expand Down
Loading

0 comments on commit 774411c

Please sign in to comment.