Skip to content

Commit

Permalink
CLN: Newer syntax, unicode, iterator range, zip, etc
Browse files Browse the repository at this point in the history
Use new syntax ('except as', print as function, new raise syntax, next
function rather than method, next and __next__ defined throughout,
switch out xrange, etc.)

Now range is always equivalent to 2.X xrange throughout (but you need to
import range from py3compat to use it). Also remove the range fixer from
setup.py, and add compatible long and string types, etc.
  • Loading branch information
jtratner committed Jul 29, 2013
1 parent 5577c0c commit 6c304c7
Show file tree
Hide file tree
Showing 177 changed files with 2,159 additions and 1,765 deletions.
5 changes: 3 additions & 2 deletions bench/alignment.py
@@ -1,13 +1,14 @@
# Setup
from pandas.util.py3compat import range
import numpy as np
import pandas
import la
N = 1000
K = 50
arr1 = np.random.randn(N, K)
arr2 = np.random.randn(N, K)
idx1 = range(N)
idx2 = range(K)
idx1 = list(range(N))
idx2 = list(range(K))

# pandas
dma1 = pandas.DataFrame(arr1, idx1, idx2)
Expand Down
3 changes: 2 additions & 1 deletion bench/bench_get_put_value.py
@@ -1,12 +1,13 @@
from pandas import *
from pandas.util.testing import rands
from pandas.util.py3compat import range

N = 1000
K = 50


def _random_index(howmany):
return Index([rands(10) for _ in xrange(howmany)])
return Index([rands(10) for _ in range(howmany)])

df = DataFrame(np.random.randn(N, K), index=_random_index(N),
columns=_random_index(K))
Expand Down
3 changes: 2 additions & 1 deletion bench/bench_groupby.py
@@ -1,13 +1,14 @@
from pandas import *
from pandas.util.testing import rands
from pandas.util.py3compat import range

import string
import random

k = 20000
n = 10

foo = np.tile(np.array([rands(10) for _ in xrange(k)], dtype='O'), n)
foo = np.tile(np.array([rands(10) for _ in range(k)], dtype='O'), n)
foo2 = list(foo)
random.shuffle(foo)
random.shuffle(foo2)
Expand Down
2 changes: 1 addition & 1 deletion bench/bench_join_panel.py
Expand Up @@ -35,7 +35,7 @@ def reindex_on_axis(panels, axis, axis_reindex):
# concatenate values
try:
values = np.concatenate([p.values for p in panels], axis=1)
except (Exception), detail:
except Exception as detail:
raise Exception("cannot append values that dont' match dimensions! -> [%s] %s"
% (','.join(["%s" % p for p in panels]), str(detail)))
# pm('append - create_panel')
Expand Down
10 changes: 6 additions & 4 deletions bench/bench_khash_dict.py
@@ -1,12 +1,14 @@
"""
Some comparisons of khash.h to Python dict
"""
from __future__ import print_function

import numpy as np
import os

from vbench.api import Benchmark
from pandas.util.testing import rands
from pandas.util.py3compat import range
import pandas._tseries as lib
import pandas._sandbox as sbx
import time
Expand All @@ -22,7 +24,7 @@ def object_test_data(n):


def string_test_data(n):
return np.array([rands(10) for _ in xrange(n)], dtype='O')
return np.array([rands(10) for _ in range(n)], dtype='O')


def int_test_data(n):
Expand Down Expand Up @@ -50,7 +52,7 @@ def f():

def _timeit(f, iterations=10):
start = time.time()
for _ in xrange(iterations):
for _ in range(iterations):
foo = f()
elapsed = time.time() - start
return elapsed
Expand All @@ -73,8 +75,8 @@ def lookup_khash(values):


def leak(values):
for _ in xrange(100):
print proc.get_memory_info()
for _ in range(100):
print(proc.get_memory_info())
table = lookup_khash(values)
# table.destroy()

Expand Down
13 changes: 7 additions & 6 deletions bench/bench_merge.py
@@ -1,13 +1,14 @@
from pandas import *
from pandas.util.testing import rands
from pandas.util.py3compat import range
import random

N = 10000
ngroups = 10


def get_test_data(ngroups=100, n=N):
unique_groups = range(ngroups)
unique_groups = list(range(ngroups))
arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object)

if len(arr) < n:
Expand All @@ -34,8 +35,8 @@ def get_test_data(ngroups=100, n=N):
from pandas.util.testing import rands
N = 10000

indices = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices = np.array([rands(10) for _ in range(N)], dtype='O')
indices2 = np.array([rands(10) for _ in range(N)], dtype='O')
key = np.tile(indices[:8000], 10)
key2 = np.tile(indices2[:8000], 10)

Expand All @@ -55,7 +56,7 @@ def get_test_data(ngroups=100, n=N):
f = lambda: merge(left, right, how=join_method, sort=sort)
gc.disable()
start = time.time()
for _ in xrange(niter):
for _ in range(niter):
f()
elapsed = (time.time() - start) / niter
gc.enable()
Expand All @@ -65,7 +66,7 @@ def get_test_data(ngroups=100, n=N):


# R results
from StringIO import StringIO
from pandas.util.py3compat import StringIO
# many to one
r_results = read_table(StringIO(""" base::merge plyr data.table
inner 0.2475 0.1183 0.1100
Expand Down Expand Up @@ -93,7 +94,7 @@ def get_test_data(ngroups=100, n=N):

# many to many

from StringIO import StringIO
from pandas.util.py3compat import StringIO
# many to one
r_results = read_table(StringIO("""base::merge plyr data.table
inner 0.4610 0.1276 0.1269
Expand Down
8 changes: 5 additions & 3 deletions bench/bench_merge_sqlite.py
Expand Up @@ -4,12 +4,14 @@
import time
from pandas import DataFrame
from pandas.util.testing import rands
from pandas.util.py3compat import range
from six.moves import zip
import random

N = 10000

indices = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O')
indices = np.array([rands(10) for _ in range(N)], dtype='O')
indices2 = np.array([rands(10) for _ in range(N)], dtype='O')
key = np.tile(indices[:8000], 10)
key2 = np.tile(indices2[:8000], 10)

Expand Down Expand Up @@ -67,7 +69,7 @@
g = lambda: conn.execute(sql) # list fetches results
gc.disable()
start = time.time()
# for _ in xrange(niter):
# for _ in range(niter):
g()
elapsed = (time.time() - start) / niter
gc.enable()
Expand Down
3 changes: 2 additions & 1 deletion bench/bench_sparse.py
Expand Up @@ -3,6 +3,7 @@

from pandas import *
import pandas.core.sparse as spm
import pandas.util.compat as compat
reload(spm)
from pandas.core.sparse import *

Expand Down Expand Up @@ -41,7 +42,7 @@

def new_data_like(sdf):
new_data = {}
for col, series in sdf.iteritems():
for col, series in compat.iteritems(sdf):
new_data[col] = SparseSeries(np.random.randn(len(series.sp_values)),
index=sdf.index,
sparse_index=series.sp_index,
Expand Down
6 changes: 4 additions & 2 deletions bench/bench_take_indexing.py
@@ -1,10 +1,12 @@
from __future__ import print_function
import numpy as np

from pandas import *
import pandas._tseries as lib

from pandas import DataFrame
import timeit
from six.moves import zip

setup = """
from pandas import Series
Expand Down Expand Up @@ -35,7 +37,7 @@ def _timeit(stmt, size, k=5, iters=1000):
return timer.timeit(n) / n

for sz, its in zip(sizes, iters):
print sz
print(sz)
fancy_2d.append(_timeit('arr[indexer]', sz, iters=its))
take_2d.append(_timeit('arr.take(indexer, axis=0)', sz, iters=its))
cython_2d.append(_timeit('lib.take_axis0(arr, indexer)', sz, iters=its))
Expand All @@ -44,7 +46,7 @@ def _timeit(stmt, size, k=5, iters=1000):
'take': take_2d,
'cython': cython_2d})

print df
print(df)

from pandas.rpy.common import r
r('mat <- matrix(rnorm(50000), nrow=10000, ncol=5)')
Expand Down
26 changes: 14 additions & 12 deletions bench/bench_unique.py
@@ -1,14 +1,17 @@
from __future__ import print_function
from pandas import *
from pandas.util.testing import rands
from pandas.util.py3compat import range
from six.moves import zip
import pandas._tseries as lib
import numpy as np
import matplotlib.pyplot as plt

N = 50000
K = 10000

groups = np.array([rands(10) for _ in xrange(K)], dtype='O')
groups2 = np.array([rands(10) for _ in xrange(K)], dtype='O')
groups = np.array([rands(10) for _ in range(K)], dtype='O')
groups2 = np.array([rands(10) for _ in range(K)], dtype='O')

labels = np.tile(groups, N // K)
labels2 = np.tile(groups2, N // K)
Expand All @@ -20,7 +23,7 @@ def timeit(f, niter):
import time
gc.disable()
start = time.time()
for _ in xrange(niter):
for _ in range(niter):
f()
elapsed = (time.time() - start) / niter
gc.enable()
Expand Down Expand Up @@ -75,9 +78,8 @@ def algo3_sort():


def f():
from itertools import izip
# groupby sum
for k, v in izip(x, data):
for k, v in zip(x, data):
try:
counts[k] += v
except KeyError:
Expand Down Expand Up @@ -128,7 +130,7 @@ def algo4():
# N = 10000000
# K = 500000

# groups = np.array([rands(10) for _ in xrange(K)], dtype='O')
# groups = np.array([rands(10) for _ in range(K)], dtype='O')

# labels = np.tile(groups, N // K)
data = np.random.randn(N)
Expand Down Expand Up @@ -232,11 +234,11 @@ def hash_bench():
khash_hint = []
khash_nohint = []
for K in Ks:
print K
# groups = np.array([rands(10) for _ in xrange(K)])
print(K)
# groups = np.array([rands(10) for _ in range(K)])
# labels = np.tile(groups, N // K).astype('O')

groups = np.random.randint(0, 100000000000L, size=K)
groups = np.random.randint(0, long(100000000000), size=K)
labels = np.tile(groups, N // K)
dict_based.append(timeit(lambda: dict_unique(labels, K), 20))
khash_nohint.append(timeit(lambda: khash_unique_int64(labels, K), 20))
Expand All @@ -245,11 +247,11 @@ def hash_bench():

# memory, hard to get
# dict_based.append(np.mean([dict_unique(labels, K, memory=True)
# for _ in xrange(10)]))
# for _ in range(10)]))
# khash_nohint.append(np.mean([khash_unique(labels, K, memory=True)
# for _ in xrange(10)]))
# for _ in range(10)]))
# khash_hint.append(np.mean([khash_unique(labels, K, size_hint=True, memory=True)
# for _ in xrange(10)]))
# for _ in range(10)]))

# dict_based_sort.append(timeit(lambda: dict_unique(labels, K,
# sort=True), 10))
Expand Down
8 changes: 6 additions & 2 deletions bench/better_unique.py
@@ -1,9 +1,13 @@
from __future__ import print_function
from pandas import DataFrame
from pandas.util.py3compat import range
from six.moves import zip
import timeit

setup = """
from pandas import Series
import pandas._tseries as _tseries
from pandas.util.py3compat import range
import random
import numpy as np
Expand Down Expand Up @@ -48,11 +52,11 @@ def get_test_data(ngroups=100, n=tot):
numpy_timer = timeit.Timer(stmt='np.unique(arr)',
setup=setup % sz)

print n
print(n)
numpy_result = numpy_timer.timeit(number=n) / n
wes_result = wes_timer.timeit(number=n) / n

print 'Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result)
print('Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result))

wes.append(wes_result)
numpy.append(numpy_result)
Expand Down
8 changes: 5 additions & 3 deletions bench/io_roundtrip.py
@@ -1,16 +1,18 @@
from __future__ import print_function
import time
import os
import numpy as np

import la
import pandas
from pandas.util.py3compat import range
from pandas import datetools, DateRange


def timeit(f, iterations):
start = time.clock()

for i in xrange(iterations):
for i in range(iterations):
f()

return time.clock() - start
Expand Down Expand Up @@ -54,11 +56,11 @@ def rountrip_archive(N, K=50, iterations=10):

pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
pandas_time = timeit(pandas_f, iterations) / iterations
print 'pandas (HDF5) %7.4f seconds' % pandas_time
print('pandas (HDF5) %7.4f seconds' % pandas_time)

pickle_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
pickle_time = timeit(pickle_f, iterations) / iterations
print 'pandas (pickle) %7.4f seconds' % pickle_time
print('pandas (pickle) %7.4f seconds' % pickle_time)

# print 'Numpy (npz) %7.4f seconds' % numpy_time
# print 'larry (HDF5) %7.4f seconds' % larry_time
Expand Down

0 comments on commit 6c304c7

Please sign in to comment.