11from importlib import import_module
22
33import numpy as np
4- import pyarrow as pa
54
65import pandas as pd
76
@@ -20,9 +19,9 @@ class Factorize:
2019 [True , False ],
2120 [True , False ],
2221 [
23- "int " ,
24- "uint " ,
25- "float " ,
22+ "int64 " ,
23+ "uint64 " ,
24+ "float64 " ,
2625 "object" ,
2726 "object_str" ,
2827 "datetime64[ns]" ,
@@ -36,28 +35,24 @@ class Factorize:
3635
3736 def setup (self , unique , sort , dtype ):
3837 N = 10 ** 5
39- string_index = tm .makeStringIndex (N )
40- string_arrow = None
41- if dtype == "string[pyarrow]" :
42- try :
43- string_arrow = pd .array (string_index , dtype = "string[pyarrow]" )
44- except ImportError :
45- raise NotImplementedError
46-
47- data = {
48- "int" : pd .Index (np .arange (N ), dtype = "int64" ),
49- "uint" : pd .Index (np .arange (N ), dtype = "uint64" ),
50- "float" : pd .Index (np .random .randn (N ), dtype = "float64" ),
51- "object_str" : string_index ,
52- "object" : pd .Index (np .arange (N ), dtype = "object" ),
53- "datetime64[ns]" : pd .date_range ("2011-01-01" , freq = "h" , periods = N ),
54- "datetime64[ns, tz]" : pd .date_range (
55- "2011-01-01" , freq = "h" , periods = N , tz = "Asia/Tokyo"
56- ),
57- "Int64" : pd .array (np .arange (N ), dtype = "Int64" ),
58- "boolean" : pd .array (np .random .randint (0 , 2 , N ), dtype = "boolean" ),
59- "string[pyarrow]" : string_arrow ,
60- }[dtype ]
38+
39+ if dtype in ["int64" , "uint64" , "Int64" , "object" ]:
40+ data = pd .Index (np .arange (N ), dtype = dtype )
41+ elif dtype == "float64" :
42+ data = pd .Index (np .random .randn (N ), dtype = dtype )
43+ elif dtype == "boolean" :
44+ data = pd .array (np .random .randint (0 , 2 , N ), dtype = dtype )
45+ elif dtype == "datetime64[ns]" :
46+ data = pd .date_range ("2011-01-01" , freq = "h" , periods = N )
47+ elif dtype == "datetime64[ns, tz]" :
48+ data = pd .date_range ("2011-01-01" , freq = "h" , periods = N , tz = "Asia/Tokyo" )
49+ elif dtype == "object_str" :
50+ data = tm .makeStringIndex (N )
51+ elif dtype == "string[pyarrow]" :
52+ data = pd .array (tm .makeStringIndex (N ), dtype = "string[pyarrow]" )
53+ else :
54+ raise NotImplementedError
55+
6156 if not unique :
6257 data = data .repeat (5 )
6358 self .data = data
@@ -74,9 +69,9 @@ class Duplicated:
7469 [True , False ],
7570 ["first" , "last" , False ],
7671 [
77- "int " ,
78- "uint " ,
79- "float " ,
72+ "int64 " ,
73+ "uint64 " ,
74+ "float64 " ,
8075 "string" ,
8176 "datetime64[ns]" ,
8277 "datetime64[ns, tz]" ,
@@ -88,22 +83,20 @@ class Duplicated:
8883
8984 def setup (self , unique , keep , dtype ):
9085 N = 10 ** 5
91- data = {
92- "int" : pd .Index (np .arange (N ), dtype = "int64" ),
93- "uint" : pd .Index (np .arange (N ), dtype = "uint64" ),
94- "float" : pd .Index (np .random .randn (N ), dtype = "float64" ),
95- "string" : tm .makeStringIndex (N ),
96- "datetime64[ns]" : pd .date_range ("2011-01-01" , freq = "h" , periods = N ),
97- "datetime64[ns, tz]" : pd .date_range (
98- "2011-01-01" , freq = "h" , periods = N , tz = "Asia/Tokyo"
99- ),
100- "timestamp[ms][pyarrow]" : pd .Index (
101- np .arange (N ), dtype = pd .ArrowDtype (pa .timestamp ("ms" ))
102- ),
103- "duration[s][pyarrow]" : pd .Index (
104- np .arange (N ), dtype = pd .ArrowDtype (pa .duration ("s" ))
105- ),
106- }[dtype ]
86+ if dtype in ["int64" , "uint64" ]:
87+ data = pd .Index (np .arange (N ), dtype = dtype )
88+ elif dtype == "float64" :
89+ data = pd .Index (np .random .randn (N ), dtype = "float64" )
90+ elif dtype == "string" :
91+ data = tm .makeStringIndex (N )
92+ elif dtype == "datetime64[ns]" :
93+ data = pd .date_range ("2011-01-01" , freq = "h" , periods = N )
94+ elif dtype == "datetime64[ns, tz]" :
95+ data = pd .date_range ("2011-01-01" , freq = "h" , periods = N , tz = "Asia/Tokyo" )
96+ elif dtype in ["timestamp[ms][pyarrow]" , "duration[s][pyarrow]" ]:
97+ data = pd .Index (np .arange (N ), dtype = dtype )
98+ else :
99+ raise NotImplementedError
107100 if not unique :
108101 data = data .repeat (5 )
109102 self .idx = data
@@ -181,21 +174,22 @@ class Quantile:
# Parameter grid: one list of candidate values per entry in ``param_names``.
params = [
    [0, 0.5, 1],
    ["linear", "nearest", "lower", "higher", "midpoint"],
    # These labels must match the strings ``setup`` compares against exactly.
    ["float64", "int64", "uint64"],
]
param_names = ["quantile", "interpolation", "dtype"]
187180
188181 def setup (self , quantile , interpolation , dtype ):
189182 N = 10 ** 5
190- data = {
191- "int" : np .arange (N ),
192- "uint" : np .arange (N ).astype (np .uint64 ),
193- "float" : np .random .randn (N ),
194- }
195- self .idx = pd .Series (data [dtype ].repeat (5 ))
183+ if dtype in ["int64" , "uint64" ]:
184+ data = np .arange (N , dtype = dtype )
185+ elif dtype == "float64" :
186+ data = np .random .randn (N )
187+ else :
188+ raise NotImplementedError
189+ self .ser = pd .Series (data .repeat (5 ))
196190
def time_quantile(self, quantile, interpolation, dtype):
    """Compute the quantile of ``self.ser``; the result is discarded.

    ``dtype`` is accepted to mirror the parameter grid and is not used here.
    """
    self.ser.quantile(quantile, interpolation=interpolation)
199193
200194
201195class SortIntegerArray :
0 commit comments