In [None]:
#here are some example cases for creating an interval list with pandas

In [1]:
import pandas as pd
import cudf
import numpy as np

In [None]:
#the line below will give us a series made of an interval set of numbers

In [2]:
# Series whose two elements are left-closed intervals: [1, 2) and [3, 4).
pd.Series(
    [
        pd.Interval(left=1, right=2, closed='left'),
        pd.Interval(left=3, right=4, closed='left'),
    ]
)

0    [1, 2)
1    [3, 4)
dtype: interval

In [None]:
#the line below will give us a dataframe made of an interval set of numbers

In [7]:
# Single-column frame holding the same two left-closed intervals,
# with the interval dtype requested explicitly.
pd.DataFrame(
    data=[
        pd.Interval(1, 2, closed='left'),
        pd.Interval(3, 4, closed='left'),
    ],
    dtype='interval',
)

Unnamed: 0,0
0,"[1, 2)"
1,"[3, 4)"


In [None]:
#note that an interval dataframe only accepts two arguments: the intervals for the first column and then the intervals for an index column, nothing more than that!

In [None]:
#let's now look at the interval index. An interval index can be constructed by calling the interval_range function

In [8]:
# Bounds 0 and 5 with no freq given: yields the five unit-width, right-closed intervals.
pd.interval_range(0, 5)

IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
              closed='right',
              dtype='interval[int64]')

In [None]:
#you can also build an array of intervals directly using IntervalArray

In [10]:
# Wrap explicitly constructed Interval objects in an IntervalArray;
# no `closed` is passed, so both intervals come out right-closed.
pd.arrays.IntervalArray(
    [
        pd.Interval(left=0, right=1),
        pd.Interval(left=1, right=5),
    ]
)

<IntervalArray>
[(0, 1], (1, 5]]
Length: 2, closed: right, dtype: interval[int64]

In [None]:
#the line below creates an interval index from the break points in this list. It is what interval_range uses

In [11]:
# Each pair of consecutive break points becomes one right-closed interval.
pd.IntervalIndex.from_breaks(breaks=[0, 1, 2, 3, 4, 5])

IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
              closed='right',
              dtype='interval[int64]')

In [None]:
#the IntervalIndex.from_breaks method actually calls the IntervalArray.from_breaks method to do its work

In [12]:
# Array-level counterpart of IntervalIndex.from_breaks, fed the break points 0..5.
pd.arrays.IntervalArray.from_breaks(list(range(6)))

<IntervalArray>
[(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
Length: 5, closed: right, dtype: interval[int64]

In [None]:
#we want to use the interval index method in the cut method to create bins. 
#for an array input like the one below, the cut method returns a Categorical whose categories are an interval index; each element is the interval (bin) that the corresponding input value falls into

In [72]:
# Sort six sample values into 2 equal-width bins.
pd.cut(x=np.array([1, 7, 5, 4, 6, 3]), bins=2)

[(0.994, 4.0], (4.0, 7.0], (4.0, 7.0], (0.994, 4.0], (4.0, 7.0], (0.994, 4.0]]
Categories (2, interval[float64]): [(0.994, 4.0] < (4.0, 7.0]]

In [None]:
#it looks like for the frequency you take (highest number - lowest number) / (the int, which is the number of bins)

In [76]:
# Approximate the bins from the pd.cut example with interval_range:
# the bin width is (highest value - lowest value) / number of bins.
lowest, highest, n_bins = 1, 7, 2
pd.interval_range(start=lowest, end=highest, freq=(highest - lowest) / n_bins)

IntervalIndex([(1.0, 4.0], (4.0, 7.0]],
              closed='right',
              dtype='interval[float64]')

In [None]:
#note a closed set [] is inclusive of its end points, an open set () is not inclusive of its boundaries 
#therefore (4.0, 7.0] is a set from 4 to 7, but does not include 4 and includes 7, therefore 7 would be in this bin
# (0.994, 4.0] is a set that does not include 0.994 but includes 4, therefore 4 would be in this bin

#the sets from the cut example above are listed below. Note the first edge basically starts at 1 but is lowered by 0.1% of the range (7 - 1 = 6, so 0.006) so that the set can include the number 1
# (0.994, 4.0], (4.0, 7.0]

In [31]:
# Three evenly spaced points from 1 to 7.
np.linspace(start=1, stop=7, num=3)

array([1., 4., 7.])

In [None]:
#So it looks like the first thing we need to do is create the interval_range method
#this method takes a start and end parameter that signify where the first interval will begin 
#and then where the last interval will end. 
#the frequency parameter signifies what amount to increment each interval by
#

In [104]:
# Hand-rolled sketch of pd.interval_range(start, end, freq) for integer inputs.
# Every loop bound is derived from start/end/freq, so changing one value keeps the rest in sync.
start = 0  # left edge of the first interval
end = 5  # upper limit; no interval extends past it
freq = 2  # width of each interval (freq if other than 1)
sf = start + freq  # right edge of the first interval
ef = end + 1  # range() excludes its stop, so +1 lets a right edge land exactly on `end`

arr = []
init = start  # running left edge of the next interval
# range(start+freq, end+1, freq) yields every right edge; being range(), it is integer-only
for i in range(sf, ef, freq):
    arr.append(pd.Interval(init, i))
    init = i  # this right edge becomes the next interval's left edge

new_arr = pd.arrays.IntervalArray(arr)
print(new_arr)
print(new_arr.closed)
print(new_arr.dtype)
# NOTE: __dict__ exposes the array's private attributes; printed only to inspect the internals.
print(new_arr.__dict__)

<IntervalArray>
[(0, 2], (2, 4]]
Length: 2, closed: right, dtype: interval[int64]
right
interval[int64]
{'_left': Int64Index([0, 2], dtype='int64'), '_right': Int64Index([2, 4], dtype='int64'), '_closed': 'right'}


In [87]:
# Reference result from pandas: bounds 0 and 5, stepping by 2.
pd.interval_range(0, 5, freq=2)

IntervalIndex([(0, 2], (2, 4]],
              closed='right',
              dtype='interval[int64]')

In [107]:
# Build the same intervals from explicit break points: start=0, stop=end+1=6, step=freq=2.
breaks = list(range(0, 6, 2))  # [0, 2, 4]
pd.arrays.IntervalArray.from_breaks(breaks)

<IntervalArray>
[(0, 2], (2, 4]]
Length: 2, closed: right, dtype: interval[int64]

In [None]:
#Let's look at IntervalIndex next; these are the examples that have worked


In [None]:
In [1]: import cudf

In [2]: import pandas as pd

In [3]: g = cudf.IntervalIndex([pd.Interval(1, 2, closed='left'),pd.Interval(3, 4, closed='left')])

In [4]: g
Out[4]: 
IntervalIndex([[1, 2), [3, 4)],
              closed='left',
              dtype='interval')

In [5]: pd.IntervalIndex([pd.Interval(1, 2, closed='left'),pd.Interval(3, 4, closed='left')])
Out[5]: 
IntervalIndex([[1, 2), [3, 4)],
              closed='left',
              dtype='interval[int64]')

In [6]: cudf.interval_range(0,5)
Out[6]: 
IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
              closed='right',
              dtype='interval')

In [7]: pd.interval_range(0,5)
Out[7]: 
IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
              closed='right',
              dtype='interval[int64]')