In [2]:
from IPython.core.interactiveshell import InteractiveShell

# Display the value of every top-level expression in a cell, not only the last
# one; the cells below rely on this to show several results per cell.
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [63]:
# Reindexing: conform a Series to a new set of labels.
ser = pd.Series([1, 2, -1, -2], index=list("cdba"))
ser.astype(np.int8)  # shows an int8 copy; `ser` itself is not modified
# Labels missing from `ser` (here 'e') receive fill_value instead of NaN.
ser2 = ser.reindex(list("abde"), fill_value=0)
ser2.astype("int8")

# Forward fill: each new label takes the value of the closest preceding label.
ser3 = pd.Series(["bee", "fly", "bird"], index=[0, 2, 4])
ser3
ser3.reindex(range(6), method="ffill")

# Reindex again, this time filling the gaps with a constant.
ser3.reindex(range(6), fill_value="NoBird")

c    1
d    2
b   -1
a   -2
dtype: int8

a   -2
b   -1
d    2
e    0
dtype: int8

0     bee
2     fly
4    bird
dtype: object

0     bee
1     bee
2     fly
3     fly
4    bird
5    bird
dtype: object

0       bee
1    NoBird
2       fly
3    NoBird
4      bird
5    NoBird
dtype: object

In [19]:
# Dropping rows and columns.
data = pd.DataFrame(
    np.arange(25).reshape(5, 5),
    index=["Ohio", "Colorado", "Utah", "New York", "Texas"],
    columns=["one", "two", "three", "four", "five"],
)
data
# drop() returns a new frame; `data` keeps all of its columns.
data.drop(columns=["one"])
# inplace=True mutates `data` itself, so "New York" is absent from every
# result displayed after this line.
data.drop(index=["New York"], inplace=True)

# Drop several row labels at once.
data.drop(index=["Ohio", "Texas"])
# Drop several columns at once.
data.drop(columns=["two", "four"])
data.drop(index="Ohio")

Unnamed: 0,one,two,three,four,five
Ohio,0,1,2,3,4
Colorado,5,6,7,8,9
Utah,10,11,12,13,14
New York,15,16,17,18,19
Texas,20,21,22,23,24


Unnamed: 0,two,three,four,five
Ohio,1,2,3,4
Colorado,6,7,8,9
Utah,11,12,13,14
New York,16,17,18,19
Texas,21,22,23,24


Unnamed: 0,one,two,three,four,five
Colorado,5,6,7,8,9
Utah,10,11,12,13,14


Unnamed: 0,one,three,five
Ohio,0,2,4
Colorado,5,7,9
Utah,10,12,14
Texas,20,22,24


Unnamed: 0,one,two,three,four,five
Colorado,5,6,7,8,9
Utah,10,11,12,13,14
Texas,20,21,22,23,24


In [60]:
# Indexing, selection and filtering.
data = pd.DataFrame(np.arange(25).reshape((5,5)), columns=["one", "two", "three","four","five"], index=["Ohio","Colorado", "Utah", "New York", "Texas"])
# Indexing the columns
data["two"]  # a single column label produces a Series
data[["two","five"]]  # a list of column labels produces a DataFrame
# Slicing with labels includes the end label; .loc makes it explicit that
# rows (not columns) are being sliced by label.
data.loc["Ohio":"Utah"]

# loc and iloc select rows (and optionally columns).
# Selecting one row gives a Series, selecting several returns a DataFrame.
data.iloc[0]
data.iloc[[0,2,4]]
data.iloc[-1]  # last row
data.iloc[:,0]  # first column of the DF
data.iloc[:,-1]  # last column of the DF
data.iloc[:3,3:]  # selecting both rows and columns
# Selecting with integer lists picks rows/columns in the given order, much like
# reindexing. The result is a new object (a copy), so modifying it does NOT
# change `data`.
data.iloc[[0,2,4], [3,1]]
print("Reverse of DF:")
data.iloc[::-1, ::-1]  # reverse both the row order and the column order

# The same selections can be made by label with loc.
data.loc["Ohio"]
data.loc[:,["two", "five"]]

# loc also accepts a boolean condition for the rows, combined here with a
# label slice for the columns.
data.loc[data['one']>5,"three":]


Ohio         1
Colorado     6
Utah        11
New York    16
Texas       21
Name: two, dtype: int32

Unnamed: 0,two,five
Ohio,1,4
Colorado,6,9
Utah,11,14
New York,16,19
Texas,21,24


Unnamed: 0,one,two,three,four,five
Ohio,0,1,2,3,4
Colorado,5,6,7,8,9
Utah,10,11,12,13,14


one      0
two      1
three    2
four     3
five     4
Name: Ohio, dtype: int32

Unnamed: 0,one,two,three,four,five
Ohio,0,1,2,3,4
Utah,10,11,12,13,14
Texas,20,21,22,23,24


one      20
two      21
three    22
four     23
five     24
Name: Texas, dtype: int32

Ohio         0
Colorado     5
Utah        10
New York    15
Texas       20
Name: one, dtype: int32

Ohio         4
Colorado     9
Utah        14
New York    19
Texas       24
Name: five, dtype: int32

Unnamed: 0,four,five
Ohio,3,4
Colorado,8,9
Utah,13,14


Unnamed: 0,four,two
Ohio,3,1
Utah,13,11
Texas,23,21


Reverse of DF:


Unnamed: 0,five,four,three,two,one
Texas,24,23,22,21,20
New York,19,18,17,16,15
Utah,14,13,12,11,10
Colorado,9,8,7,6,5
Ohio,4,3,2,1,0


one      0
two      1
three    2
four     3
five     4
Name: Ohio, dtype: int32

Unnamed: 0,two,five
Ohio,1,4
Colorado,6,9
Utah,11,14
New York,16,19
Texas,21,24


Unnamed: 0,three,four,five
Utah,12,13,14
New York,17,18,19
Texas,22,23,24


#### Arithmetic Operations
When adding together objects, if any index pairs are not the same, the respective index in the result will be the union of the index pairs. The internal data alignment introduces NA values in the indices that don’t overlap. Missing values propagate in arithmetic computations.
In the case of DataFrame, alignment is performed on both the rows and the columns
To avoid NA values in the DataFrame/Series, pass the <b>fill_value</b> argument to the arithmetic method; it is used wherever only one of the two operands has a value.
The arithmetic methods available are <b>add, sub, div, mul</b>; with these methods we can perform all the arithmetic operations on a DataFrame.

In [71]:
# Arithmetic between objects whose indexes differ.
# Series: labels present in only one operand produce NaN in the result.
s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=list("acde"))
s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1], index=list("acefg"))
s1 + s2

# DataFrame: alignment happens on both the rows and the columns.
df1 = pd.DataFrame(
    np.arange(9.0).reshape(3, 3),
    index=["Ohio", "Texas", "Colorado"],
    columns=list("bcd"),
)
df2 = pd.DataFrame(
    np.arange(12.0).reshape(4, 3),
    index=["Utah", "Ohio", "Texas", "Oregon"],
    columns=list("bde"),
)
df1 + df2
# With fill_value, a cell missing from one operand is treated as 0.
df1.add(df2, fill_value=0)


a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

Unnamed: 0,b,c,d,e
Colorado,,,,
Ohio,3.0,,6.0,
Oregon,,,,
Texas,9.0,,12.0,
Utah,,,,


Unnamed: 0,b,c,d,e
Colorado,6.0,7.0,8.0,
Ohio,3.0,1.0,6.0,5.0
Oregon,9.0,,10.0,11.0
Texas,9.0,4.0,12.0,8.0
Utah,0.0,,1.0,2.0


Unnamed: 0,b,c,d,e
Colorado,6.0,7.0,8.0,
Ohio,3.0,1.0,6.0,5.0
Oregon,9.0,,10.0,11.0
Texas,9.0,4.0,12.0,8.0
Utah,0.0,,1.0,2.0


#### Function application and mapping:
Another frequent operation is applying a function on 1D arrays to each column or row.
DataFrame’s apply method does exactly this.

The function passed to apply need not return a scalar value, it can also return a Series
with multiple values. When such a function is applied, the result is a DataFrame instead of a Series.

Element-wise operations can also be performed on a DataFrame by using the "applymap" method (renamed to DataFrame.map in pandas 2.1).
Element-wise operations on a Series can be performed using the "map" method.

In [86]:
frame = pd.DataFrame(np.random.randn(4, 3), columns=list('bde'), index=['Utah', 'Ohio', 'Texas', 'Oregon'])
frame


def fun(x):
    """Return the spread (max minus min) of a 1-D Series."""
    return x.max() - x.min()


frame.apply(fun)  # applied to each column
frame.apply(fun, axis=1)  # applied to each row


# apply with a function that returns a Series: the result is a DataFrame
def funs(x):
    """Return the max and min of a 1-D Series as a Series labelled 'max'/'min'."""
    return pd.Series([x.max(), x.min()], index=['max', 'min'])


frame.apply(funs)
frame.apply(funs, axis=1)


# Element-wise operations on a DataFrame.
def format_fun(x):
    """Format a number as a string with two decimal places."""
    return "%.2f" % x


def elementwise(df, func):
    """Apply ``func`` to every element of ``df`` and return the new DataFrame.

    DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old
    name is deprecated, so the new name is used when available and the old one
    is kept as a fallback for earlier pandas versions.
    """
    # Check the class, not the instance: attribute access on an instance could
    # resolve to a column that happens to be called "map".
    if hasattr(pd.DataFrame, "map"):
        return df.map(func)
    return df.applymap(func)


elementwise(frame, format_fun)
# Element-wise operation on a Series.
ser = pd.Series([0.24675, -1.23456, 2.45321], index=list("bde"))
ser.map(format_fun)

Unnamed: 0,b,d,e
Utah,-1.91251,-0.887535,0.866764
Ohio,-0.246286,0.173955,-0.054737
Texas,0.574237,-0.200886,-1.857426
Oregon,0.818779,-0.578389,0.116118


b    2.731289
d    1.061490
e    2.724190
dtype: float64

Utah      2.779274
Ohio      0.420241
Texas     2.431662
Oregon    1.397168
dtype: float64

Unnamed: 0,b,d,e
max,0.818779,0.173955,0.866764
min,-1.91251,-0.887535,-1.857426


Unnamed: 0,max,min
Utah,0.866764,-1.91251
Ohio,0.173955,-0.246286
Texas,0.574237,-1.857426
Oregon,0.818779,-0.578389


Unnamed: 0,b,d,e
Utah,-1.91,-0.89,0.87
Ohio,-0.25,0.17,-0.05
Texas,0.57,-0.2,-1.86
Oregon,0.82,-0.58,0.12


b     0.25
d    -1.23
e     2.45
dtype: object

#### Sorting
To sort lexicographically by row or column index, use the sort_index method, which returns a new, sorted object.

To sort a Series by its values, use its sort_values method. Any missing values are sorted to the end of the Series by default
On DataFrame, we may want to sort by the values in one or more columns. To do so, we have to pass one or more column names to the by option.
If NaN values are present they are placed at the end by default; to place them at the beginning, pass na_position="first".


In [96]:
# Sorting by index labels -- Series
obj = pd.Series(range(5), index=[2, 3, 1, 0, 4])
obj.sort_index()
obj.sort_index(ascending=False)
# Sorting by index labels -- DataFrame (rows, columns, then both)
frame = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=[2, 3, 0, 1],
    columns=["two", "three", "one", "four"],
)
frame.sort_index()
frame.sort_index(axis="columns")
frame.sort_index().sort_index(axis="columns")

# Sorting by values: sort_values on a Series, and on a DataFrame with one or
# more column names passed through `by`.
obj.sort_values()
frame.sort_values(by="one")
frame.sort_values(by=["one", "four"])

0    3
1    2
2    0
3    1
4    4
dtype: int64

4    4
3    1
2    0
1    2
0    3
dtype: int64

Unnamed: 0,two,three,one,four
0,8,9,10,11
1,12,13,14,15
2,0,1,2,3
3,4,5,6,7


Unnamed: 0,four,one,three,two
2,3,2,1,0
3,7,6,5,4
0,11,10,9,8
1,15,14,13,12


Unnamed: 0,four,one,three,two
0,11,10,9,8
1,15,14,13,12
2,3,2,1,0
3,7,6,5,4


2    0
3    1
1    2
0    3
4    4
dtype: int64

Unnamed: 0,two,three,one,four
2,0,1,2,3
3,4,5,6,7
0,8,9,10,11
1,12,13,14,15


Unnamed: 0,two,three,one,four
2,0,1,2,3
3,4,5,6,7
0,8,9,10,11
1,12,13,14,15


###### Indexes with duplicates
It is not mandatory for index labels to be unique, but a few operations, such as reindex, require unique labels.
The index's is_unique property tells whether the Series/DataFrame has unique labels or not.

Series:
Data selection behaves differently with duplicates: indexing a label that has multiple entries returns a Series, while a label with a single entry returns a scalar value.
DataFrame:
The same applies to DataFrames: selecting a row label with multiple entries returns a DataFrame, while a label with a single entry returns a Series.


In [106]:
obj = pd.Series(range(5), index=list("aabbc"))
# is_unique is False as soon as any index label occurs more than once.
obj.index.is_unique
obj

# Same check on a DataFrame whose row labels repeat.
df = pd.DataFrame(np.random.randn(4, 3), index=list("aabb"))
df.index.is_unique
df.loc["a"]

False

a    0
a    1
b    2
b    3
c    4
dtype: int64

False

Unnamed: 0,0,1,2
a,0.461916,-0.42057,0.425093
a,1.348367,1.048457,-1.102584


###### unique
unique values may not be in sorted order.

In [116]:
obj = pd.Series(list("cadaabbcc"))
# unique() keeps the values in order of first appearance, so sort the result
# explicitly when a sorted listing is wanted.
distinct = obj.unique()
distinct
pd.Series(distinct).sort_values()

array(['c', 'a', 'd', 'b'], dtype=object)

1    a
3    b
0    c
2    d
dtype: object

###### value_counts
Returns the frequency of each value. By default the result is sorted by count (not by the values themselves); to avoid sorting, pass sort=False.

In [127]:
# Frequency of each value in `obj` (the Series created in the previous cell),
# first sorted by count, then with sorting disabled.
obj.value_counts()
obj.value_counts(sort=False)
# value_counts can be applied to a whole DataFrame through apply.
# The top-level pd.value_counts function is deprecated since pandas 2.1, so the
# Series method is used; apply hands it one column (or one row) at a time.
fun = pd.Series.value_counts
data = pd.DataFrame({'Qu1': ["one", "three", "four", "three", "four"], 'Qu2': ["two", "three", "one", "two", "three"], 'Qu3': ["one", "five", "two", "four","four"]})
data
# Values that never occur in a given column/row come back as NaN; show them as 0.
data.apply(fun).fillna(0)  # counts per column
data.apply(fun, axis=1).fillna(0)  # counts per row

c    3
a    3
b    2
d    1
dtype: int64

a    3
b    2
d    1
c    3
dtype: int64

Unnamed: 0,Qu1,Qu2,Qu3
0,one,two,one
1,three,three,five
2,four,one,two
3,three,two,four
4,four,three,four


Unnamed: 0,Qu1,Qu2,Qu3
five,0.0,0.0,1.0
four,2.0,0.0,2.0
one,1.0,1.0,1.0
three,2.0,2.0,0.0
two,0.0,2.0,1.0


Unnamed: 0,five,four,one,three,two
0,0.0,0.0,2.0,0.0,1.0
1,1.0,0.0,0.0,2.0,0.0
2,0.0,1.0,1.0,0.0,1.0
3,0.0,1.0,0.0,1.0,1.0
4,0.0,2.0,0.0,1.0,0.0


### Handling Missing Data
###### dropna :
By default it drops any row (or column, with axis=1) that contains any missing data. To drop only the rows/columns in which all values are NA, set <b>how="all"</b>.<br>
To keep only the rows/columns that have a minimum number of values, set <b>thresh=num</b>; rows/columns with at least <b>thresh</b> non-null values are kept.<br>
<b>**dropna returns a new object</b>

###### isnull:
###### notnull:

In [139]:
data = pd.Series([4, 3, np.nan, 5, np.nan])
data
# dropna() returns a new Series without the NaN entries; rebind the name to it.
data = data.dropna()
data

# Build a frame with NaN in the upper-right corner plus one extra cell.
frame = pd.DataFrame(np.random.randn(5, 5))
frame.iloc[:3, 3:] = np.nan
frame.iloc[3, 3] = np.nan
frame
# Keep only the rows that have at least 4 non-null values.
frame.dropna(thresh=4)

0    4.0
1    3.0
2    NaN
3    5.0
4    NaN
dtype: float64

0    4.0
1    3.0
3    5.0
dtype: float64

Unnamed: 0,0,1,2,3,4
0,0.472071,0.730822,-0.250584,,
1,0.156358,0.498467,-0.477638,,
2,-0.24858,1.995055,1.718951,,
3,0.055205,0.888357,-0.212653,,0.024068
4,-0.091958,-0.347416,1.587054,-3.562238,0.138503


Unnamed: 0,0,1,2,3,4
3,0.055205,0.888357,-0.212653,,0.024068
4,-0.091958,-0.347416,1.587054,-3.562238,0.138503


##### Filling Missing Data
###### fillna:
Fills missing values with the given data. To fill each column with a different value, pass a dict mapping column names to fill values.<br>
value ---------> Scalar value or dict-like object to use to fill missing values<br>
method ---------> Interpolation, by default 'ffill' if function called with no other arguments (this argument is deprecated since pandas 2.1; use the ffill()/bfill() methods instead)<br>
axis -----------> Axis to fill on, default axis=0<br>
inplace --------> Modify the calling object without producing a copy<br>
limit ----------> For forward and backward filling, maximum number of consecutive periods to fill<br>
<b>** fillna returns a new object</b>


In [147]:
# Build a frame with NaN in the upper part of columns 'd' and 'e' and one
# NaN in column 'c'.
frame = pd.DataFrame(np.random.randn(5,5), columns=list("abcde"))
frame.iloc[:3,3:]=np.nan
frame.iloc[3,3]=np.nan
frame.iloc[2,2]=np.nan
frame
# Fill every missing value with the same scalar.
frame.fillna(0)
# Fill each column with its own value by passing a dict keyed by column name.
frame.fillna({'c':0,'d':1,'e':-1})
# Backward fill, filling at most 2 consecutive NaN values per gap.
# fillna(method="bfill") is deprecated since pandas 2.1; bfill() is the
# equivalent dedicated method.
frame.bfill(limit=2)

Unnamed: 0,a,b,c,d,e
0,-0.024232,-0.801202,0.507342,,
1,-1.479818,-0.31383,0.282356,,
2,-1.218883,-1.12432,,,
3,-1.355439,1.179219,0.371203,,-1.334355
4,-1.534132,1.247024,1.134593,-1.120633,-1.148951


Unnamed: 0,a,b,c,d,e
0,-0.024232,-0.801202,0.507342,0.0,0.0
1,-1.479818,-0.31383,0.282356,0.0,0.0
2,-1.218883,-1.12432,0.0,0.0,0.0
3,-1.355439,1.179219,0.371203,0.0,-1.334355
4,-1.534132,1.247024,1.134593,-1.120633,-1.148951


Unnamed: 0,a,b,c,d,e
0,-0.024232,-0.801202,0.507342,1.0,-1.0
1,-1.479818,-0.31383,0.282356,1.0,-1.0
2,-1.218883,-1.12432,0.0,1.0,-1.0
3,-1.355439,1.179219,0.371203,1.0,-1.334355
4,-1.534132,1.247024,1.134593,-1.120633,-1.148951


Unnamed: 0,a,b,c,d,e
0,-0.024232,-0.801202,0.507342,,
1,-1.479818,-0.31383,0.282356,,-1.334355
2,-1.218883,-1.12432,0.371203,-1.120633,-1.334355
3,-1.355439,1.179219,0.371203,-1.120633,-1.334355
4,-1.534132,1.247024,1.134593,-1.120633,-1.148951


### Hierarchical Indexing
It provides a way to work with higher-dimensional data in a lower-dimensional form.
Such a Series or DataFrame has a MultiIndex instead of a normal index.
With a hierarchical index we can perform partial indexing, i.e. select data using only some of the index levels.

A MultiIndex Series can be converted to a DataFrame using the <b>unstack</b> method.<br>
<b>stack</b> converts it back to a MultiIndex Series.

A DataFrame can have a hierarchical index on both rows and columns.
Hierarchical index levels can have names.


In [18]:
# A Series with a two-level index, built from two parallel lists of labels.
data = pd.Series(
    np.random.randn(10),
    index=[
        ["a", "a", "a", "b", "b", "b", "c", "c", "d", "d"],
        [1, 2, 3, 1, 2, 3, 1, 2, 2, 3],
    ],
)
data
data.index
# Partial indexing on the outer level: one label, a label slice, a label list.
data["b"]
data["b":"c"]
data.loc[["b", "c"]]
# Selecting on the inner level.
data[:, 3]

# MultiIndex Series ---> DataFrame (inner level becomes the columns)
data.unstack()
# DataFrame ---> MultiIndex Series
data.unstack().stack()

# DataFrame with a MultiIndex on both the rows and the columns.
frame = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
    columns=[["Ohio", "Ohio", "Colorado"], ["Green", "Red", "Green"]],
)
frame
# Give the index levels and the column levels names, then display again.
frame.index.names = ["key1", "key2"]
frame.columns.names = ["State", "Color"]
frame

a  1    0.523467
   2    1.208985
   3    0.419605
b  1   -0.081208
   2   -1.500262
   3   -0.505429
c  1   -0.958610
   2   -0.269767
d  2    0.590970
   3   -0.507559
dtype: float64

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 2),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

1   -0.081208
2   -1.500262
3   -0.505429
dtype: float64

b  1   -0.081208
   2   -1.500262
   3   -0.505429
c  1   -0.958610
   2   -0.269767
dtype: float64

b  1   -0.081208
   2   -1.500262
   3   -0.505429
c  1   -0.958610
   2   -0.269767
dtype: float64

a    0.419605
b   -0.505429
d   -0.507559
dtype: float64

Unnamed: 0,1,2,3
a,0.523467,1.208985,0.419605
b,-0.081208,-1.500262,-0.505429
c,-0.95861,-0.269767,
d,,0.59097,-0.507559


a  1    0.523467
   2    1.208985
   3    0.419605
b  1   -0.081208
   2   -1.500262
   3   -0.505429
c  1   -0.958610
   2   -0.269767
d  2    0.590970
   3   -0.507559
dtype: float64

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


Unnamed: 0_level_0,State,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


###### Reordering and Sorting levels in Hierarchical indexing
<b>swaplevel</b> method takes two levels and returns the modified output with interchanged levels of the index.(data is unaltered)

<b>sort_index</b>, on the other hand, sorts the data (stably) using only the values in a single
level. When swapping levels, it’s not uncommon to also use sort_index so that the result
is lexicographically sorted.<br>
For <b>level</b> you can give a level number, a level name, or a list of them.<br>
To sort the columns, set <b>axis=1</b>.

**** Data selection performance is much better on hierarchically indexed
objects if the index is lexicographically sorted starting with the outermost
level, that is, the result of calling sort_index().

In [43]:
# Interchange the two row levels of `frame` (the MultiIndex frame built in the
# earlier cell).
frame.swaplevel("key1", "key2")
# Sort the rows using a level given by name.
frame.sort_index(level="key2")
# Sort the rows using a list of levels given by position.
frame.sort_index(level=[0])

# Sort the columns instead of the rows.
frame.sort_index(axis="columns", level=0)
frame.sort_index(axis="columns", level=[0, 1], ascending=False)
frame.sort_index(axis="columns", level="State")


Unnamed: 0_level_0,State,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


Unnamed: 0_level_0,State,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


Unnamed: 0_level_0,State,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


Unnamed: 0_level_0,State,Colorado,Ohio,Ohio
Unnamed: 0_level_1,Color,Green,Green,Red
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,2,0,1
a,2,5,3,4
b,1,8,6,7
b,2,11,9,10


Unnamed: 0_level_0,State,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Color,Red,Green,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,1,0,2
a,2,4,3,5
b,1,7,6,8
b,2,10,9,11


Unnamed: 0_level_0,State,Colorado,Ohio,Ohio
Unnamed: 0_level_1,Color,Green,Green,Red
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,2,0,1
a,2,5,3,4
b,1,8,6,7
b,2,11,9,10


TypeError: reset_index() got an unexpected keyword argument 'axis'

##### Summary statistics on levels
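Many descriptive and summary statistics on DataFrame and Series have a level option
in which you can specify the level you want to sum by on a particular axis. Note: the level option was deprecated in pandas 1.3 and removed in pandas 2.0; group by the level instead, e.g. df.groupby(level=...).sum().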

In [36]:
# Sum `frame` (the MultiIndex frame from the earlier cell) over the column
# level "Color", then over the row level "key2".
# The `level=` keyword of sum() was deprecated in pandas 1.3 and removed in
# 2.0, so each step is a groupby on that level instead. Column levels are
# grouped by transposing, grouping the rows, and transposing back.
(
    frame.T.groupby(level="Color").sum().T
    .groupby(level="key2").sum()
)

Color,Green,Red
key2,Unnamed: 1_level_1,Unnamed: 2_level_1
1,16,8
2,28,14


##### Setting DataFrame columns as MultiIndex
DataFrame’s <b>set_index</b> function will create a new DataFrame using one or more of its
columns as the index. By default the columns are removed from the DataFrame once they are added to the index; to keep them, set <b>drop=False</b>.

<b>reset_index</b>, on the other hand, does the opposite of <b>set_index</b>: the hierarchical index
levels are moved into the columns.

In [41]:
data = pd.DataFrame(
    {
        "a": range(7),
        "b": range(7, 0, -1),
        "c": ["one"] * 3 + ["two"] * 4,
        "d": [0, 1, 2, 0, 1, 2, 3],
    }
)
data
# Move columns 'c' and 'd' into a two-level row index.
indexed = data.set_index(["c", "d"])
indexed
# Same index, but the source columns are also kept as regular columns.
data.set_index(["c", "d"], drop=False)

# Undo set_index: the index levels go back to being ordinary columns.
indexed.reset_index()

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,d
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,0,0,7,one,0
one,1,1,6,one,1
one,2,2,5,one,2
two,0,3,4,two,0
two,1,4,3,two,1
two,2,5,2,two,2
two,3,6,1,two,3


Unnamed: 0,c,d,a,b
0,one,0,0,7
1,one,1,1,6
2,one,2,2,5
3,two,0,3,4
4,two,1,4,3
5,two,2,5,2
6,two,3,6,1
