In [1]:
import pandas as pd 
import numpy as np
# Sample records, one row per person; columns are Age, Height and Weight.
matrix_data = np.matrix([
    [22, 66, 140],
    [42, 70, 148],
    [30, 62, 125],
    [35, 68, 160],
    [25, 62, 152],
])
row_labels = list('ABCDE')
column_headings = ['Age', 'Height', 'Weight']
# Bare last expression so the notebook echoes the matrix.
matrix_data

matrix([[ 22,  66, 140],
        [ 42,  70, 148],
        [ 30,  62, 125],
        [ 35,  68, 160],
        [ 25,  62, 152]])

In [2]:
# Wrap the raw matrix in a DataFrame, attaching the row and column labels.
df = pd.DataFrame(matrix_data, index=row_labels, columns=column_headings)
print("\nA new DataFrame\n" + '-'*25)
print(df)


A new DataFrame
-------------------------
   Age  Height  Weight
A   22      66     140
B   42      70     148
C   30      62     125
D   35      68     160
E   25      62     152


In [3]:
# Boolean-mask row selection: keep only the rows whose Height exceeds 65.
df.loc[df['Height'] > 65]

Unnamed: 0,Age,Height,Weight
A,22,66,140
B,42,70,148
D,35,68,160


In [4]:
# Print the Height > 65 selection as plain text under a banner.
print("\nRows with Height > 65 inch\n" + '-'*35)
print(df.loc[df['Height'] > 65])


Rows with Height > 65 inch
-----------------------------------
   Age  Height  Weight
A   22      66     140
B   42      70     148
D   35      68     160


In [25]:
# Element-wise comparison: yields a boolean Series aligned with df's index.
df['Height'].gt(68)

A    False
B     True
C    False
D    False
E    False
Name: Height, dtype: bool

In [26]:

# Build one boolean mask per condition, then AND them to select rows
# that satisfy both.
booldf1 = df['Height'].gt(65)
booldf2 = df['Weight'].gt(145)
print("\nRows with Height > 65 inch and Weight >145 lbs\n" + '-'*55)
print(df.loc[booldf1 & booldf2])



Rows with Height > 65 inch and Weight >145 lbs
-------------------------------------------------------
   Age  Height  Weight
B   42      70     148
D   35      68     160


In [27]:
print("\nDataFrame with only Age and Weight columns whose Height > 65 inch\n",'-'*68, sep='')
# Select rows (mask) and columns (labels) in a single .loc call instead of
# chaining df[mask][cols], which materialises an intermediate frame and is the
# chained-indexing pattern pandas advises against.
print(df.loc[booldf1, ['Age','Weight']])


DataFrame with only Age and Weight columns whose Height > 65 inch
--------------------------------------------------------------------
   Age  Weight
A   22     140
B   42     148
D   35     160


In [28]:
# Re-create the sample data (Age, Height, Weight per row) so the cells that
# follow start from a fresh copy.
matrix_data = np.matrix([
    [22, 66, 140],
    [42, 70, 148],
    [30, 62, 125],
    [35, 68, 160],
    [25, 62, 152],
])
row_labels = list('ABCDE')
column_headings = ['Age', 'Height', 'Weight']

In [29]:
# Rebuild df from the matrix with its row and column labels.
df = pd.DataFrame(matrix_data, index=row_labels, columns=column_headings)
print("\nThe DataFrame\n" + '-'*25)
print(df)


The DataFrame
-------------------------
   Age  Height  Weight
A   22      66     140
B   42      70     148
C   30      62     125
D   35      68     160
E   25      62     152


In [40]:
# reset_index() returns a new frame with a 0..n-1 index and the old row
# labels moved into an 'index' column; df itself is not modified.
print("\nAfter resetting index\n" + '-'*35)
print(df.reset_index(drop=False))



After resetting index
-----------------------------------
  index  Age  Height  Weight
0     A   22      66     140
1     B   42      70     148
2     C   30      62     125
3     D   35      68     160
4     E   25      62     152


In [31]:
# drop=True discards the old row labels instead of keeping them as a column.
print("\nAfter resetting index with 'drop' option TRUE\n" + '-'*45)
print(df.reset_index(drop=True))
# Preview of the whitespace-split list of profession names.
"Student Teacher Engineer Doctor Nurse".split(sep=None)


After resetting index with 'drop' option TRUE
---------------------------------------------
   Age  Height  Weight
0   22      66     140
1   42      70     148
2   30      62     125
3   35      68     160
4   25      62     152


['Student', 'Teacher', 'Engineer', 'Doctor', 'Nurse']

In [35]:
# Assigning a list to a new column label appends that column to df in place;
# the list must have one entry per row.
print("\nAdding a new column 'Profession'\n" + '-'*45)
df['Profession'] = "Student Teacher Engineer Doctor Nurse".split(sep=None)
print(df)


Adding a new column 'Profession'
---------------------------------------------
   Age  Height  Weight Profession
A   22      66     140    Student
B   42      70     148    Teacher
C   30      62     125   Engineer
D   35      68     160     Doctor
E   25      62     152      Nurse


In [41]:
# set_index() returns a new frame keyed by the chosen column; df is unchanged
# because the result is only printed, not assigned back.
print("\nSetting 'Profession' column as index\n" + '-'*45)
print(df.set_index(keys='Profession'))


Setting 'Profession' column as index
---------------------------------------------
            Age  Height  Weight
Profession                     
Student      22      66     140
Teacher      42      70     148
Engineer     30      62     125
Doctor       35      68     160
Nurse        25      62     152


# Multi Indexing

In [42]:
# Outer-level group labels and inner-level positions, paired element-wise
# into (outer, inner) tuples.
outside = ['G1'] * 3 + ['G2'] * 3
inside = [1, 2, 3] * 2
hier_index = [(outer, inner) for outer, inner in zip(outside, inside)]


In [43]:
# Show the raw (outer, inner) tuple list before it is turned into an index.
print("\nTuple pairs after the zip and list command\n" + '-'*45)
print(hier_index)


Tuple pairs after the zip and list command
---------------------------------------------
[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]


In [44]:
# Convert the (outer, inner) tuples into a two-level MultiIndex.
# Note: this rebinds hier_index from a list to a MultiIndex.
hier_index = pd.MultiIndex.from_tuples(tuples=hier_index)
print("\nIndex hierarchy\n" + '-'*25)
print(hier_index)


Index hierarchy
-------------------------
MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])


In [45]:
# Confirm which class hier_index is now an instance of.
print("\nIndex hierarchy type\n" + '-'*25)
print(hier_index.__class__)


Index hierarchy type
-------------------------
<class 'pandas.core.indexes.multi.MultiIndex'>


In [48]:
# `rn` is a short alias for numpy.random.randn (standard-normal samples).
from numpy.random import randn as rn
print("\nCreating DataFrame with multi-index\n",'-'*37, sep='')
# Seed NumPy's global generator so that re-running the notebook produces the
# same random frame every time; without it the values change on each run.
np.random.seed(101)
# 6 rows (one per hier_index entry) x 3 columns of rounded normal samples.
df1 = pd.DataFrame(data=np.round(rn(6,3)), index= hier_index, columns= ['A','B','C'])
print(df1)


Creating DataFrame with multi-index
-------------------------------------
        A    B    C
G1 1 -1.0 -0.0 -0.0
   2  0.0 -0.0 -2.0
   3  0.0 -0.0 -1.0
G2 1  0.0 -0.0  2.0
   2  1.0  1.0  1.0
   3 -1.0  0.0  1.0


In [64]:
# xs() takes a cross-section of a multi-indexed frame: selecting 'G1' on the
# outer level returns its rows, indexed by the inner level only.
print('\n grabbing a cross-section from outer level\n' + ','*45)
print(df1.xs(key='G1'))


 grabbing a cross-section from outer level
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
     A    B    C
1 -1.0 -0.0 -0.0
2  0.0 -0.0 -2.0
3  0.0 -0.0 -1.0
