In [18]:
import numpy as np
import pandas as pd

In [19]:
from pandas import Series, DataFrame

In [20]:
# Build a DataFrame from a dict of equal-length lists:
# every key becomes a column, every list supplies that column's values.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002, 2003],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}
frame = pd.DataFrame(data)
print(frame)

    state  year  pop
0    Ohio  2000  1.5
1    Ohio  2001  1.7
2    Ohio  2002  3.6
3  Nevada  2001  2.4
4  Nevada  2002  2.9
5  Nevada  2003  3.2


In [21]:
# Retrieving a column: dict-style indexing returns that column as a Series.
frame['state']

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: state, dtype: object

In [22]:
# Build a new frame whose columns follow the requested order.
# 'extra' is not a column of frame, so it shows up filled with missing values;
# frame itself is left unchanged.
pd.DataFrame(frame, columns=['year', 'state', 'pop','extra'])

Unnamed: 0,year,state,pop,extra
0,2000,Ohio,1.5,
1,2001,Ohio,1.7,
2,2002,Ohio,3.6,
3,2001,Nevada,2.4,
4,2002,Nevada,2.9,
5,2003,Nevada,3.2,


In [23]:
# Column labels of the frame, exposed as a pandas Index.
frame.columns

Index(['state', 'year', 'pop'], dtype='object')

In [24]:
# Row labels of the frame; no index was supplied, so this is the default integer range.
frame.index

RangeIndex(start=0, stop=6, step=1)

In [25]:
# Display the frame using the notebook's rich table rendering.
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [26]:
# Attribute-style column access, same result as frame['year'].
# Only usable when the column name is a valid Python identifier that does not
# clash with an existing DataFrame attribute: frame.pop, for instance, is the
# DataFrame.pop method rather than the 'pop' column.
frame.year

0    2000
1    2001
2    2002
3    2001
4    2002
5    2003
Name: year, dtype: int64

In [27]:
# Assigning to a column label creates the column if it does not exist yet;
# a scalar is repeated for every row.
frame['extra'] = 10
print(frame)

    state  year  pop  extra
0    Ohio  2000  1.5     10
1    Ohio  2001  1.7     10
2    Ohio  2002  3.6     10
3  Nevada  2001  2.4     10
4  Nevada  2002  2.9     10
5  Nevada  2003  3.2     10


In [85]:
# A list or array assigned to a column must be exactly as long as the frame.
# A Series is matched on its index labels instead: rows whose label is absent
# from the Series (here 1, 2 and 5) receive NaN.
val = pd.Series([1.2, 1.5, 1.7], index=[0, 3, 4])
print(val, '\n')
frame['extra'] = val
print(frame)

0    1.2
3    1.5
4    1.7
dtype: float64 
   
Columns   state  year  pop  extra
Rows                             
0          Ohio  2000  1.5    1.2
1          Ohio  2001  1.7    NaN
2          Ohio  2002  3.6    NaN
3        Nevada  2001  2.4    1.5
4        Nevada  2002  2.9    1.7
5        Nevada  2003  3.2    NaN


In [12]:
# Derive a boolean column: True on the rows whose state is 'Ohio', False elsewhere.
frame['eastern2'] = (frame['state'] == 'Ohio')
print(frame)

    state  year  pop  extra  eastern2
0    Ohio  2000  1.5    1.2      True
1    Ohio  2001  1.7    NaN      True
2    Ohio  2002  3.6    NaN      True
3  Nevada  2001  2.4    1.5     False
4  Nevada  2002  2.9    1.7     False
5  Nevada  2003  3.2    NaN     False


In [29]:
# del removes a column in place, the same way it removes a key from a dict.
# The cell is not re-runnable as is: a second run raises KeyError because
# 'state' is already gone.
del frame['state']

print(frame)

   year  pop  extra
0  2000  1.5    1.2
1  2001  1.7    NaN
2  2002  3.6    NaN
3  2001  2.4    1.5
4  2002  2.9    1.7
5  2003  3.2    NaN


In [30]:
# Display the frame again to confirm the 'state' column has been removed.
frame

Unnamed: 0,year,pop,extra
0,2000,1.5,1.2
1,2001,1.7,
2,2002,3.6,
3,2001,2.4,1.5
4,2002,2.9,1.7
5,2003,3.2,


In [31]:
# Nested dictionary: the outer keys become the columns and the inner keys
# become the row labels. A (row, column) pair missing from the inner dicts
# is filled with NaN.
dfnew = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
df1 = pd.DataFrame(dfnew)
print(df1)

      Nevada  Ohio
2001     2.4   1.7
2002     2.9   3.6
2000     NaN   1.5


In [32]:
# Transpose: .T swaps rows and columns, so the states become the row labels
# and the years become the columns.
df1.T

Unnamed: 0,2001,2002,2000
Nevada,2.4,2.9,
Ohio,1.7,3.6,1.5


In [17]:
# Display the current state of frame after the edits made so far.
frame

Unnamed: 0,year,pop,extra,eastern2
0,2000,1.5,1.2,True
1,2001,1.7,,True
2,2002,3.6,,True
3,2001,2.4,1.5,False
4,2002,2.9,1.7,False
5,2003,3.2,,False


In [35]:
# Rebuild frame from the raw dict: earlier cells added columns to it and
# deleted 'state', so start again from a clean three-column frame.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002, 2003],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}
frame = pd.DataFrame(data)
print(frame)


    state  year  pop
0    Ohio  2000  1.5
1    Ohio  2001  1.7
2    Ohio  2002  3.6
3  Nevada  2001  2.4
4  Nevada  2002  2.9
5  Nevada  2003  3.2


In [88]:
# Name both axes: index.name labels the row axis, columns.name labels the
# column axis. Both names appear in the frame's header and in the repr of
# frame.index / frame.columns.
# The names are written without trailing whitespace so that they match the
# displayed reprs (name='Rows', name='Columns') and can be looked up exactly.
frame.index.name = 'Rows'
frame.columns.name = 'Columns'

In [45]:
# Display the frame; the axis names now show up in the table header.
frame

Columns,state,year,pop
Rows,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [47]:
# As with Series, the values attribute returns the data held in the DataFrame
# as a two-dimensional ndarray. The columns here mix strings, integers and
# floats, so the resulting array has dtype=object.
frame.values

array([['Ohio', 2000, 1.5],
       ['Ohio', 2001, 1.7],
       ['Ohio', 2002, 3.6],
       ['Nevada', 2001, 2.4],
       ['Nevada', 2002, 2.9],
       ['Nevada', 2003, 3.2]], dtype=object)

In [48]:
# Small Series with string labels, used below to explore pandas Index objects.
obj = pd.Series(
    data=range(3),
    index=['a', 'b', 'c'],
)

In [50]:
# Display the Series: labels on the left, values on the right.
obj

a    0
b    1
c    2
dtype: int64

In [52]:
# Keep a handle on the Series' Index object so it can be inspected on its own.
ind = obj.index
print(ind)

Index(['a', 'b', 'c'], dtype='object')


In [55]:
# An Index can be sliced like an array; the result is again an Index.
ind[1:]

Index(['b', 'c'], dtype='object')

In [58]:
# Another example: build an Index object explicitly (here from a NumPy integer
# range) so that it can be handed to a Series as its index.
# numpy is imported as np in the notebook's import cell at the top rather than here.
labels = pd.Index(np.arange(3))
print(labels)

Int64Index([0, 1, 2], dtype='int64')


In [90]:
# Build a Series on top of the pre-made Index object, then display it.
obj2 = pd.Series(data=[5.2, -2, 0], index=labels)
obj2

0    5.2
1   -2.0
2    0.0
dtype: float64

In [62]:
# Identity check (not equality): per the saved output, the Series holds the
# very Index object it was given rather than a copy.
obj2.index is labels

True

In [93]:
# Another example: display the frame whose index-like behaviour is explored next.
# NOTE(review): the saved output includes an 'extra' column, but the cell that
# rebuilds frame creates only state/year/pop; judging by the execution counts
# the saved output comes from out-of-order execution. Re-run top to bottom to refresh.
frame

Columns,state,year,pop,extra
Rows,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Ohio,2000,1.5,1.2
1,Ohio,2001,1.7,
2,Ohio,2002,3.6,
3,Nevada,2001,2.4,1.5
4,Nevada,2002,2.9,1.7
5,Nevada,2003,3.2,


In [94]:
# shape is a (number of rows, number of columns) tuple.
print(frame.shape)

(6, 4)


In [67]:
# The row index again; its repr now carries the axis name assigned earlier.
frame.index

RangeIndex(start=0, stop=6, step=1, name='Rows')

In [69]:
# The column index again; its repr now carries the axis name assigned earlier.
frame.columns

Index(['state', 'year', 'pop'], dtype='object', name='Columns')

In [76]:
# In addition to being array-like, an Index also behaves like a fixed-size set,
# so the `in` operator tests whether a label is present.
'year' in frame.columns

True

In [72]:
# Membership works on the row index too: is 4 one of the row labels?
4 in frame.index

True

In [75]:
# A label that is not among the columns simply yields False (no error is raised).
'name' in frame.columns

False

In [79]:
# Unlike a Python set, a pandas Index may hold the same label more than once.
# Selecting by a duplicated label returns every occurrence of that label.
dup_labels = pd.Index(['foo', 'foo', 'bar', 'bar'])
dup_labels

Index(['foo', 'foo', 'bar', 'bar'], dtype='object')

In [None]:
#Renaming Series

In [110]:
# A named Series of five random draws; only its name matters in the cells below.
s = pd.Series(
    data=np.random.randn(5),
    name="something",
)

In [114]:
# The name given at construction time is available through the name attribute.
s.name

'something'

In [116]:
# rename with a scalar gives back a Series carrying the new name.
# Note that s and s2 refer to different objects: s keeps its original name.
s2 = s.rename("different")

In [113]:
# The renamed Series reports the new name.
s2.name

'different'

In [115]:
# The original Series still reports its old name; rename did not modify it.
s.name

'something'