# Pandas series

In [1]:
import pandas as pd
import numpy as np

In [2]:
obj = pd.Series([1,2,3,4,5])

In [3]:
obj

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [4]:
obj.values

array([1, 2, 3, 4, 5], dtype=int64)

In [5]:
obj.index

RangeIndex(start=0, stop=5, step=1)

In [6]:
obj2 = pd.Series([1,2,3,4,5],index=['d','e','a','b','c'])

In [7]:
obj2.values

array([1, 2, 3, 4, 5], dtype=int64)

In [8]:
obj2.index

Index(['d', 'e', 'a', 'b', 'c'], dtype='object')

In [9]:
obj2

d    1
e    2
a    3
b    4
c    5
dtype: int64

In [12]:
obj[obj>2]

2    3
3    4
4    5
dtype: int64

In [14]:
np.array(obj)
obj

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [16]:
np.exp(obj2)

d      2.718282
e      7.389056
a     20.085537
b     54.598150
c    148.413159
dtype: float64

In [21]:
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
sdata

{'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}

In [18]:
obj3 = pd.Series(sdata)

In [20]:
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [22]:
states = ['cali','Ohio','Texas','Oregon']

In [26]:
obj4 = pd.Series(sdata,index = states)

In [28]:
pd.isnull(obj4)

cali       True
Ohio      False
Texas     False
Oregon    False
dtype: bool

In [29]:
pd.notnull(obj4)

cali      False
Ohio       True
Texas      True
Oregon     True
dtype: bool

In [31]:
obj4.isnull()

cali       True
Ohio      False
Texas     False
Oregon    False
dtype: bool

In [32]:
obj3+obj4

Ohio       70000.0
Oregon     32000.0
Texas     142000.0
Utah           NaN
cali           NaN
dtype: float64

In [33]:
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [40]:
obj3.name = "population"
obj3.index.name = "states"

In [41]:
print(obj3.name)
print(obj3.index.name)

population
states


In [44]:
obj3.index

Index(['Ohio', 'Texas', 'Oregon', 'Utah'], dtype='object', name='states')

In [45]:
obj3.index = ["state1","state2","state3","state4"]

In [46]:
obj3

state1    35000
state2    71000
state3    16000
state4     5000
Name: population, dtype: int64

# Pandas DataFrame

In [47]:
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
        'year': [2000, 2001, 2002, 2001, 2002, 2003],
        'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}

In [48]:
data

{'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
 'year': [2000, 2001, 2002, 2001, 2002, 2003],
 'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}

In [148]:
frame = pd.DataFrame(data)
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [52]:
frame.head()

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9


In [56]:
frame[["state","year"]]

Unnamed: 0,state,year
0,Ohio,2000
1,Ohio,2001
2,Ohio,2002
3,Nevada,2001
4,Nevada,2002
5,Nevada,2003


In [57]:
frame.year

0    2000
1    2001
2    2002
3    2001
4    2002
5    2003
Name: year, dtype: int64

In [58]:
frame.loc[1]

state    Ohio
year     2001
pop       1.7
Name: 1, dtype: object

In [76]:
frame2 = pd.DataFrame(data,columns=['year','state','pop','debt'])


In [77]:
frame2

Unnamed: 0,year,state,pop,debt
0,2000,Ohio,1.5,
1,2001,Ohio,1.7,
2,2002,Ohio,3.6,
3,2001,Nevada,2.4,
4,2002,Nevada,2.9,
5,2003,Nevada,3.2,


In [80]:
frame2.debt=[1.6,1.7,1.8,1.9,2.0,3.0]

In [81]:
frame2

Unnamed: 0,year,state,pop,debt
0,2000,Ohio,1.5,1.6
1,2001,Ohio,1.7,1.7
2,2002,Ohio,3.6,1.8
3,2001,Nevada,2.4,1.9
4,2002,Nevada,2.9,2.0
5,2003,Nevada,3.2,3.0


In [88]:
frame2["Eastern"]= frame2["state"] == "Ohio"

In [89]:
frame2

Unnamed: 0,year,state,pop,debt,Eastern
0,2000,Ohio,1.5,1.6,True
1,2001,Ohio,1.7,1.7,True
2,2002,Ohio,3.6,1.8,True
3,2001,Nevada,2.4,1.9,False
4,2002,Nevada,2.9,2.0,False
5,2003,Nevada,3.2,3.0,False


In [90]:
del frame2["Eastern"]

In [91]:
frame2

Unnamed: 0,year,state,pop,debt
0,2000,Ohio,1.5,1.6
1,2001,Ohio,1.7,1.7
2,2002,Ohio,3.6,1.8
3,2001,Nevada,2.4,1.9
4,2002,Nevada,2.9,2.0
5,2003,Nevada,3.2,3.0


In [92]:
frame2["new"] = [1,2,3,4,5,6]

In [95]:
del frame2["new"]

In [96]:
frame2

Unnamed: 0,year,state,pop,debt
0,2000,Ohio,1.5,1.6
1,2001,Ohio,1.7,1.7
2,2002,Ohio,3.6,1.8
3,2001,Nevada,2.4,1.9
4,2002,Nevada,2.9,2.0
5,2003,Nevada,3.2,3.0


In [98]:
frame2.index.name = "Record"
frame2.columns.name = "columns"

In [99]:
frame2

columns,year,state,pop,debt
Record,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2000,Ohio,1.5,1.6
1,2001,Ohio,1.7,1.7
2,2002,Ohio,3.6,1.8
3,2001,Nevada,2.4,1.9
4,2002,Nevada,2.9,2.0
5,2003,Nevada,3.2,3.0


In [100]:
frame2.values

array([[2000, 'Ohio', 1.5, 1.6],
       [2001, 'Ohio', 1.7, 1.7],
       [2002, 'Ohio', 3.6, 1.8],
       [2001, 'Nevada', 2.4, 1.9],
       [2002, 'Nevada', 2.9, 2.0],
       [2003, 'Nevada', 3.2, 3.0]], dtype=object)

In [101]:
frame2.index

RangeIndex(start=0, stop=6, step=1, name='Record')

In [107]:
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object', name='columns')

In [108]:
a = pd.Series([1,2,3,4,5],index=['a','d','e','b','c'])

### Reindexing

In [109]:
a

a    1
d    2
e    3
b    4
c    5
dtype: int64

In [117]:
b = a.reindex(['a','b','c','d','e','f'])

In [118]:
b

a    1.0
b    4.0
c    5.0
d    2.0
e    3.0
f    NaN
dtype: float64

In [135]:
frame2 = frame.reindex([3,4,5,0,1,2])
frame2

Unnamed: 0,state,year,pop
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6


In [134]:
frame2.iloc[[1,2,3]]

Unnamed: 0,state,year,pop
4,Nevada,2002,2.9
5,Nevada,2003,3.2
0,Ohio,2000,1.5


In [149]:
frame.loc[[1,2,3]]

Unnamed: 0,state,year,pop
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4


In [164]:
frame = frame.reindex(range(8),['state','year','pop','deb'],method='bfill')

	'.reindex(a, b)' as 
	'.reindex(index=a, columns=b)'.
Use named arguments to remove any ambiguity. In the future, using positional arguments for 'index' or 'columns' will raise  a 'TypeError'.
  """Entry point for launching an IPython kernel.


In [165]:
frame

Unnamed: 0,state,year,pop,deb
0,Ohio,2001.0,1.7,
1,Ohio,2001.0,1.7,
2,Ohio,2002.0,3.6,
3,Nevada,2001.0,2.4,
4,Nevada,2002.0,2.9,
5,Nevada,2003.0,3.2,
6,,,,
7,,,,


In [160]:
obj3 = pd.Series(['blue', 'purple', 'yellow'],index = [0,3,5])

In [161]:
obj3.reindex(range(6),method='ffill')

0      blue
1      blue
2      blue
3    purple
4    purple
5    yellow
dtype: object

In [182]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)),index=['Ohio', 'Colorado', 'Utah', 'New York'],columns=['one', 'two', 'three', 'four'])

In [184]:
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [185]:
data.loc[['Ohio'],['three']]

Unnamed: 0,three
Ohio,2


In [181]:
data.drop('one',axis = 1)

Unnamed: 0,two,three,four
Ohio,1,2,3
Colorado,5,6,7
Utah,9,10,11
New York,13,14,15


In [189]:
data.drop('one',axis = 1,inplace=True)

In [190]:
data

Unnamed: 0,two,three,four
Ohio,1,2,3
Colorado,5,6,7
Utah,9,10,11
New York,13,14,15
