## 1. Series class

In [92]:
import pandas as pd
import numpy as np

In [6]:
obj1 = pd.Series([4, 7, -5, 3])
obj1

0    4
1    7
2   -5
3    3
dtype: int64

#### We can set new index

In [7]:
obj2 = pd.Series([4, 7, -5, 3], index=['d','b','a','c'])
obj2

d    4
b    7
a   -5
c    3
dtype: int64

#### Indexing & slicing in Series

In [12]:
obj2['d'], obj2[0]

(4, 4)

### 

In [14]:
obj2[ obj2 > 0 ]

d    4
b    7
c    3
dtype: int64

#### Dictionary can be transformed into Series

In [15]:
sdata = {'Ohio':35000, 'Texas':71000, 'Oregon':76000, 'Utah':5000}
sdata

{'Ohio': 35000, 'Texas': 71000, 'Oregon': 76000, 'Utah': 5000}

In [16]:
obj3 = pd.Series(sdata)
obj3

Ohio      35000
Texas     71000
Oregon    76000
Utah       5000
dtype: int64

In [17]:
'Ohio' in obj3

True

#### Series automatically handles coallision

In [19]:
states = ['California','Ohio', 'Texas', 'Oregon', 'Utah']
obj4 = pd.Series(sdata, index=states)
obj4

California        NaN
Ohio          35000.0
Texas         71000.0
Oregon        76000.0
Utah           5000.0
dtype: float64

## 2. DataFrame class

In [26]:
data = {
    'state':['Ohio', 'Ohio', 'Ohio', 'Nebada', 'Nebada'],
    'year':[2000, 2001, 2002, 2001, 2022],
    'pop':[1.5, 1.7, 3.6, 2.4, 2.9]
}
type(data)

dict

In [70]:
frame = pd.DataFrame(data)
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nebada,2001,2.4
4,Nebada,2022,2.9


#### indexing & slicing in DataFrame

In [71]:
# frame[0] - Not run
frame['state']

0      Ohio
1      Ohio
2      Ohio
3    Nebada
4    Nebada
Name: state, dtype: object

In [72]:
frame.year

0    2000
1    2001
2    2002
3    2001
4    2022
Name: year, dtype: int64

#### indexing & slicing using .ix method - deprecated, not recommended

In [73]:
frame.ix[1:2, ['year', 'pop']]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Unnamed: 0,year,pop
1,2001,1.7
2,2002,3.6


In [74]:
frame.ix[1:2, 'state':'pop']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Unnamed: 0,state,year,pop
1,Ohio,2001,1.7
2,Ohio,2002,3.6


In [75]:
frame.ix[1:2, 1:3]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Unnamed: 0,year,pop
1,2001,1.7
2,2002,3.6


In [76]:
frame.index = ['one','two','three','four','five']
frame

Unnamed: 0,state,year,pop
one,Ohio,2000,1.5
two,Ohio,2001,1.7
three,Ohio,2002,3.6
four,Nebada,2001,2.4
five,Nebada,2022,2.9


In [77]:
frame.ix[1:2, 'state':'pop']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Unnamed: 0,state,year,pop
two,Ohio,2001,1.7


#### indexing & slicing in DataFrame (2)

In [131]:
frame.iloc[[0,2,4]]

Unnamed: 0,state,year,pop
one,Ohio,2000,1.5
three,Ohio,2002,3.6
five,Nebada,2022,2.9


In [141]:
frame.loc[['one','three','five']]

Unnamed: 0,state,year,pop
one,Ohio,2000,1.5
three,Ohio,2002,3.6
five,Nebada,2022,2.9


In [164]:
frame[ frame['pop'] > 2 ]

Unnamed: 0,state,year,pop
three,Ohio,2002,3.6
four,Nebada,2001,2.4
five,Nebada,2022,2.9


#### basic method in DataFrame - describe(), drop()

In [36]:
frame.describe(include='all')

Unnamed: 0,state,year,pop
count,5,5.0,5.0
unique,2,,
top,Ohio,,
freq,3,,
mean,,2005.2,2.42
std,,9.418068,0.864292
min,,2000.0,1.5
25%,,2001.0,1.7
50%,,2001.0,2.4
75%,,2002.0,2.9


In [126]:
frame2 = frame.copy()
frame2.drop(['one', 'two'], axis=0)

Unnamed: 0,state,year,pop
three,Ohio,2002,3.6
four,Nebada,2001,2.4
five,Nebada,2022,2.9


In [125]:
frame2.drop('state', axis=1)

Unnamed: 0,year,pop
one,2000,1.5
two,2001,1.7
three,2002,3.6
four,2001,2.4
five,2022,2.9


#### DataFrame can be transformed into numpy.ndarray

In [165]:
print(type(frame.values), '\n')
print(frame.values[0], '\n')
frame.values

<class 'numpy.ndarray'> 

['Ohio' 2000 1.5] 



array([['Ohio', 2000, 1.5],
       ['Ohio', 2001, 1.7],
       ['Ohio', 2002, 3.6],
       ['Nebada', 2001, 2.4],
       ['Nebada', 2022, 2.9]], dtype=object)

#### basic method in DataFrame - apply()

In [204]:
np.random.seed(0)
frame3 = pd.DataFrame(np.random.randn(4,3), 
                      columns=list('bde'),
                      index=['Utah','Ohio','Texas','Oregon'])
frame3

Unnamed: 0,b,d,e
Utah,1.764052,0.400157,0.978738
Ohio,2.240893,1.867558,-0.977278
Texas,0.950088,-0.151357,-0.103219
Oregon,0.410599,0.144044,1.454274


In [205]:
f = lambda x : x.max()-x.min()
frame3.apply(f)

b    1.830295
d    2.018915
e    2.431551
dtype: float64

In [206]:
frame3.apply(f, axis=1)

Utah      1.363895
Ohio      3.218171
Texas     1.101446
Oregon    1.310230
dtype: float64