In [78]:
import pandas as pd

In [79]:
from pandas import Series, DataFrame

In [80]:
obj = Series([0, -1, 2, -3, 4])
obj

0    0
1   -1
2    2
3   -3
4    4
dtype: int64

In [81]:
obj.values

array([ 0, -1,  2, -3,  4])

In [82]:
obj.index

RangeIndex(start=0, stop=5, step=1)

In [83]:
obj2 = Series([6, 4, 2, -4], index=['a', 'b', 'c', 'd'])
obj2

a    6
b    4
c    2
d   -4
dtype: int64

In [84]:
obj2.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [85]:
obj2['c'] == 2

True

In [86]:
obj2['c'] = 0
print(
        obj2['c'] == 2,
        obj2['c'] == 0
    )

False True


In [87]:
obj2[['d', 'c']]

d   -4
c    0
dtype: int64

In [88]:
obj2[obj2 == 0]

c    0
dtype: int64

In [89]:
obj2[obj2 > 0]

a    6
b    4
dtype: int64

In [90]:
obj2[obj2 < 0]

d   -4
dtype: int64

In [91]:
obj2 ** 2

a    36
b    16
c     0
d    16
dtype: int64

In [92]:
import numpy as np
np.sqrt(obj2 ** 2)

a    6.0
b    4.0
c    0.0
d    4.0
dtype: float64

In [93]:
'a' in obj2

True

In [94]:
'z' in obj2

False

In [95]:
capitals = {
    'Russia': 'Moscow',
    'Japan': 'Tokyo',
    'France': 'Paris'
}

obj3 = pd.Series(capitals)
obj3

Russia    Moscow
Japan      Tokyo
France     Paris
dtype: object

In [96]:
obj4 = pd.Series(capitals, index=['China', 'Russia', 'Japan', 'France'])
obj4

China        NaN
Russia    Moscow
Japan      Tokyo
France     Paris
dtype: object

In [97]:
pd.isnull(obj4)

China      True
Russia    False
Japan     False
France    False
dtype: bool

In [98]:
obj4.notnull()

China     False
Russia     True
Japan      True
France     True
dtype: bool

In [99]:
obj4.isnull()

China      True
Russia    False
Japan     False
France    False
dtype: bool

In [100]:
obj3 + obj4

China              NaN
France      ParisParis
Japan       TokyoTokyo
Russia    MoscowMoscow
dtype: object

In [101]:
obj4.name = 'capital'

In [102]:
obj4.index.name = 'state'

In [103]:
obj4

state
China        NaN
Russia    Moscow
Japan      Tokyo
France     Paris
Name: capital, dtype: object

In [104]:
obj

0    0
1   -1
2    2
3   -3
4    4
dtype: int64

In [105]:
obj.index = ['Null', 'Minus one', 'Two', 'Minus three', 'Four']
obj

Null           0
Minus one     -1
Two            2
Minus three   -3
Four           4
dtype: int64

## DataFrame

In [106]:
data = {'city': ['Miass', 'Chelyabinsk', 'Miass', 'Kopeysk'],
        'year': [1999, 1999, 2000, 2010],
        'pop': [0.4, 1.0, 0.45, 0.35]}

frame = pd.DataFrame(data)
frame

Unnamed: 0,city,year,pop
0,Miass,1999,0.4
1,Chelyabinsk,1999,1.0
2,Miass,2000,0.45
3,Kopeysk,2010,0.35


In [107]:
frame.head(1)

Unnamed: 0,city,year,pop
0,Miass,1999,0.4


In [108]:
frame.tail(1)

Unnamed: 0,city,year,pop
3,Kopeysk,2010,0.35


In [109]:
pd.DataFrame(data, columns=['pop', 'city', 'year'])

Unnamed: 0,pop,city,year
0,0.4,Miass,1999
1,1.0,Chelyabinsk,1999
2,0.45,Miass,2000
3,0.35,Kopeysk,2010


In [110]:
frame2 = pd.DataFrame(data, columns=['year', 'pop', 'city', 'debt'],
            index=['_1', '_2', '_3', '_4'])
frame2

Unnamed: 0,year,pop,city,debt
_1,1999,0.4,Miass,
_2,1999,1.0,Chelyabinsk,
_3,2000,0.45,Miass,
_4,2010,0.35,Kopeysk,


In [111]:
frame2.columns

Index(['year', 'pop', 'city', 'debt'], dtype='object')

In [112]:
frame2['pop']

_1    0.40
_2    1.00
_3    0.45
_4    0.35
Name: pop, dtype: float64

In [113]:
frame2.year

_1    1999
_2    1999
_3    2000
_4    2010
Name: year, dtype: int64

In [114]:
frame2.loc['_3']

year     2000
pop      0.45
city    Miass
debt      NaN
Name: _3, dtype: object

In [115]:
frame2['debt'] = 14.4
frame2

Unnamed: 0,year,pop,city,debt
_1,1999,0.4,Miass,14.4
_2,1999,1.0,Chelyabinsk,14.4
_3,2000,0.45,Miass,14.4
_4,2010,0.35,Kopeysk,14.4


In [116]:
frame2['debt'] = np.arange(4.)
frame2

Unnamed: 0,year,pop,city,debt
_1,1999,0.4,Miass,0.0
_2,1999,1.0,Chelyabinsk,1.0
_3,2000,0.45,Miass,2.0
_4,2010,0.35,Kopeysk,3.0


In [117]:
val = pd.Series([-1, -2, -3], index=['_2', '_1', '_6'])
frame2.debt = val
frame2

Unnamed: 0,year,pop,city,debt
_1,1999,0.4,Miass,-2.0
_2,1999,1.0,Chelyabinsk,-1.0
_3,2000,0.45,Miass,
_4,2010,0.35,Kopeysk,


In [118]:
frame2['eastern'] = frame2.city == 'Miass'
frame2

Unnamed: 0,year,pop,city,debt,eastern
_1,1999,0.4,Miass,-2.0,True
_2,1999,1.0,Chelyabinsk,-1.0,False
_3,2000,0.45,Miass,,True
_4,2010,0.35,Kopeysk,,False


In [119]:
frame2.columns

Index(['year', 'pop', 'city', 'debt', 'eastern'], dtype='object')

In [120]:
del frame2['eastern']
frame2.columns

Index(['year', 'pop', 'city', 'debt'], dtype='object')

In [123]:
pop = {'Nevada': {2002: 2.5, 2003: 3.},
        'Ohio': {2001: 1.6, 2002: 1.8, 2003: 3.7}}
frame3 = pd.DataFrame(pop)
frame3

Unnamed: 0,Nevada,Ohio
2002,2.5,1.8
2003,3.0,3.7
2001,,1.6


In [125]:
frame3.T

Unnamed: 0,2002,2003,2001
Nevada,2.5,3.0,
Ohio,1.8,3.7,1.6


In [128]:
pd.DataFrame(pop, index=[2001, 2002, 2003])

Unnamed: 0,Nevada,Ohio
2001,,1.6
2002,2.5,1.8
2003,3.0,3.7


In [136]:
pdata = {
    'Ohio': frame3['Ohio'][:-1], #[:-1],
    'Nevada': frame3['Nevada'][:2], #[:2]
}
pd.DataFrame(pdata)

Unnamed: 0,Ohio,Nevada
2002,1.8,2.5
2003,3.7,3.0


In [137]:
frame3

Unnamed: 0,Nevada,Ohio
2002,2.5,1.8
2003,3.0,3.7
2001,,1.6


In [138]:
frame3.index.name = 'year'
frame3.columns.name = 'state'
frame3

state,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2002,2.5,1.8
2003,3.0,3.7
2001,,1.6


In [139]:
frame3.values

array([[2.5, 1.8],
       [3. , 3.7],
       [nan, 1.6]])

In [140]:
frame2.values

array([[1999, 0.4, 'Miass', -2.0],
       [1999, 1.0, 'Chelyabinsk', -1.0],
       [2000, 0.45, 'Miass', nan],
       [2010, 0.35, 'Kopeysk', nan]], dtype=object)

## Task 1

In [148]:
array2d = np.array((np.ndarray(2), np.ndarray(2)))
frame5 = pd.DataFrame(array2d)
frame5

Unnamed: 0,0,1
0,5e-324,8.027e-320
1,5e-324,8.027e-320


In [149]:
pdata2 = {
    'array': np.arange(4),
    'list': ['one', 'two', 'three', 'four'],
    'set': (1, 2, 3, 4)
}

frame6 = pd.DataFrame(pdata2)
frame6

Unnamed: 0,array,list,set
0,0,one,1
1,1,two,2
2,2,three,3
3,3,four,4


In [158]:
pdata3 = np.zeros(3, dtype={'names': ('name', 'age'),
                            'formats': ('U10', 'i')})
pdata3['name'] = ['Andrey', 'Kseniya', 'Maria']
pdata3['age'] = [22, 26, 30]
frame7 = pd.DataFrame(pdata3)
frame7

Unnamed: 0,name,age
0,Andrey,22
1,Kseniya,26
2,Maria,30


In [160]:
pdata4 = {
    'col1': pd.Series([1, 2, 3]),
    'col2': pd.Series([4, 5, 6])
}
frame8 = pd.DataFrame(pdata4)
frame8

Unnamed: 0,col1,col2
0,1,4
1,2,5
2,3,6


In [161]:
pdata5 = {
        'col1': {'row1': 1, 'row2': 2, 'row3': 3},
        'col2': {'row1': 4, 'row2': 5, 'row3': 6}
        }
frame9 = pd.DataFrame(pdata5)
frame9

Unnamed: 0,col1,col2
row1,1,4
row2,2,5
row3,3,6


In [162]:
pdata6 = [{'col1': 1, 'col2': 2}, {'col1': 3, 'col2': 4}]
frame10 = pd.DataFrame(pdata6)
frame10

Unnamed: 0,col1,col2
0,1,2
1,3,4


In [163]:
pdata7 = [[1, 4], [2, 5], [3, 6]]
frame11 = pd.DataFrame(pdata7)
frame11

Unnamed: 0,0,1
0,1,4
1,2,5
2,3,6


In [165]:
frame12 = pd.DataFrame(frame9)
frame12 == frame9

Unnamed: 0,col1,col2
row1,True,True
row2,True,True
row3,True,True


In [169]:
pdata8 = np.ma.array([[1, 2, 3], [4, 5, 6]], mask=[[False, True, False], [True, False, True]])
frame13 = pd.DataFrame(pdata8)
frame13

Unnamed: 0,0,1,2
0,1.0,,3.0
1,,5.0,


## Index objects

In [170]:
obj = Series(range(4), index=['one', 'two', 'three', 'four'])
obj.index


Index(['one', 'two', 'three', 'four'], dtype='object')

In [171]:
obj.index[2:]


Index(['three', 'four'], dtype='object')

In [173]:
obj.index['one'] = -1000

TypeError: Index does not support mutable operations

In [175]:
labels = pd.Index(np.arange(4))
labels

Int64Index([0, 1, 2, 3], dtype='int64')

In [176]:
obj2 = pd.Series([1.15, -2.4, 9, 1], index=labels)
obj2

0    1.15
1   -2.40
2    9.00
3    1.00
dtype: float64

In [177]:
obj2.index is labels

True

In [178]:
frame3

state,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2002,2.5,1.8
2003,3.0,3.7
2001,,1.6


In [179]:
frame3.columns

Index(['Nevada', 'Ohio'], dtype='object', name='state')

In [180]:
'Ohio' in frame3.columns

True

In [181]:
2003 in frame3.index

True

In [182]:
2008 in frame3.index

False

In [183]:
dup_labels = pd.Index(['one', 'one', 'two', 'two'])
dup_labels

Index(['one', 'one', 'two', 'two'], dtype='object')

## Task 2

In [184]:
index1 = pd.Index(['a', 'b'])
index2 = index1.append(pd.Index(['c', 'd']))
index2

Index(['a', 'b', 'c', 'd'], dtype='object')

In [192]:
index3 = pd.Index(['a', 'c'])
index2.difference(index3)

Index(['b', 'e'], dtype='object')

In [191]:
index1 = pd.Index(['a', 'b', 'c'])
index2 = pd.Index(['a', 'b', 'e'])
index1.intersection(index2)

Index(['a', 'b'], dtype='object')

In [194]:
index1 = pd.Index(['a', 'b', 'c'])
index2 = pd.Index(['d', 'e', 'f', 'c'])
index1.union(index2)

Index(['a', 'b', 'c', 'd', 'e', 'f'], dtype='object')

In [195]:
values = ['d', 'e']
index2.isin(values)

array([ True,  True, False, False])

In [196]:
index2.delete(3)

Index(['d', 'e', 'f'], dtype='object')

In [197]:
index2.drop('c')

Index(['d', 'e', 'f'], dtype='object')

In [198]:
index2.insert(0, 'c')

Index(['c', 'd', 'e', 'f', 'c'], dtype='object')

In [199]:
index1.is_monotonic_increasing

True

In [200]:
index2.insert(0, 'c').is_monotonic_increasing

False

In [202]:
index2.insert(0, 'c').is_unique

False

In [203]:
index2.is_unique

True

In [204]:
index2.insert(0, 'c').unique()

Index(['c', 'd', 'e', 'f'], dtype='object')