In [1]:
import numpy as np
import pandas as pd

In [3]:
# Create a Series from a plain list; no index is given, so pandas assigns
# the default integer labels 0..3.
data = pd.Series(data=[0.25, 0.5, 0.75, 1])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [5]:
# Materialise the index labels as a plain Python list.
list(data.index)

[0, 1, 2, 3]

In [6]:
# The underlying data is exposed as a NumPy array (without the index labels).
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [7]:
# Look up by index label; with the default 0..3 labels, label 1 is also the
# second element.
data[1]

0.5

In [9]:
# Same values, but labelled with strings instead of the default integers.
data = pd.Series([0.25, 0.5, 0.75, 1], index=list('abcd'))
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [10]:
# Dictionary-style lookup by string label.
data['a']

0.25

In [11]:
# Integer labels need not start at 0 or match positions: here the labels are 2..5.
data = pd.Series([0.25,0.5,0.75,1], index = [2,3,4,5])
data

2    0.25
3    0.50
4    0.75
5    1.00
dtype: float64

In [12]:
# Integer keys are resolved against the index labels, so 2 selects the element
# labelled 2 (the first one), not the element at position 2.
data[2]

0.25

In [13]:
# The index labels are 2..5, so label 1 does not exist: integer keys on an
# integer index are looked up by label, not by position.
# The KeyError is the point of this cell, so it is caught and printed rather
# than left to abort a Restart & Run All.
try:
    data[1]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 1

In [14]:
# Per-state figures keyed by two-letter code; used below to build the `pop` Series.
my_dict = {
    'CA': 38332521,
    'TX': 26448193,
    'NY': 19651127,
    'FL': 19552860,
    'IL': 12882135,
}
my_dict

{'CA': 38332521,
 'TX': 26448193,
 'NY': 19651127,
 'FL': 19552860,
 'IL': 12882135}

In [15]:
# A dict converts directly to a Series: keys become the index, values the data.
pop = pd.Series(my_dict)
pop

CA    38332521
TX    26448193
NY    19651127
FL    19552860
IL    12882135
dtype: int64

In [16]:
# Label lookup reads just like indexing the original dict.
pop['CA']

38332521

In [17]:
# Unlike a dict, a Series can be sliced by label; the end label 'FL' is
# included in the result.
pop['CA' : 'FL']

CA    38332521
TX    26448193
NY    19651127
FL    19552860
dtype: int64

In [18]:
# Second set of per-state figures; used below to build the `area` Series.
my_dict2 = {
    'CA': 423967,
    'TX': 695662,
    'NY': 141297,
    'FL': 170312,
    'IL': 149995,
}
my_dict2

{'CA': 423967, 'TX': 695662, 'NY': 141297, 'FL': 170312, 'IL': 149995}

In [20]:
# Build the `area` Series the same way; it carries the same state labels as `pop`.
area = pd.Series(my_dict2)
area

CA    423967
TX    695662
NY    141297
FL    170312
IL    149995
dtype: int64

# pandas DataFrame

In [23]:
# Combine the two Series into a DataFrame: each dict key becomes a column and
# the rows are aligned on the shared state labels.
# NOTE: the column name "pop" collides with the DataFrame.pop method, so that
# column must be accessed as states["pop"], not states.pop.
states = pd.DataFrame({"area" : area, "pop" : pop})
states

Unnamed: 0,area,pop
CA,423967,38332521
TX,695662,26448193
NY,141297,19651127
FL,170312,19552860
IL,149995,12882135


In [24]:
# The same table built from the raw nested dicts instead of Series objects:
# outer keys become columns, inner keys become row labels.
states2 = pd.DataFrame({'area' : my_dict2, 'pop' : my_dict})
states2

Unnamed: 0,area,pop
CA,423967,38332521
FL,170312,19552860
IL,149995,12882135
NY,141297,19651127
TX,695662,26448193


In [25]:
# The cell values as a 2-D NumPy array, one row per state and one column per field.
states2.values

array([[  423967, 38332521],
       [  170312, 19552860],
       [  149995, 12882135],
       [  141297, 19651127],
       [  695662, 26448193]], dtype=int64)

In [28]:
# Column labels are held in an Index object as well.
states2.columns

Index(['area', 'pop'], dtype='object')

In [29]:
# An Index can be indexed by position like an array.
states2.columns[0]

'area'

In [27]:
# Row labels of the frame.
# NOTE(review): this cell's execution count (27) is lower than that of the cells
# above it, and the label order shown in its output differs from the states2
# table displayed earlier - re-run the notebook top to bottom to refresh it.
states2.index

Index(['CA', 'TX', 'NY', 'FL', 'IL'], dtype='object')

In [31]:
# Three records, each a dict with the keys 'a' and 'b'.
data = [dict(a=i + 10, b=2 * i) for i in range(3)]
data

[{'a': 10, 'b': 0}, {'a': 11, 'b': 2}, {'a': 12, 'b': 4}]

In [32]:
# A list of dicts becomes one row per dict, with the dict keys as columns.
pd.DataFrame(data)

Unnamed: 0,a,b
0,10,0
1,11,2
2,12,4


In [33]:
# Column-oriented input: each key maps to the list of values for that column.
data2 = {'a' : [0,10,20], 'b' : [0,2,4]}

In [34]:
# A dict of lists becomes one column per key, with a default integer row index.
pd.DataFrame(data2)

Unnamed: 0,a,b
0,0,0
1,10,2
2,20,4


In [36]:
# 3x2 array of uniform random numbers in [0, 1).
# NOTE: no seed is set, so these values - and every output derived from temp1
# below - change on each run.
temp1 = np.random.rand(3,2)
temp1

array([[0.07677806, 0.90653236],
       [0.93987592, 0.99703649],
       [0.20183502, 0.58417922]])

In [38]:
# Wrap the array in a DataFrame, naming the rows and columns explicitly.
temp2 = pd.DataFrame(temp1, index = ['a','b','c'], columns = ['Math', 'Stat'])
temp2

Unnamed: 0,Math,Stat
a,0.076778,0.906532
b,0.939876,0.997036
c,0.201835,0.584179


In [40]:
# An Index can also be constructed directly from a list of integers.
ind = pd.Index([2,3,5,7,11])
ind

Int64Index([2, 3, 5, 7, 11], dtype='int64')

In [41]:
ind[::2] # step through the index two positions at a time

Int64Index([2, 5, 11], dtype='int64')

In [42]:
# Like a NumPy array, an Index exposes the dtype of its elements.
ind.dtype

dtype('int64')

In [43]:
# Two overlapping integer indexes (3 and 5 appear in both) for the set
# operation demonstrated in the next cell.
indA = pd.Index([1,2,3,4,5,6])
indB = pd.Index([3,5,7,9,11])

In [44]:
# Set intersection of the two indexes (the labels present in both).
# Index.intersection is used instead of the `&` operator: using `&` on an Index
# as a set operation was deprecated, and in pandas 2.x it no longer returns the
# intersection.
indA.intersection(indB)

Int64Index([3, 5], dtype='int64')

# Indexing and Selection

In [46]:
# Even numbers 10..18 labelled 'a'..'e', used for the indexing examples below.
data = pd.Series(np.arange(10, 20, 2), index=list('abcde'))
data

a    10
b    12
c    14
d    16
e    18
dtype: int32

In [47]:
# Label-based slice: both endpoints, 'b' and 'e', are included.
data['b':'e']

b    12
c    14
d    16
e    18
dtype: int32

In [48]:
# Membership is tested against the index labels, as with dict keys.
'a' in data

True

In [49]:
# The underlying NumPy array, without the index labels.
data.values

array([10, 12, 14, 16, 18])

# iloc

In [51]:
# Re-display the Series for reference before the positional (iloc) examples.
data

a    10
b    12
c    14
d    16
e    18
dtype: int32

In [50]:
# iloc indexes purely by position: position 2 is the third element ('c').
data.iloc[2]

14

In [52]:
# Positional slice: positions 2 and 3; unlike a label slice, the end
# position (4) is excluded.
data.iloc[2:4]

c    14
d    16
dtype: int32

In [54]:
# Re-display the states DataFrame built earlier.
states

Unnamed: 0,area,pop
CA,423967,38332521
TX,695662,26448193
NY,141297,19651127
FL,170312,19552860
IL,149995,12882135


In [57]:
# Re-display temp2 before a derived column is added to it.
temp2

Unnamed: 0,Math,Stat
a,0.076778,0.906532
b,0.939876,0.997036
c,0.201835,0.584179


In [58]:
# Add a 'Tot' column holding the row-wise sum of the Math and Stat columns.
temp2['Tot'] = temp2['Math'] + temp2['Stat']
temp2

Unnamed: 0,Math,Stat,Tot
a,0.076778,0.906532,0.98331
b,0.939876,0.997036,1.936912
c,0.201835,0.584179,0.786014


In [59]:
# Transpose: row labels become column labels and vice versa.
temp2.T

Unnamed: 0,a,b,c
Math,0.076778,0.939876,0.201835
Stat,0.906532,0.997036,0.584179
Tot,0.98331,1.936912,0.786014


In [60]:
# The frame's data as a 2-D NumPy array, now including the Tot column.
temp2.values

array([[0.07677806, 0.90653236, 0.98331043],
       [0.93987592, 0.99703649, 1.93691241],
       [0.20183502, 0.58417922, 0.78601424]])

In [68]:
# Positional row slice: rows at positions 0 and 1 (end position excluded).
temp2.iloc[0:2]

Unnamed: 0,Math,Stat,Tot
a,0.076778,0.906532,0.98331
b,0.939876,0.997036,1.936912


In [72]:
# Boolean-mask filtering: keep only the rows whose Math value is below 0.3.
temp2[temp2['Math'] < 0.3]

Unnamed: 0,Math,Stat,Tot
a,0.076778,0.906532,0.98331
c,0.201835,0.584179,0.786014


In [73]:
# The same row filter written with the explicit .loc indexer.
temp2.loc[temp2['Math'] < 0.3]

Unnamed: 0,Math,Stat,Tot
a,0.076778,0.906532,0.98331
c,0.201835,0.584179,0.786014
