In [1]:
import pandas as pd
import numpy as np

In [3]:
# series: 1 dim (size immutable)
# dataframe: 2 dim
# panel: 3 dim (removed in pandas 0.25; use a MultiIndex DataFrame instead)

#### Series ####

In [13]:
"""
pd.Series(data, index, dtype, copy)
    data: data takes various forms like ndarray, list, constants
    index: Index values must be unique and hashable, same length as data. 
           Default np.arange(n) if no index is passed.
    dtype: dtype is for data type. If None, data type will be inferred
    copy: Copy data. Default False
"""
# An empty Series; dtype is passed explicitly because there is no data to infer it from.
s = pd.Series(dtype=object)
s

Series([], dtype: object)

In [23]:
# Series from a NumPy array
data = np.array(['a', 'b', 'c', 'd'])

# No index passed: the labels default to 0..n-1
s = pd.Series(data)
print(s)

# Explicit labels, one per element of the array
custom_index = [100, 101, 102, 103]
s = pd.Series(data, index=custom_index)
print(s)

0    a
1    b
2    c
3    d
dtype: object
100    a
101    b
102    c
103    d
dtype: object


In [42]:
# copy=False: the Series is built on top of `data` instead of on a duplicate,
# so the write below also shows up in the ndarray (see the printed array).
# NOTE(review): the original remark here read "only affects np.array non string" --
# presumably string arrays are converted (and therefore copied) regardless; confirm.
data = np.array([1.1, 2.2, 3.3, 4.4])
s = pd.Series(data, copy=False)
s.iloc[0] = 10.2
print(s, data, sep='\n')

0    10.2
1     2.2
2     3.3
3     4.4
dtype: float64
[10.2  2.2  3.3  4.4]


In [50]:
# Series from a dict: the keys become the index labels
data = dict(a=0., b=1., c=2.)
s = pd.Series(data)
print(s)

# With an explicit index the result follows that label order;
# a label that is not a key of the dict ('d') is filled with NaN.
wanted_labels = ['a', 'b', 'c', 'd']
s = pd.Series(data, index=wanted_labels)
print(s)

a    0.0
b    1.0
c    2.0
dtype: float64
a    0.0
b    1.0
c    2.0
d    NaN
dtype: float64


In [52]:
# Series from a scalar: the value is repeated once for every label in the index
labels = [0, 1, 2, 3]
s = pd.Series(5, index=labels)
s

0    5
1    5
2    5
3    5
dtype: int64

In [58]:
# accessing data
s = pd.Series([1,2,3,4,5],index = ['a','b','c','d','e'])

# Positional access goes through .iloc. Plain s[1] on a string-labelled Series
# relies on the "integer key means position" fallback, which pandas deprecated
# (FutureWarning in 2.1) in favour of treating integer keys as labels.
print(s.iloc[1])
print(s.iloc[:3])
print(s.iloc[-3:])

# Label access goes through .loc: one label gives a scalar,
# a list of labels gives a Series.
print(s.loc['d'])
print(s.loc[['a', 'c', 'd']])


2
a    1
b    2
c    3
dtype: int64
c    3
d    4
e    5
dtype: int64
4
a    1
c    3
d    4
dtype: int64


### DataFrame ###

In [2]:
# Quick reference for the DataFrame constructor. The string is the last
# expression in the cell, so the notebook echoes its repr as the cell output.
''' 
pandas.DataFrame(data, index, columns, dtype, copy)
    data: ndarray, series, map, lists, dict, constants
    index: row labels
'''

' \npandas.DataFrame(data, index, columns, dtype, copy)\n    data: ndarray, series, map, lists, dict, constants\n    index: row labels\n'

In [6]:
# No data, index or columns given -> an empty frame
df = pd.DataFrame()
print(df)

Empty DataFrame
Columns: []
Index: []


In [12]:
# create df from list
# A flat list becomes a single column labelled 0.
data = [1,2,3,4,5]
df = pd.DataFrame(data)
print(df)

# A list of lists is read row by row; `columns` names the fields.
data = [['Alex',10],['Bob',12],['Clarke',13]]
df = pd.DataFrame(data,columns=['Name','Age'])
print(df)

# Cast only the numeric column to float. Passing dtype=float to the constructor
# asks pandas to cast *every* column, including the 'Name' strings: older
# versions warned and skipped that column, newer versions raise instead.
df = pd.DataFrame(data,columns=['Name','Age']).astype({'Age': float})
print(df)

   0
0  1
1  2
2  3
3  4
4  5
     Name  Age
0    Alex   10
1     Bob   12
2  Clarke   13
     Name   Age
0    Alex  10.0
1     Bob  12.0
2  Clarke  13.0


  exec(code_obj, self.user_global_ns, self.user_ns)


In [13]:
# DataFrame from a dict of equal-length lists: each key becomes a column
data = {
    'Name': ['Tom', 'Jack', 'Steve', 'Ricky'],
    'Age': [28, 34, 29, 42],
}
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age
0,Tom,28
1,Jack,34
2,Steve,29
3,Ricky,42


In [20]:
# Same dict of lists (each key is a column), now with custom row labels
data = {
    'Name': ['Tom', 'Jack', 'Steve', 'Ricky'],
    'Age': [28, 34, 29, 42],
}
row_labels = ['rank1', 'rank2', 'rank3', 'rank4']
df = pd.DataFrame(data, index=row_labels)
df

Unnamed: 0,Name,Age
rank1,Tom,28
rank2,Jack,34
rank3,Steve,29
rank4,Ricky,42


In [21]:
# DataFrame from a list of dicts: every dict is one row, and a key that a row
# does not have ('c' in the first dict) shows up as NaN in that row.
data = [
    {'a': 1, 'b': 2},
    {'a': 5, 'b': 10, 'c': 20},
]
df = pd.DataFrame(data)
df

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [22]:
# set index: the same list of dicts, with explicit row labels
data = [
    {'a': 1, 'b': 2},
    {'a': 5, 'b': 10, 'c': 20},
]
row_labels = ['first', 'second']
df = pd.DataFrame(data, index=row_labels)
df

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [24]:
# DataFrame from a dict of Series: rows cover every label found in either Series,
# and a Series without a given label ('one' has no 'd') contributes NaN there.
d = dict(
    one=pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    two=pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
)
df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [25]:
# select column: indexing with a column label returns that column as a Series
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [30]:
# column addition: assigning to a new label adds a column to df;
# row 'd' has no value in 'one', so its sum is missing as well
df['three'] = df['one'] + df['two']
df

Unnamed: 0,one,two,three
a,1.0,1,2.0
b,2.0,2,4.0
c,3.0,3,6.0
d,,4,


In [31]:
# column deletion: del removes the column from df in place
del df['three']
df

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [33]:
# pop also removes the column from df in place; its return value is not used here
df.pop('two')
df

Unnamed: 0,one
a,1.0
b,2.0
c,3.0
d,


In [35]:
# row selection, addition, and deletion
# Rebuild the two-column frame first (the earlier cells removed columns from df).
d = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([1, 2, 3, 4], index=list('abcd')),
}
df = pd.DataFrame(data=d)
df

Unnamed: 0,one,two
a,1.0,1
b,2.0,2
c,3.0,3
d,,4


In [41]:
# row selection by label: .loc['b'] returns the row as a Series
# whose index is the column names
print(type(df.loc['b']))
df.loc['b']

<class 'pandas.core.series.Series'>


one    2.0
two    2.0
Name: b, dtype: float64

In [42]:
# row selection by integer position: position 2 is the row labelled 'c'
df.iloc[2]

one    3.0
two    3.0
Name: c, dtype: float64

In [43]:
# slice rows: positions 2 and 3 (the stop is exclusive), spelled out with .iloc
df.iloc[2:4]

Unnamed: 0,one,two
c,3.0,3
d,,4


In [None]:
# addition
