# 2. Series

In [2]:
import pandas as pd
import numpy as np  # pandas is almost always used together with NumPy, so import both up front.

## 2-1. Creating a Series
* A Series is similar to a one-dimensional NumPy array - think of a single column in Excel.
* **Series** = `one-dimensional array of data + row index` (the data itself is stored as a NumPy ndarray)

### 1) Assign values only

In [3]:
# When no index is passed, pandas assigns a default integer index starting at 0.
s = pd.Series(data=[1, 2, 3, 4])
s

0    1
1    2
2    3
3    4
dtype: int64

In [4]:
type(s)

pandas.core.series.Series

In [5]:
s.shape # attribute

(4,)

In [6]:
s.index # attribute

RangeIndex(start=0, stop=4, step=1)

In [7]:
s.values

array([1, 2, 3, 4], dtype=int64)

In [8]:
type(s.values) # .values is a NumPy ndarray, i.e. pandas uses NumPy internally to hold the data.

numpy.ndarray

In [9]:
# Pass dtype to control the element type: the integers are stored as floats.
s2 = pd.Series(data=[1, 2, 3, 4], dtype=float)
s2

0    1.0
1    2.0
2    3.0
3    4.0
dtype: float64

In [10]:
# A Series can also be built from a NumPy array, but wrapping a plain list in
# np.array first is an unnecessary extra step -- prefer passing the list directly.
s = pd.Series(data=np.array([1, 2, 3, 4]), dtype=float)
s

0    1.0
1    2.0
2    3.0
3    4.0
dtype: float64

In [11]:
# Make a Series holding 1 through 10 (np.arange excludes the stop value, hence 11).
pd.Series(np.arange(1, 11))

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int32

### 2) Provide values and an index together

In [12]:
# `index=index` is a keyword argument: the left-hand name is the parameter of
# pd.Series, the right-hand name is the list variable defined just below.
index = [1, 2, 3, 4]
s = pd.Series(data=[10, 20, 30, 40], index=index)
s

1    10
2    20
3    30
4    40
dtype: int64

In [13]:
# Index labels do not have to be integers; here each value is labelled with a string.
index = ['치킨', '탕수육', '곱창', '선지국']
s = pd.Series(data=[16000, 18000, 25000, 10000], index=index)
s

치킨     16000
탕수육    18000
곱창     25000
선지국    10000
dtype: int64

When the index values are strings, as above, they are also called `index labels`.  
Thus, an index can be made of strings as well as numbers.

In [14]:
s.index

Index(['치킨', '탕수육', '곱창', '선지국'], dtype='object')

In [15]:
s.values

array([16000, 18000, 25000, 10000], dtype=int64)

### 3) Create from a dictionary

In [16]:
# Building from a dict: the keys become the index labels, the values become the data.
s = pd.Series({
    '치킨': 16000,
    '탕수육': 18000,
    '곱창': 20000,
    '선지국': 10000,
})
s

치킨     16000
탕수육    18000
곱창     20000
선지국    10000
dtype: int64

In [17]:
# Give the Series itself a name; it is shown as "Name: Menu" in the output.
s.name = 'Menu'
s

치킨     16000
탕수육    18000
곱창     20000
선지국    10000
Name: Menu, dtype: int64

In [18]:
# Give the index a name; it is shown as a header line above the labels.
s.index.name = '메뉴명'
s

메뉴명
치킨     16000
탕수육    18000
곱창     20000
선지국    10000
Name: Menu, dtype: int64

## 2-2. Series Indexing and Slicing

In [19]:
s

메뉴명
치킨     16000
탕수육    18000
곱창     20000
선지국    10000
Name: Menu, dtype: int64

In [20]:
# Indexing with an index label
s['치킨'], s.치킨 # attribute-style access (s.치킨) is only available when the label is a string.

(16000, 16000)

In [21]:
# Indexing by 0-based position.
# Use .iloc for positional access: on a Series with string labels, plain s[0]
# relies on an integer-as-position fallback of [] that is deprecated in pandas 2.x.
s.iloc[0]

16000

In [22]:
# Label access, attribute access and positional access all reach the same item.
# Positional access goes through .iloc (plain s[3] / s[-1] on a string-labelled
# Series depends on the deprecated integer-as-position fallback of []).
# .iloc[-1] is the usual way to get the last element, so it is worth getting used to.
s['선지국'], s.선지국, s.iloc[3], s.iloc[-1]

(10000, 10000, 10000, 10000)

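Meanwhile, if the index labels are integers that start from 1, which takes priority: 0-based positional indexing or label-based indexing? (As the next cells show, the label wins: `t[1]` returns the value labelled `1`, not the second element.)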

In [23]:
# A Series whose labels are the integers 1..4, so label and 0-based position differ.
t = pd.Series(data=[10, 20, 30, 40], index=[1, 2, 3, 4])
t

1    10
2    20
3    30
4    40
dtype: int64

In [24]:
# With an integer index, [] looks up by label: this returns the value labelled 1 (10),
# not the element at 0-based position 1 (20).
t[1]

10

In [25]:
s

메뉴명
치킨     16000
탕수육    18000
곱창     20000
선지국    10000
Name: Menu, dtype: int64

In [26]:
# Fancy indexing: pass a list of labels to select several items at once.
s[['치킨', '곱창']]

메뉴명
치킨    16000
곱창    20000
Name: Menu, dtype: int64

In [27]:
# Fancy indexing by position: .iloc takes a list of 0-based positions.
# (Plain s[[0, 2]] on a string-labelled Series depends on the deprecated
# integer-as-position fallback of [].)
s.iloc[[0, 2]]

메뉴명
치킨    16000
곱창    20000
Name: Menu, dtype: int64

In [28]:
# Boolean indexing, step 1: a comparison yields a boolean Series (True where the value is <= 17000).
s <= 17000

메뉴명
치킨      True
탕수육    False
곱창     False
선지국     True
Name: Menu, dtype: bool

In [29]:
# Boolean indexing, step 2: use the boolean Series as a mask to keep only the True items.
s[s <= 17000]

메뉴명
치킨     16000
선지국    10000
Name: Menu, dtype: int64

In [30]:
# Combine conditions with &; each comparison needs its own parentheses
# because & binds more tightly than >= and <=.
s[(s >= 18000) & (s <= 20000)]

메뉴명
탕수육    18000
곱창     20000
Name: Menu, dtype: int64

In [31]:
# Slicing (start:end:step)
# Slicing by 0-based position: the end position is excluded (stops at end - 1).
s

메뉴명
치킨     16000
탕수육    18000
곱창     20000
선지국    10000
Name: Menu, dtype: int64

In [32]:
# Positional slice: takes positions 1 and 2; position 3 is excluded.
s[1:3]

메뉴명
탕수육    18000
곱창     20000
Name: Menu, dtype: int64

In [33]:
# Label slice: unlike a positional slice, the end label ('곱창') is included.
s['탕수육':'곱창']

메뉴명
탕수육    18000
곱창     20000
Name: Menu, dtype: int64

## 2-3. Series data CRUD

In [34]:
# Look at the data from the top.
s.head(2) # with no argument, head() uses its default of 5 rows.

메뉴명
치킨     16000
탕수육    18000
Name: Menu, dtype: int64

In [35]:
# Look at the data from the bottom.
s.tail(2)

메뉴명
곱창     20000
선지국    10000
Name: Menu, dtype: int64

In [36]:
s

메뉴명
치킨     16000
탕수육    18000
곱창     20000
선지국    10000
Name: Menu, dtype: int64

In [37]:
# Create: assigning to a label that does not exist yet ('계란찜') appends a new item.
s['계란찜'] = 3000
s

메뉴명
치킨     16000
탕수육    18000
곱창     20000
선지국    10000
계란찜     3000
Name: Menu, dtype: int64

In [38]:
# When the value has not been decided yet, store NaN (Not a Number) as a placeholder.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
# Storing NaN turns the integer Series into float64.
s['감자탕'] = np.nan
s

메뉴명
치킨     16000.0
탕수육    18000.0
곱창     20000.0
선지국    10000.0
계란찜     3000.0
감자탕        NaN
Name: Menu, dtype: float64

In [39]:
# Find missing values: True where the value is NaN.
s.isnull()

메뉴명
치킨     False
탕수육    False
곱창     False
선지국    False
계란찜    False
감자탕     True
Name: Menu, dtype: bool

In [40]:
s.isna() # same as isnull()

메뉴명
치킨     False
탕수육    False
곱창     False
선지국    False
계란찜    False
감자탕     True
Name: Menu, dtype: bool

In [41]:
# Boolean Indexing
s[s.isna()]

메뉴명
감자탕   NaN
Name: Menu, dtype: float64

In [42]:
# Find non-missing values: True where a value is present.
s.notnull()

메뉴명
치킨      True
탕수육     True
곱창      True
선지국     True
계란찜     True
감자탕    False
Name: Menu, dtype: bool

In [43]:
s.notna() # same as notnull()

메뉴명
치킨      True
탕수육     True
곱창      True
선지국     True
계란찜     True
감자탕    False
Name: Menu, dtype: bool

In [44]:
s[s.notnull()]

메뉴명
치킨     16000.0
탕수육    18000.0
곱창     20000.0
선지국    10000.0
계란찜     3000.0
Name: Menu, dtype: float64

In [45]:
# Update: change the value of '치킨' (chicken) to 18000.
# Address the item by its label so the code matches the stated intent; plain
# s[0] = ... on a string-labelled Series relies on the deprecated
# integer-as-position fallback of [].
s['치킨'] = 18000
s

메뉴명
치킨     18000.0
탕수육    18000.0
곱창     20000.0
선지국    10000.0
계란찜     3000.0
감자탕        NaN
Name: Menu, dtype: float64

In [46]:
# Delete: remove the '감자탕' (gamja-tang) item with del.
del s['감자탕']
s

메뉴명
치킨     18000.0
탕수육    18000.0
곱창     20000.0
선지국    10000.0
계란찜     3000.0
Name: Menu, dtype: float64

## 2-4. Series Calculation

### 1) calculation between Series.

In [49]:
# Second Series for the arithmetic demo; its first label is '치킨2' rather than '치킨'.
index = ['치킨2', '탕수육', '곱창', '선지국', '계란찜']
data = [18000, 18000, 20000, 10000, 3000]
s2 = pd.Series(data, index=index)
s2

치킨2    18000
탕수육    18000
곱창     20000
선지국    10000
계란찜     3000
dtype: int64

In [50]:
s + s2  # arithmetic aligns on index labels; a label found in only one Series ('치킨', '치킨2') yields NaN rather than an error.

계란찜     6000.0
곱창     40000.0
선지국    20000.0
치킨         NaN
치킨2        NaN
탕수육    36000.0
dtype: float64

### 2) Calculation with a scalar (broadcasting)

In [52]:
# Broadcasting: the scalar 2000 is added to every element.
# Note: this rebinds s, so running the cell again adds another 2000.
s = s + 2000
s

메뉴명
치킨     22000.0
탕수육    22000.0
곱창     24000.0
선지국    14000.0
계란찜     7000.0
Name: Menu, dtype: float64

### 3) Aggregation Functions

In [53]:
s.max()

24000.0

In [54]:
s.min()

7000.0

In [55]:
s.sum()

89000.0

In [56]:
s.mean()

17800.0

In [57]:
s.median()

22000.0