### Pandas
- 데이터 분석을 위한 쉽고 성능이 좋은 오픈소스 python 라이브러리
- Series : index, value로 이루어진 데이터 타입
- DataFrame : index, column, value로 이루어진 데이터 타입
    - CRUD : create, read, update, delete
    - IO

In [1]:
import pandas as pd

In [2]:
# 1. Series -> numpy array

In [3]:
# Build a Series from a plain list; no index is passed, so the labels are 0, 1, 2.
datas = pd.Series(data=[3, 1, 9])
datas

0    3
1    1
2    9
dtype: int64

In [4]:
list(datas.index), datas.values, datas.dtype

([0, 1, 2], array([3, 1, 9]), dtype('int64'))

In [5]:
# 2. DataFrame

In [6]:
# create

In [7]:
# 리스트 안에 딕셔너리 : row 데이터를 기준으로 저장

In [8]:
# Row-oriented input: every dict is one record, and its keys become the columns.
datas = [
    dict(name="peter", email="peter@gmail.com", id=1),
    dict(name="jhon", email="jhon@gmail.com", id=2),
    dict(name="andy", email="andy@naver.com", id=3),
]
df = pd.DataFrame(data=datas)
df

Unnamed: 0,name,email,id
0,peter,peter@gmail.com,1
1,jhon,jhon@gmail.com,2
2,andy,andy@naver.com,3


In [None]:
# 딕셔너리 안에 리스트 : column 데이터를 기준으로 저장

In [10]:
# Round trip through the column-oriented form: to_dict("list") maps each column
# name to the list of its values, and DataFrame() rebuilds the same table from it.
datas = df.to_dict(orient="list")
print(datas)
df = pd.DataFrame(data=datas)
df

{'name': ['peter', 'jhon', 'andy'], 'email': ['peter@gmail.com', 'jhon@gmail.com', 'andy@naver.com'], 'id': [1, 2, 3]}


Unnamed: 0,name,email,id
0,peter,peter@gmail.com,1
1,jhon,jhon@gmail.com,2
2,andy,andy@naver.com,3


In [11]:
list(df.index), list(df.columns), df.values

([0, 1, 2],
 ['name', 'email', 'id'],
 array([['peter', 'peter@gmail.com', 1],
        ['jhon', 'jhon@gmail.com', 2],
        ['andy', 'andy@naver.com', 3]], dtype=object))

In [12]:
df['name'].values

array(['peter', 'jhon', 'andy'], dtype=object)

In [13]:
df.dtypes

name     object
email    object
id        int64
dtype: object

In [None]:
# read

In [14]:
# row : df.loc[row] : [index], [start:end], [start:end:stride]
# df.loc[1]
# df.loc[:1]
df.loc[::-1]

Unnamed: 0,name,email,id
2,andy,andy@naver.com,3
1,jhon,jhon@gmail.com,2
0,peter,peter@gmail.com,1


In [15]:
# column : df[column](Series), df[[columns]](DataFrame)
# df["name"]
df[["id", "name"]]

Unnamed: 0,id,name
0,1,peter
1,2,jhon
2,3,andy


In [16]:
# row + column : df.loc[row, columns]
df.loc[1:, ["id", "name"]]

Unnamed: 0,id,name
1,2,jhon
2,3,andy


In [None]:
# condition : df[df[column] condition]

In [17]:
# 브로드캐스팅
df["id"] % 2 != 0

0     True
1    False
2     True
Name: id, dtype: bool

In [19]:
df

Unnamed: 0,name,email,id
0,peter,peter@gmail.com,1
1,jhon,jhon@gmail.com,2
2,andy,andy@naver.com,3


In [20]:
# Series.apply calls the given function on each value of the column and
# returns a new Series; df itself is left unchanged.
#
# The function can be a lambda (as below) or a named function, e.g.:
#     def my_func(x):
#         return x
#     df["name"].apply(my_func)
#
# The statement is kept live (not commented out) so that re-running the cell
# reproduces the output recorded for it.
df["name"].apply(lambda x: x + "_name")

0    peter_name
1     jhon_name
2     andy_name
Name: name, dtype: object

In [18]:
df[df["id"] % 2 != 0]

Unnamed: 0,name,email,id
0,peter,peter@gmail.com,1
2,andy,andy@naver.com,3


In [None]:
# update

In [21]:
df["id"] = 4
df

Unnamed: 0,name,email,id
0,peter,peter@gmail.com,4
1,jhon,jhon@gmail.com,4
2,andy,andy@naver.com,4


In [22]:
df["id"] = [5, 6, 7]
df

Unnamed: 0,name,email,id
0,peter,peter@gmail.com,5
1,jhon,jhon@gmail.com,6
2,andy,andy@naver.com,7


In [None]:
# delete : df.drop(index=[], columns=[])

In [23]:
df.drop(index=[1], columns=["email"])

Unnamed: 0,name,id
0,peter,5
2,andy,7


In [24]:
df

Unnamed: 0,name,email,id
0,peter,peter@gmail.com,5
1,jhon,jhon@gmail.com,6
2,andy,andy@naver.com,7


In [None]:
# IO : .csv

In [25]:
# save
df.to_csv("user.csv", index=False)

In [None]:
%ls

In [26]:
!cat user.csv

name,email,id
peter,peter@gmail.com,5
jhon,jhon@gmail.com,6
andy,andy@naver.com,7


In [27]:
# load
load_df = pd.read_csv("user.csv")
load_df

Unnamed: 0,name,email,id
0,peter,peter@gmail.com,5
1,jhon,jhon@gmail.com,6
2,andy,andy@naver.com,7


In [28]:
!pip install xlrd openpyxl xlsxwriter



In [29]:
# Save the frame as an Excel workbook without the index column.
# No `encoding` argument is passed: DataFrame.to_excel deprecated it in
# pandas 1.5 and removed it in 2.0 (passing it raises TypeError), and the
# xlsxwriter engine writes Unicode text without it.
df.to_excel("user.xlsx", index=False, engine="xlsxwriter")

In [None]:
%ls

In [None]:
load_df = pd.read_excel("user.xlsx")
load_df