In [1]:
!pip install pandas

Collecting pandas
  Downloading pandas-2.1.4-cp312-cp312-win_amd64.whl.metadata (18 kB)
Collecting numpy<2,>=1.26.0 (from pandas)
  Downloading numpy-1.26.2-cp312-cp312-win_amd64.whl.metadata (61 kB)
     ---------------------------------------- 0.0/61.2 kB ? eta -:--:--
     -------------------- ------------------- 30.7/61.2 kB 1.3 MB/s eta 0:00:01
     ---------------------------------------- 61.2/61.2 kB 1.6 MB/s eta 0:00:00
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2023.3.post1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.1 (from pandas)
  Downloading tzdata-2023.3-py2.py3-none-any.whl (341 kB)
     ---------------------------------------- 0.0/341.8 kB ? eta -:--:--
     -------------------------------------- 341.8/341.8 kB 7.1 MB/s eta 0:00:00
Downloading pandas-2.1.4-cp312-cp312-win_amd64.whl (10.5 MB)
   ---------------------------------------- 0.0/10.5 MB ? eta -:--:--
   --- ------------------------------------ 1.0/10.5 MB 20.9 MB/s eta 0:00:01


In [9]:
import pandas as pd

# Small labelled Series used by the indexing examples that follow.
data = [1, 2, 3]
ser = pd.Series(
    data=data,
    index=['a', 'b', 'c'],
)
ser

a    1
b    2
c    3
dtype: int64

In [8]:
# Label-based lookup of a single label with .loc.
# NOTE(review): this cell's execution count (In [8]) is older than the cell
# above (In [9]) and the recorded output lists two 'b' rows, so the output
# appears to come from an earlier version of `ser`; re-run to refresh it.
ser.loc['b']

b    1
b    2
dtype: int64

In [10]:
# Label slice with .loc: both endpoints ('b' and 'c') are included in the result.
ser.loc['b':'c']

b    2
c    3
dtype: int64

In [11]:
# Select several labels at once by passing a list of labels to .loc.
ser.loc[['a','c']]

a    1
c    3
dtype: int64

In [12]:
# Position-based lookup with .iloc: the element at integer position 1.
ser.iloc[1]

2

In [13]:
# Positional slice with .iloc: positions 1 and 2 (the stop position 3 is excluded).
ser.iloc[1:3]

b    2
c    3
dtype: int64

In [14]:
# Boolean list as a mask: only the entries whose mask value is True are kept.
ser.loc[[True, False, True]]

a    1
c    3
dtype: int64

In [15]:
# Element-wise comparison produces a boolean Series with the same index.
ser != 2

a     True
b    False
c     True
dtype: bool

In [16]:
# Use the boolean Series as a mask to keep only the values that are not 2.
ser.loc[ser != 2]

a    1
c    3
dtype: int64

### DataFrame

In [18]:
import pandas as pd

# 3x3 demo frame with explicit row labels (r1..r3) and column labels (c1..c3).
data = [
    [1, 10, 100],
    [2, 20, 200],
    [3, 30, 300],
]
df = pd.DataFrame(
    data=data,
    index=['r1', 'r2', 'r3'],
    columns=['c1', 'c2', 'c3'],
)
df

Unnamed: 0,c1,c2,c3
r1,1,10,100
r2,2,20,200
r3,3,30,300


In [19]:
# Single cell by (row label, column label).
df.loc['r2', 'c2']

20

In [20]:
# Whole row 'r2' returned as a Series indexed by the column labels.
df.loc['r2', :]

c1      2
c2     20
c3    200
Name: r2, dtype: int64

In [22]:
# Whole column 'c2' returned as a Series indexed by the row labels.
df.loc[:, 'c2']

r1    10
r2    20
r3    30
Name: c2, dtype: int64

In [23]:
# Rows picked by a list of labels, columns by a label slice (both 'c2' and 'c3' included).
df.loc[['r1','r3'], 'c2':'c3']

Unnamed: 0,c2,c3
r1,10,100
r3,30,300


In [25]:
# Position-based selection: row positions 1-2 and column positions 0 and 2.
df.iloc[1:3, [0,2]]

Unnamed: 0,c1,c3
r2,2,200
r3,3,300


In [26]:
# Plain [] with a column label returns that column as a Series.
df['c2']

r1    10
r2    20
r3    30
Name: c2, dtype: int64

In [27]:
# Element-wise comparison over the whole frame yields a boolean DataFrame.
df > 10

Unnamed: 0,c1,c2,c3
r1,False,False,True
r2,False,True,True
r3,False,True,True


In [28]:
# Keep only the rows whose 'c2' value is greater than 10.
df.loc[df['c2'] > 10]

Unnamed: 0,c1,c2,c3
r2,2,20,200
r3,3,30,300


In [30]:
# Rows where column c1 is greater than 1 AND column c3 is less than 300.
# Each comparison is parenthesised before being combined with `&`.
df.loc[(df['c1'] > 1) & (df['c3'] < 300)]

Unnamed: 0,c1,c2,c3
r2,2,20,200


### 판다스 파일 불러오기

In [33]:
import pandas as pd
# Path to the sample CSV, relative to the notebook's working directory.
# NOTE(review): the name `csv` is reused by later cells; it would also shadow
# the standard-library `csv` module if that were ever imported here.
csv = "./sample/anime.csv"
# Rebinds `df` (previously the 3x3 demo frame) to the loaded CSV data.
df = pd.read_csv(csv)
# Preview the first three rows.
df.head(3)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262


In [34]:
# Reload the CSV using the first column (position 0) as the row index;
# the preview shows `anime_id` as the index.
df = pd.read_csv(csv, index_col=0)
df.head(3)

Unnamed: 0_level_0,name,genre,type,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262


In [35]:
# Same as the previous cell, but the index column is chosen by name instead of position.
df = pd.read_csv(csv, index_col='anime_id')
df.head(3)

Unnamed: 0_level_0,name,genre,type,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262


In [36]:
# Inspect the dtype chosen for each column.
# NOTE(review): `episodes` is reported as object rather than a numeric dtype —
# presumably the column contains non-numeric entries; verify against the data.
df.dtypes

name         object
genre        object
type         object
episodes     object
rating      float64
members       int64
dtype: object

In [37]:
# Override the dtype of the `members` column while reading: it is loaded as float
# (the preview shows values such as 200630.0).
df = pd.read_csv(csv, index_col='anime_id', dtype={'members':float})
df.head(3)

Unnamed: 0_level_0,name,genre,type,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630.0
5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665.0
28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262.0


In [38]:
# Check the column dtypes again after the dtype override; `members` is now float64.
df.dtypes

name         object
genre        object
type         object
episodes     object
rating      float64
members     float64
dtype: object

In [41]:
# Rebinds `csv` to a different sample file (stock prices).
csv = "./sample/anime_stock_price.csv"
# parse_dates asks read_csv to convert the 'Date' column to datetimes while loading.
df = pd.read_csv(csv, parse_dates=['Date'])
df.head(3)

Unnamed: 0,Date,TOEI ANIMATION,IG Port
0,2015-01-01,3356.86,1201.51
1,2015-01-02,3356.86,1201.51
2,2015-01-05,3396.12,1218.44


In [42]:
# Check the column dtypes of the stock-price frame; `Date` is datetime64[ns].
df.dtypes

Date              datetime64[ns]
TOEI ANIMATION           float64
IG Port                  float64
dtype: object

In [43]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)
     ---------------------------------------- 0.0/250.0 kB ? eta -:--:--
     - -------------------------------------- 10.2/250.0 kB ? eta -:--:--
     ------ ------------------------------ 41.0/250.0 kB 495.5 kB/s eta 0:00:01
     -------------------------------------- 250.0/250.0 kB 1.9 MB/s eta 0:00:00
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2


In [44]:
import pandas as pd
# NOTE(review): `openpyxl` is not referenced directly in this cell; presumably it is
# imported only to confirm that the Excel engine installed above is available.
import openpyxl
# Path to the sample workbook, relative to the notebook's working directory.
xlsx = "./sample/anime.xlsx"
# Rebinds `df` to the contents of the Excel file.
df = pd.read_excel(xlsx)
df.head(3)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,15335,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...,"Action, Comedy, Historical, Parody, Samurai, S...",Movie,1,9.1,72534
2,28851,Koe no Katachi,"Drama, School, Shounen",Movie,1,9.05,102733


### SQL 

In [45]:
!pip install pandasql

Collecting pandasql
  Downloading pandasql-0.7.3.tar.gz (26 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting sqlalchemy (from pandasql)
  Downloading SQLAlchemy-2.0.23-cp312-cp312-win_amd64.whl.metadata (9.8 kB)
Collecting typing-extensions>=4.2.0 (from sqlalchemy->pandasql)
  Downloading typing_extensions-4.9.0-py3-none-any.whl.metadata (3.0 kB)
Collecting greenlet!=0.4.17 (from sqlalchemy->pandasql)
  Downloading greenlet-3.0.2-cp312-cp312-win_amd64.whl.metadata (3.8 kB)
Downloading SQLAlchemy-2.0.23-cp312-cp312-win_amd64.whl (2.1 MB)
   ---------------------------------------- 0.0/2.1 MB ? eta -:--:--
   - -------------------------------------- 0.1/2.1 MB 1.7 MB/s eta 0:00:02
   ---- --------

In [47]:
from pandasql import sqldf


def dfsql(q):
    """Run the SQL query string ``q`` with pandasql against this notebook's globals.

    ``globals()`` is evaluated on every call, so names used in the query
    (such as ``df``) refer to whatever is bound at call time.
    """
    return sqldf(q, globals())


# NOTE(review): `df` is whichever frame was bound most recently; the recorded
# rows look like the Excel-loaded frame — confirm when re-running.
result = dfsql("select name,rating from df where rating > 9.0 order by rating desc")
result

Unnamed: 0,name,rating
0,Taka no Tsume 8: Yoshida-kun no X-Files,10.0
1,Mogura no Motoro,9.5
2,Kimi no Na wa.,9.37
3,Kahei no Umi,9.33
4,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...,9.1
5,Koe no Katachi,9.05


### JSON 저장하기

In [48]:
# Write the query result to a JSON file using the 'table' orientation.
result.to_json('./sample/anime.json', orient='table')

### HTML 가져오기

In [52]:
# Page whose HTML tables are parsed (requires network access when run).
url = "https://docs.python.org/3/py-modindex.html"
# read_html returns a list of DataFrames; the recorded output shows a
# one-element list, so despite its singular name `table` is a list.
table = pd.read_html(url)
table

[      0           1                                                  2
 0   NaN         NaN                                                NaN
 1   NaN           _                                                NaN
 2   NaN  __future__                       Future statement definitions
 3   NaN    __main__  The environment where top-level code is run. C...
 4   NaN     _thread                           Low-level threading API.
 ..   ..         ...                                                ...
 345 NaN      zipapp              Manage executable Python zip archives
 346 NaN     zipfile           Read and write ZIP-format archive files.
 347 NaN   zipimport  Support for importing Python modules from ZIP ...
 348 NaN        zlib  Low-level interface to compression and decompr...
 349 NaN    zoneinfo                             IANA time zone support
 
 [350 rows x 3 columns]]

In [57]:
# Take the first parsed table, keep the columns labelled 1 onward,
# drop the rows that contain NaN, and preview the first three remaining rows.
table[0].loc[:, 1:].dropna().head(3)

Unnamed: 0,1,2
2,__future__,Future statement definitions
3,__main__,The environment where top-level code is run. C...
4,_thread,Low-level threading API.


### Where

In [58]:
# where() keeps the frame's shape: rows that fail the condition are blanked out
# (row 0 in the recorded output is entirely empty) instead of being removed.
# NOTE(review): `df` here appears to be the Excel-loaded frame (the rows match that
# preview); the cell relies on that earlier binding still being in place.
df.where(df['rating'] < 9.2).head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,,,,,,,
1,15335.0,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...,"Action, Comedy, Historical, Parody, Samurai, S...",Movie,1.0,9.1,72534.0
2,28851.0,Koe no Katachi,"Drama, School, Shounen",Movie,1.0,9.05,102733.0
3,199.0,Sen to Chihiro no Kamikakushi,"Adventure, Drama, Supernatural",Movie,1.0,8.93,466254.0
4,12355.0,Ookami Kodomo no Ame to Yuki,"Fantasy, Slice of Life",Movie,1.0,8.84,226193.0


In [60]:
# Sort by `rating` from highest to lowest and preview the top three rows.
df.sort_values('rating', ascending=False).head(3)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
2143,33662,Taka no Tsume 8: Yoshida-kun no X-Files,"Comedy, Parody",Movie,1,10.0,13
1894,23005,Mogura no Motoro,Slice of Life,Movie,1,9.5,62
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
