In [76]:
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype

In [77]:
# Column labels: score table header and the city names used by the fare table.
col1 = ["이름", "국어", "영어", "수학", "과학"]
col2 = ["서울", "대구", "부산"]

# One record per student, in col1 order.
a1, a2, a3, a4, a5 = (
    ['송중기', 92, 99, 69, 75],
    ['김나현', 88, 91, 87, 83],
    ['권보아', 72, 86, 71, 84],
    ['박효신', 81, 78, 77, 91],
    ['김범수', 88, 80, 94, 91],
)
# Extra per-student values: a word, a sending city and a receiving city.
a6 = ["noon", "mom", "dad", "level", "river"]
a7 = ["서울", "서울", "대구", "부산", "부산"]
a8 = ["대구", "서울", "부산", "서울", "부산"]

# df1: base score table; df2 / df3: independent copies of it.
df1 = pd.DataFrame(data=[a1, a2, a3, a4, a5], columns=col1)
df2, df3 = df1.copy(), df1.copy()
# df3 keeps the '과학' scores as strings.
df3["과학"] = df3["과학"].astype(str)

# df4: student names next to the words in a6.
df4 = df1[["이름"]].copy()
df4["단어"] = a6

# df5: square table indexed by city on both axes.
df5 = pd.DataFrame(
    [[1000, 5000, 10000],
     [5000, 1000, 5000],
     [10000, 5000, 1000]],
    index=col2,
    columns=col2,
)

# df6: student name followed by the sending / receiving city columns.
df6 = pd.DataFrame({"보내는곳": a7, "받는곳": a8})
df6 = pd.concat([df1["이름"], df6], axis=1)

In [78]:
# Type-conversion method: .astype()
# df3['과학'] was cast to str in the setup cell; this shows it cast back to int.
df3['과학'].astype(int)

0    75
1    83
2    84
3    91
4    91
Name: 과학, dtype: int64

In [79]:
# Small two-column integer frame used by the astype() examples below.
d = dict(col1=[1, 2], col2=[3, 4])
df = pd.DataFrame(d)
df.dtypes

col1    int64
col2    int64
dtype: object

In [80]:
# A single dtype string casts every column; .dtypes lists the resulting per-column types.
df.astype('int32').dtypes

col1    int32
col2    int32
dtype: object

In [81]:
# A {column: dtype} mapping casts each listed column to its own dtype.
df.astype({'col1': 'int32', 'col2':'int64'}).dtypes

col1    int32
col2    int64
dtype: object

In [82]:
# 'Int8' with a capital I is a different dtype name from lowercase 'int8';
# the output below reports both columns as Int8.
df.astype(dtype='Int8').dtypes

col1    Int8
col2    Int8
dtype: object

In [83]:
# NOTE(review): the .dtypes result on the first line is evaluated and then discarded;
# a notebook cell only displays its last expression, which here is the unmodified `df`.
# Confirm whether showing the category dtypes was the intent.
df.astype('category').dtypes
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [84]:
# Series created directly with the 'Int32' (capital I) dtype.
ser = pd.Series([1, 2], dtype='Int32')
ser

0    1
1    2
dtype: Int32

In [85]:
# Cast from Int32 to Int64; the converted Series is displayed but not assigned back to `ser`.
ser.astype('Int64')

0    1
1    2
dtype: Int64

In [86]:
# NOTE(review): the categorical conversion is neither assigned nor displayed; the cell
# shows `ser`, which still has dtype Int32. Confirm whether displaying the converted
# Series was intended.
ser.astype("category")
ser

0    1
1    2
dtype: Int32

In [87]:
# Ordered categorical dtype with the categories listed as [2, 1], i.e. 2 ranks before 1.
cat_type = CategoricalDtype(
    categories=[2, 1], ordered=True
)

# NOTE(review): the sorted categorical copy is evaluated but not displayed or assigned;
# the cell output is the last expression, `ser`, which is unchanged (dtype Int32).
ser.astype(cat_type).sort_values()
ser

0    1
1    2
dtype: Int32

In [88]:
# Three timestamps starting 2020-01-01, created with the 'CET' time zone.
ser_date = pd.Series(pd.date_range('20200101', periods=3, tz='CET'))
ser_date

0   2020-01-01 00:00:00+01:00
1   2020-01-02 00:00:00+01:00
2   2020-01-03 00:00:00+01:00
dtype: datetime64[ns, CET]

In [89]:
# Pass tz to get the current time in a specific time zone (UTC here);
# use a local zone name to get local time.
now = pd.Timestamp.now(tz='UTC')
now

Timestamp('2023-07-21 06:50:36.904060+0000', tz='UTC')

In [90]:
# apply() on a single column (a Series) calls the function on each value (element-wise)
# and returns a new Series; here it yields the length of every word in '단어'.

df4['단어'].apply(func=len)

0    4
1    3
2    3
3    5
4    5
Name: 단어, dtype: int64

In [91]:
# Build the class-rank table in df2: index by student name and add each student's mean.

# Start from df1 (set_index returns a new frame, df1 is left untouched) instead of
# from df2 itself, so re-running this cell neither fails on the already-consumed
# '이름' column nor appends a second '평균' column.
df2 = df1.set_index('이름')
# Row-wise mean over the four subject columns, named so it becomes the '평균' column.
ser1 = df2.mean(axis=1).rename('평균')
df2 = pd.concat([df2, ser1], axis=1)
df2

Unnamed: 0_level_0,국어,영어,수학,과학,평균
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
송중기,92,99,69,75,83.75
김나현,88,91,87,83,87.25
권보아,72,86,71,84,78.25
박효신,81,78,77,91,81.75
김범수,88,80,94,91,88.25


In [92]:
# Letter grade from the mean score: above 90 -> 'A', above 80 -> 'B', otherwise 'C'.
# Only the '평균' column is needed, so map over that column instead of applying row by row.
df2['평점'] = df2['평균'].map(lambda mean: 'A' if mean > 90 else 'B' if mean > 80 else 'C')

# Class size; used as the denominator when the "rank/total" label is built.
ttl = len(df2)

# Rank by mean score, highest first. method='min' gives tied students the same
# whole-number rank (1, 1, 3, ...). The default 'average' method produces fractional
# ranks such as 1.5 for ties, which would be truncated when the rank is later
# converted with int().
df2['석차'] = df2['평균'].rank(ascending=False, method='min')
df2

Unnamed: 0_level_0,국어,영어,수학,과학,평균,평점,석차
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
송중기,92,99,69,75,83.75,B,3.0
김나현,88,91,87,83,87.25,B,2.0
권보아,72,86,71,84,78.25,C,5.0
박효신,81,78,77,91,81.75,B,4.0
김범수,88,80,94,91,88.25,B,1.0


In [93]:
# Class-rank label such as "3/5": the student's rank over the class size (ttl).
df2['반석차'] = df2['석차'].map(lambda rank: f"{int(rank)}/{ttl}")

# The numeric rank column is not shown in the final table. Choose the columns to keep
# by name: a positional pop(-2) removes whatever column happens to be second from the
# end, which is only '석차' as long as the column order never changes.
# df2.columns is a pandas Index, so a plain list is built from it here.
list_remaining = [col for col in df2.columns if col != '석차']
df2 = df2[list_remaining]
df2
# Note: df2.drop(columns='석차') removes the column equally well. Dropping a column
# does not change other columns whose values were computed from it earlier.

Unnamed: 0_level_0,국어,영어,수학,과학,평균,평점,반석차
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
송중기,92,99,69,75,83.75,B,3/5
김나현,88,91,87,83,87.25,B,2/5
권보아,72,86,71,84,78.25,C,5/5
박효신,81,78,77,91,81.75,B,4/5
김범수,88,80,94,91,88.25,B,1/5


In [100]:
# Stack a "subject average" row under the per-student rows, using a two-level row index.

# Column-wise mean of every numeric column (the four subjects and the per-student mean).
score_cols = ['국어', '영어', '수학', '과학', '평균']
ser2 = df2[score_cols].mean()

# One-row frame so the averages can be concatenated below df2. A separate name is used
# so the `ser` Series defined earlier in the notebook is not overwritten with a DataFrame.
subject_avg = ser2.to_frame().T

# Outer level: row group; inner level: student name (blank for the average row).
# The tuples are derived from df2's own index so they always match its rows.
idx = pd.MultiIndex.from_tuples(
    [('개인별 성적', name) for name in df2.index] + [('과목별 평균', '')]
)
df22 = pd.concat([df2, subject_avg])
df22.index = idx

# The average row has no '평점' / '반석차' value; show those cells as blanks.
# fillna returns a new frame, so df22 itself still holds NaN there.
df22.fillna('')

Unnamed: 0,Unnamed: 1,국어,영어,수학,과학,평균,평점,반석차
개인별 성적,송중기,92.0,99.0,69.0,75.0,83.75,B,3/5
개인별 성적,김나현,88.0,91.0,87.0,83.0,87.25,B,2/5
개인별 성적,권보아,72.0,86.0,71.0,84.0,78.25,C,5/5
개인별 성적,박효신,81.0,78.0,77.0,91.0,81.75,B,4/5
개인별 성적,김범수,88.0,80.0,94.0,91.0,88.25,B,1/5
과목별 평균,,84.2,86.8,79.6,84.8,83.85,,


In [None]:
# Display df5: a square table whose rows and columns are both labelled with the col2 city names.
df5

In [None]:
# Display df6: student name plus the '보내는곳' and '받는곳' city columns.
df6

In [None]:
# For every row of df6, look up the df5 cell whose row label is the '보내는곳' city
# and whose column label is the '받는곳' city, and store it in a new '운임' column.
df6['운임'] = [
    df5.loc[origin, destination]
    for origin, destination in zip(df6['보내는곳'], df6['받는곳'])
]
df6