In [3]:
import pandas as pd

df1 = pd.DataFrame({
    "id" : [1,2,3],
    "customer_id" : [1,2,3],
    "customer_name" : ["Robert","Peter","David"]
}, columns= ["id", "customer_id", "customer_name"])

df1

Unnamed: 0,id,customer_id,customer_name
0,1,1,Robert
1,2,2,Peter
2,3,3,David


In [4]:
df2 = pd.DataFrame({
    "id" : [1,2,4],
    "order_id" : [100,200,300],
    "order_date" : ["2025-01-21","2025-02-03","2025-09-01"]
}, columns= ["id", "order_id", "order_date"])

df2

Unnamed: 0,id,order_id,order_date
0,1,100,2025-01-21
1,2,200,2025-02-03
2,4,300,2025-09-01


#### 서로 다른 데이터 프레임을 하나로 합치는 방법
1. concat() : 서로 다른 데이터 프레임을 단순 연결 (*교차되는 특정 인덱스 고려X) => 좌, 우 & 상, 하(디폴트) 가능
2. merge() : 특정 인덱스를 기준

In [5]:
pd.concat([df1, df2])

Unnamed: 0,id,customer_id,customer_name,order_id,order_date
0,1,1.0,Robert,,
1,2,2.0,Peter,,
2,3,3.0,David,,
0,1,,,100.0,2025-01-21
1,2,,,200.0,2025-02-03
2,4,,,300.0,2025-09-01


In [7]:
doc = pd.concat([df1, df2], axis=1)  # 축 변경
doc

Unnamed: 0,id,customer_id,customer_name,id.1,order_id,order_date
0,1,1,Robert,1,100,2025-01-21
1,2,2,Peter,2,200,2025-02-03
2,3,3,David,4,300,2025-09-01


In [9]:
pd.merge(df1, df2)  # merge() = inner join 기능 유사

Unnamed: 0,id,customer_id,customer_name,order_id,order_date
0,1,1,Robert,100,2025-01-21
1,2,2,Peter,200,2025-02-03


In [10]:
pd.merge(df1, df2, on="id")

Unnamed: 0,id,customer_id,customer_name,order_id,order_date
0,1,1,Robert,100,2025-01-21
1,2,2,Peter,200,2025-02-03


In [11]:
pd.merge(df1, df2, on="id", how="inner")

Unnamed: 0,id,customer_id,customer_name,order_id,order_date
0,1,1,Robert,100,2025-01-21
1,2,2,Peter,200,2025-02-03


In [15]:
pd.merge(df1, df2, on="id", how="outer")  #합집합

Unnamed: 0,id,customer_id,customer_name,order_id,order_date
0,1,1.0,Robert,100.0,2025-01-21
1,2,2.0,Peter,200.0,2025-02-03
2,3,3.0,David,,
3,4,,,300.0,2025-09-01


In [13]:
pd.merge(df1, df2, on="id", how="left")

Unnamed: 0,id,customer_id,customer_name,order_id,order_date
0,1,1,Robert,100.0,2025-01-21
1,2,2,Peter,200.0,2025-02-03
2,3,3,David,,


In [14]:
pd.merge(df1, df2, on="id", how="right")

Unnamed: 0,id,customer_id,customer_name,order_id,order_date
0,1,1.0,Robert,100,2025-01-21
1,2,2.0,Peter,200,2025-02-03
2,4,,,300,2025-09-01


In [17]:
df1 = df1.set_index("id")
df1

Unnamed: 0_level_0,customer_id,customer_name
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,Robert
2,2,Peter
3,3,David


In [18]:
df2 = df2.set_index("id")
df2

Unnamed: 0_level_0,order_id,order_date
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,100,2025-01-21
2,200,2025-02-03
4,300,2025-09-01


In [19]:
pd.merge(df1, df2, left_index=True, right_index=True)

Unnamed: 0_level_0,customer_id,customer_name,order_id,order_date
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,Robert,100,2025-01-21
2,2,Peter,200,2025-02-03


In [20]:
pd.merge(df1, df2, how="outer", left_index=True, right_index=True)

Unnamed: 0_level_0,customer_id,customer_name,order_id,order_date
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1.0,Robert,100.0,2025-01-21
2,2.0,Peter,200.0,2025-02-03
3,3.0,David,,
4,,,300.0,2025-09-01


In [21]:
pd.merge(df1, df2, how="left", left_index=True, right_index=True)

Unnamed: 0_level_0,customer_id,customer_name,order_id,order_date
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,Robert,100.0,2025-01-21
2,2,Peter,200.0,2025-02-03
3,3,David,,


In [22]:
pd.merge(df1, df2, how="right", left_index=True, right_index=True)

Unnamed: 0_level_0,customer_id,customer_name,order_id,order_date
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1.0,Robert,100,2025-01-21
2,2.0,Peter,200,2025-02-03
4,,,300,2025-09-01


In [25]:
df = pd.DataFrame({
    "영어" : [60,70],
    "수학" : [100,50]
}, columns=["영어" ,"수학"], index=["Dave", "David"])

df

Unnamed: 0,영어,수학
Dave,60,100
David,70,50


In [34]:
# def func(df_data) : 
#     print(type(df_data))
#     print(df_data.index)
#     print(df_data.values)
#     return df_data
def func(df_data) : 
    df_data["영어"] = 80
    return df_data

In [35]:
df_func = df.apply(func, axis=0)

In [32]:
df_func = df.apply(func, axis=1)

In [36]:
df_func

Unnamed: 0,영어,수학
Dave,80,100
David,80,50
영어,80,80


In [38]:
df = pd.DataFrame({
    "성별" : ["남", "남", "남"],
    "이름" : ["David", "Dave", "Dave"],
    "수학" : [100, 50, 80],
    "국어" : [80, 70, 50],
})
df

Unnamed: 0,성별,이름,수학,국어
0,남,David,100,80
1,남,Dave,50,70
2,남,Dave,80,50


In [44]:
selected_columns = ["이름", "수학", "국어"]

df = df[selected_columns]
# df.groupby("이름").sum()
df.groupby("이름").mean()

Unnamed: 0_level_0,수학,국어
이름,Unnamed: 1_level_1,Unnamed: 2_level_1
Dave,65.0,60.0
David,100.0,80.0


In [45]:
import os 

PATH = "COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports/"
file_list, csv_list = os.listdir(PATH), List() # 해당경로안에있는 파일을 리스트로 찾아옴



FileNotFoundError: [WinError 3] 지정된 경로를 찾을 수 없습니다: 'COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports/'