In [19]:
import pandas as pd
import datetime

# Load the 2023 weather (temperature) data and the 2023 fine-dust data.
# Both CSV files are read with the cp949 encoding.
weather_2023, dust_2023 = (
    pd.read_csv(csv_name, encoding='cp949')
    for csv_name in ('2023_기온데이터.csv', '2023_미세먼지.csv')
)

In [20]:
# Remove the columns that are not needed: the station id ('지점') and station name ('지점명').
weather_2023 = weather_2023.drop(columns=['지점', '지점명'])

In [21]:
# Display the weather frame (pandas truncates the rendering of long frames).
weather_2023

Unnamed: 0,일시,기온(°C),강수량(mm),풍속(m/s),습도(%),적설(cm)
0,2023-01-01 00:00,0.9,,1.4,72,
1,2023-01-01 01:00,1.5,,1.9,71,
2,2023-01-01 02:00,1.5,,1.9,72,
3,2023-01-01 03:00,1.6,,1.6,74,
4,2023-01-01 04:00,1.5,,1.4,74,
...,...,...,...,...,...,...
8755,2023-12-31 19:00,2.6,,0.8,90,2.4
8756,2023-12-31 20:00,1.9,,2.2,94,2.4
8757,2023-12-31 21:00,1.7,,2.4,96,2.4
8758,2023-12-31 22:00,1.3,,0.6,95,2.4


In [22]:
# Remove the columns that are not needed: the station id ('지점') and station name ('지점명'),
# then display the resulting fine-dust frame.
dust_2023 = dust_2023.drop(columns=['지점', '지점명'])
dust_2023

Unnamed: 0,일시,1시간평균 미세먼지농도(㎍/㎥)
0,2023-01-01 00:00,63
1,2023-01-01 01:00,67
2,2023-01-01 02:00,84
3,2023-01-01 03:00,82
4,2023-01-01 04:00,87
...,...,...
8367,2023-12-31 19:00,19
8368,2023-12-31 20:00,26
8369,2023-12-31 21:00,30
8370,2023-12-31 22:00,31


In [23]:
# The fine-dust data has gaps (some hours have no fine-dust reading),
# so it has fewer rows than the weather data.
# Original author's note: fine-dust values range from min 2 to max 400.
dust_2023.shape[0], weather_2023.shape[0]

(8372, 8760)

In [24]:
# Combine weather and fine-dust data on the '일시' timestamp column.
# The weather frame is the left side of a left join, so every weather row is
# kept and hours without a fine-dust reading get NaN in the dust column.
weather_dust_2023 = weather_2023.merge(dust_2023, how='left', on='일시')
weather_dust_2023

Unnamed: 0,일시,기온(°C),강수량(mm),풍속(m/s),습도(%),적설(cm),1시간평균 미세먼지농도(㎍/㎥)
0,2023-01-01 00:00,0.9,,1.4,72,,63.0
1,2023-01-01 01:00,1.5,,1.9,71,,67.0
2,2023-01-01 02:00,1.5,,1.9,72,,84.0
3,2023-01-01 03:00,1.6,,1.6,74,,82.0
4,2023-01-01 04:00,1.5,,1.4,74,,87.0
...,...,...,...,...,...,...,...
8755,2023-12-31 19:00,2.6,,0.8,90,2.4,19.0
8756,2023-12-31 20:00,1.9,,2.2,94,2.4,26.0
8757,2023-12-31 21:00,1.7,,2.4,96,2.4,30.0
8758,2023-12-31 22:00,1.3,,0.6,95,2.4,31.0


In [25]:
# Inspect the merged fine-dust concentration column on its own.
weather_dust_2023.loc[:, '1시간평균 미세먼지농도(㎍/㎥)']

0       63.0
1       67.0
2       84.0
3       82.0
4       87.0
        ... 
8755    19.0
8756    26.0
8757    30.0
8758    31.0
8759    28.0
Name: 1시간평균 미세먼지농도(㎍/㎥), Length: 8760, dtype: float64

In [27]:
# Split the '일시' timestamp strings (formatted 'YYYY-MM-DD HH:MM') into a
# date string and an integer hour of day.
# The vectorised `.str` accessor replaces a per-row Python loop that looked up
# every value with `.loc[idx]` twice.
timestamps = weather_dust_2023['일시']

# First 10 characters -> 'YYYY-MM-DD'.
dates = timestamps.str[:10].tolist()
# The two characters before ':MM' -> hour, converted to int (e.g. '04' -> 4).
times = timestamps.str[-5:-3].astype(int).tolist()

In [28]:
# Create the date ('날짜') and hour-of-day ('시간대') columns at positions 1 and 2.
weather_dust_2023.insert(1, '날짜', dates)
weather_dust_2023.insert(2, '시간대', times)

In [29]:
# Remove the original '일시' column now that it has been split into date and hour.
weather_dust_2023 = weather_dust_2023.drop(columns='일시')

In [30]:
# Display the merged frame, which now has '날짜' and '시간대' in place of '일시'.
weather_dust_2023

Unnamed: 0,날짜,시간대,기온(°C),강수량(mm),풍속(m/s),습도(%),적설(cm),1시간평균 미세먼지농도(㎍/㎥)
0,2023-01-01,0,0.9,,1.4,72,,63.0
1,2023-01-01,1,1.5,,1.9,71,,67.0
2,2023-01-01,2,1.5,,1.9,72,,84.0
3,2023-01-01,3,1.6,,1.6,74,,82.0
4,2023-01-01,4,1.5,,1.4,74,,87.0
...,...,...,...,...,...,...,...,...
8755,2023-12-31,19,2.6,,0.8,90,2.4,19.0
8756,2023-12-31,20,1.9,,2.2,94,2.4,26.0
8757,2023-12-31,21,1.7,,2.4,96,2.4,30.0
8758,2023-12-31,22,1.3,,0.6,95,2.4,31.0


In [31]:
# Fill missing values: a null fine-dust concentration or wind speed takes the
# value of the preceding hour (forward fill).
# `.ffill()` replaces `fillna(method='ffill')`, whose `method` argument is
# deprecated since pandas 2.1 and emits a FutureWarning.
# NOTE: forward fill relies on row order and cannot fill a null in the very
# first row, since it has no predecessor.
weather_dust_2023['1시간평균 미세먼지농도(㎍/㎥)'] = weather_dust_2023['1시간평균 미세먼지농도(㎍/㎥)'].ffill()
weather_dust_2023['풍속(m/s)'] = weather_dust_2023['풍속(m/s)'].ffill()

# Fill missing values: a null precipitation or snow-depth value is taken to
# mean that it did not rain or snow, so it is filled with 0.
weather_dust_2023['강수량(mm)'] = weather_dust_2023['강수량(mm)'].fillna(0)
weather_dust_2023['적설(cm)'] = weather_dust_2023['적설(cm)'].fillna(0)

In [32]:
# Confirm that no missing values remain: count the nulls in each column.
weather_dust_2023.isna().sum()

날짜                   0
시간대                  0
기온(°C)               0
강수량(mm)              0
풍속(m/s)              0
습도(%)                0
적설(cm)               0
1시간평균 미세먼지농도(㎍/㎥)    0
dtype: int64

In [33]:
# Export the cleaned frame to a cp949-encoded CSV, without the index column.
weather_dust_2023.to_csv(
    '[결측치 제거]2023년 날씨+미세먼지정보.csv',
    encoding='cp949',
    index=False,
)