In [26]:
import pandas as pd

# CSV 파일 읽기
df_all6 = pd.read_csv("df_all6.csv", encoding='utf-8')
df_forecast = pd.read_csv("추가발전소_기상데이터.csv", encoding='utf-8')
df_busan = pd.read_csv("부산_병합2.csv", encoding='utf-8')

print("📂 df_all6 컬럼명:")
print(df_all6.columns.tolist())

print("📂 df_forecast (추가발전소_기상데이터) 컬럼명:")
print(df_forecast.columns.tolist())



📂 df_all6 컬럼명:
['발전구분', '호기', 'year', 'month', 'day', 'hour', 'weekday', '발전량(kWh)', '설비용량(MW)']
📂 df_forecast (추가발전소_기상데이터) 컬럼명:
['Unnamed: 0', '지점명', 'year', 'month', 'day', 'weekday', '시간', '기온(°C)', '강수량(mm)', '풍속(m/s)', '풍향(16방위)', '습도(%)', '현지기압(hPa)', '일조(hr)', '일사(MJ/m2)', '적설(cm)', '전운량(10분위)']


In [10]:
df_forecast.head()

Unnamed: 0.1,Unnamed: 0,지점명,year,month,day,weekday,시간,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),현지기압(hPa),일조(hr),일사(MJ/m2),적설(cm),전운량(10분위)
0,0,충주,2021,8,1,6,0,25.7,,0.5,90.0,87.0,988.2,,,,4.0
1,1,충주,2021,8,1,6,1,25.7,,0.9,90.0,87.0,988.1,,,,3.0
2,2,충주,2021,8,1,6,2,25.1,,0.7,110.0,89.0,987.9,,,,3.0
3,3,충주,2021,8,1,6,3,25.2,,0.5,90.0,88.0,987.9,,,,8.0
4,4,충주,2021,8,1,6,4,24.8,,1.3,70.0,90.0,987.8,,,,4.0


In [11]:
df_busan.head()

Unnamed: 0,지점,지점명,일시,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),현지기압(hPa),일조(hr),일사(MJ/m2),적설(cm),전운량(10분위)
0,159,부산,2013-01-01 00:00,-2.3,,1.6,340.0,51.0,1012.5,,,,0.0
1,159,부산,2013-01-01 01:00,-2.4,,1.8,340.0,54.0,1012.3,,,,
2,159,부산,2013-01-01 02:00,-2.5,,1.9,340.0,53.0,1013.3,,,,
3,159,부산,2013-01-01 03:00,-2.0,,1.7,320.0,52.0,1013.4,,,,6.0
4,159,부산,2013-01-01 04:00,-2.3,,2.1,320.0,54.0,1013.0,,,,0.0


In [12]:
# ✅ 원래 행 수 저장
original_len = len(df_forecast)

# ✅ '부산' 행 수 확인 및 제거
busan_removed_df = df_forecast[df_forecast['지점명'] != '부산']
removed_len = original_len - len(busan_removed_df)
print(f"🗑️ 제거된 부산 행 수: {removed_len}")
print(f"🔎 제거 후 df_forecast 행 수: {len(busan_removed_df)}")

# ✅ df_busan에서 필요한 컬럼만 df_forecast 형식에 맞게 정렬 및 필터링
# 일시 → year, month, day, 시간 추출
df_busan['일시'] = pd.to_datetime(df_busan['일시'], errors='coerce')
df_busan['year'] = df_busan['일시'].dt.year
df_busan['month'] = df_busan['일시'].dt.month
df_busan['day'] = df_busan['일시'].dt.day
df_busan['시간'] = df_busan['일시'].dt.hour
df_busan['weekday'] = df_busan['일시'].dt.weekday

# ✅ df_forecast 형식 맞추기 (Unnamed: 0은 생략 가능)
columns_needed = [
    '지점명', 'year', 'month', 'day', 'weekday', '시간',
    '기온(°C)', '강수량(mm)', '풍속(m/s)', '풍향(16방위)',
    '습도(%)', '현지기압(hPa)', '일조(hr)', '일사(MJ/m2)', '적설(cm)', '전운량(10분위)'
]
df_busan_formatted = df_busan[columns_needed]

# ✅ 인덱스 리셋
df_busan_formatted = df_busan_formatted.reset_index(drop=True)
df_busan_formatted = df_busan_formatted.copy()

# ✅ 병합
df_forecast_updated = pd.concat([busan_removed_df, df_busan_formatted], ignore_index=True)

# ✅ 결과 확인
print(f"📌 df_forecast 병합 전: {original_len}행")
print(f"📌 df_forecast 병합 후: {len(df_forecast_updated)}행")
print(f"✅ 추가된 df_busan 행 수: {len(df_busan_formatted)}")


🗑️ 제거된 부산 행 수: 1416
🔎 제거 후 df_forecast 행 수: 903119
📌 df_forecast 병합 전: 904535행
📌 df_forecast 병합 후: 1009735행
✅ 추가된 df_busan 행 수: 106616


In [13]:
df_forecast_updated.head()

Unnamed: 0.1,Unnamed: 0,지점명,year,month,day,weekday,시간,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),현지기압(hPa),일조(hr),일사(MJ/m2),적설(cm),전운량(10분위)
0,0.0,충주,2021,8,1,6,0,25.7,,0.5,90.0,87.0,988.2,,,,4.0
1,1.0,충주,2021,8,1,6,1,25.7,,0.9,90.0,87.0,988.1,,,,3.0
2,2.0,충주,2021,8,1,6,2,25.1,,0.7,110.0,89.0,987.9,,,,3.0
3,3.0,충주,2021,8,1,6,3,25.2,,0.5,90.0,88.0,987.9,,,,8.0
4,4.0,충주,2021,8,1,6,4,24.8,,1.3,70.0,90.0,987.8,,,,4.0


In [14]:
print("📌 df_all6의 발전구분 목록:")
print(df_all6['발전구분'].unique())

# ✅ 일자(datetime) 컬럼 생성
df_all6['일자'] = pd.to_datetime(df_all6[['year', 'month', 'day']], errors='coerce')

# ✅ 발전구분별 시작일자와 종료일자 구하기
generation_period = df_all6.groupby('발전구분')['일자'].agg(시작일자='min', 종료일자='max').reset_index()

# ✅ 출력
print("📌 df_all6 발전구분별 기간:")
print(generation_period)



📌 df_all6의 발전구분 목록:
['감우리' '광양항광양냉장태양광' '광양항어울림태양광' '남제주소내' '농촌공진도태양광' '당진자재창고태양광' '당진태양광'
 '당진화력수상태양광' '무릉리' '부산복합자재창고' '부산본부' '부산수처리장' '부산신항' '부산운동장' '삼척소내' '송당리'
 '수원환경사업소태양광' '신인천 북측부지' '신인천 주차장' '신인천소내' '신인천전망대' '신인천해수구취수구' '신풍리'
 '영월본부' '영월철도부지' '용수리' '울산태양광#1' '위미2리' '이천D(백사면B)' '이천시 백사면A' '익산 다송리'
 '인천수산정수장' '하동공설운동장' '하동변전소' '하동보건소' '하동본부' '하동정수장' '하동하수처리장' '화촌주민참여형']
📌 df_all6 발전구분별 기간:
          발전구분       시작일자       종료일자
0          감우리 2021-08-05 2025-02-28
1   광양항광양냉장태양광 2022-06-01 2024-06-30
2    광양항어울림태양광 2021-01-01 2023-06-30
3        남제주소내 2013-01-01 2025-02-28
4     농촌공진도태양광 2019-01-01 2019-12-31
5    당진자재창고태양광 2022-04-01 2024-06-30
6        당진태양광 2021-01-01 2024-06-30
7    당진화력수상태양광 2022-04-01 2024-06-30
8          무릉리 2021-05-01 2025-02-28
9     부산복합자재창고 2013-01-01 2025-02-28
10        부산본부 2019-01-01 2025-02-28
11      부산수처리장 2018-01-01 2025-02-28
12        부산신항 2013-01-01 2025-02-28
13       부산운동장 2018-01-01 2025-02-28
14        삼척소내 2018-01-01 2025-02-28
15      

In [121]:
import pandas as pd

# ✅ 일자(datetime) 컬럼 생성
df_forecast['일자'] = pd.to_datetime(df_forecast[['year', 'month', 'day']], errors='coerce')

# ✅ 지점명별 시작일자와 종료일자 구하기
period_summary = df_forecast.groupby('지점명')['일자'].agg(시작일자='min', 종료일자='max').reset_index()

# ✅ 출력
print(period_summary)


    지점명       시작일자       종료일자
0   광양시 2021-01-01 2024-07-01
1    군산 2024-01-01 2025-01-01
2    동해 2017-01-01 2025-03-01
3    목포 2013-01-01 2025-03-01
4    부산 2013-01-01 2025-03-01
5   서귀포 2013-01-01 2025-03-01
6    서산 2021-01-01 2024-07-01
7    수원 2021-01-01 2024-01-01
8    영월 2013-01-01 2025-03-01
9    울산 2020-01-01 2025-01-01
10   이천 2021-01-01 2025-03-01
11   인천 2013-01-01 2025-03-01
12   제주 2019-01-01 2025-03-01
13   진주 2013-01-01 2025-03-01
14   천안 2021-01-01 2025-03-01
15   충주 2021-08-01 2025-03-01


In [17]:
df_forecast_updated.to_csv('additional_farms.csv', index=False, encoding='utf-8-sig')

In [21]:
df_all6.head()

Unnamed: 0,발전구분,호기,year,month,day,hour,weekday,발전량(kWh),설비용량(MW),일자
0,감우리,1,2021,8,5,1,3,0.0,0.554,2021-08-05
1,감우리,1,2021,8,6,1,4,0.0,0.554,2021-08-06
2,감우리,1,2021,8,7,1,5,0.0,0.554,2021-08-07
3,감우리,1,2021,8,8,1,6,0.0,0.554,2021-08-08
4,감우리,1,2021,8,9,1,0,0.0,0.554,2021-08-09


In [19]:
df_forecast_updated.head()

Unnamed: 0.1,Unnamed: 0,지점명,year,month,day,weekday,시간,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),현지기압(hPa),일조(hr),일사(MJ/m2),적설(cm),전운량(10분위),일자
0,0.0,충주,2021,8,1,6,0,25.7,,0.5,90.0,87.0,988.2,,,,4.0,2021-08-01
1,1.0,충주,2021,8,1,6,1,25.7,,0.9,90.0,87.0,988.1,,,,3.0,2021-08-01
2,2.0,충주,2021,8,1,6,2,25.1,,0.7,110.0,89.0,987.9,,,,3.0,2021-08-01
3,3.0,충주,2021,8,1,6,3,25.2,,0.5,90.0,88.0,987.9,,,,8.0,2021-08-01
4,4.0,충주,2021,8,1,6,4,24.8,,1.3,70.0,90.0,987.8,,,,4.0,2021-08-01


In [139]:
import pandas as pd

# CSV 파일 읽기
df_all6 = pd.read_csv("df_all6.csv", encoding='utf-8')
df_forecast_updated = pd.read_csv("additional_farms.csv", encoding='utf-8')

In [43]:
df_all6.head()

Unnamed: 0,발전구분,호기,year,month,day,hour,weekday,발전량(kWh),설비용량(MW)
0,감우리,1,2021,8,5,1,3,0.0,0.554
1,감우리,1,2021,8,6,1,4,0.0,0.554
2,감우리,1,2021,8,7,1,5,0.0,0.554
3,감우리,1,2021,8,8,1,6,0.0,0.554
4,감우리,1,2021,8,9,1,0,0.0,0.554


In [140]:
import pandas as pd

# '시간'이 0인 행을 찾음
mask = df_forecast_updated['시간'] == 0

# 날짜 컬럼을 datetime으로 변환
dates = pd.to_datetime(df_forecast_updated.loc[mask, ['year', 'month', 'day']])

# 하루 전으로 변경
dates = dates - pd.Timedelta(days=1)

# 변경된 날짜를 다시 year, month, day 컬럼에 할당
for i, date in zip(df_forecast_updated.loc[mask].index, dates):
    df_forecast_updated.at[i, 'year'] = date.year
    df_forecast_updated.at[i, 'month'] = date.month
    df_forecast_updated.at[i, 'day'] = date.day

# '시간'을 24로 변경
df_forecast_updated.loc[mask, '시간'] = 24



In [141]:
df_forecast_updated.head(24)

Unnamed: 0.1,Unnamed: 0,지점명,year,month,day,weekday,시간,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),현지기압(hPa),일조(hr),일사(MJ/m2),적설(cm),전운량(10분위),일자
0,0.0,충주,2021,7,31,6,24,25.7,,0.5,90.0,87.0,988.2,,,,4.0,2021-08-01
1,1.0,충주,2021,8,1,6,1,25.7,,0.9,90.0,87.0,988.1,,,,3.0,2021-08-01
2,2.0,충주,2021,8,1,6,2,25.1,,0.7,110.0,89.0,987.9,,,,3.0,2021-08-01
3,3.0,충주,2021,8,1,6,3,25.2,,0.5,90.0,88.0,987.9,,,,8.0,2021-08-01
4,4.0,충주,2021,8,1,6,4,24.8,,1.3,70.0,90.0,987.8,,,,4.0,2021-08-01
5,5.0,충주,2021,8,1,6,5,24.2,,0.1,0.0,91.0,987.9,,,,3.0,2021-08-01
6,6.0,충주,2021,8,1,6,6,24.2,,0.0,0.0,93.0,988.3,0.0,0.09,,4.0,2021-08-01
7,7.0,충주,2021,8,1,6,7,25.8,,0.4,0.0,88.0,988.3,0.5,0.46,,8.0,2021-08-01
8,8.0,충주,2021,8,1,6,8,26.8,,1.1,270.0,82.0,988.5,0.4,0.9,,8.0,2021-08-01
9,9.0,충주,2021,8,1,6,9,27.7,,0.9,290.0,80.0,988.7,0.7,0.98,,8.0,2021-08-01


In [142]:
import pandas as pd

# 1. 'Unnamed' 컬럼 제거
df_forecast_updated = df_forecast_updated.drop(columns=[col for col in df_forecast_updated.columns if 'Unnamed' in col], errors='ignore')

# 2. '시간' → 'hour'로 이름 변경
df_forecast_updated = df_forecast_updated.rename(columns={'시간': 'hour'})

# 3. 발전구분 → 지점명 매핑
plant_to_location = {
    "감우리": "진주",
    "광양항광양냉장태양광": "광양시",
    "광양항어울림태양광": "광양시",
    "남제주소내": "서귀포",
    "농촌공진도태양광": "목포",
    "당진자재창고태양광": "천안",
    "당진태양광": "천안",
    "당진화력수상태양광": "천안",
    "무릉리": "동해",
    "부산복합자재창고": "부산",
    "부산본부": "부산",
    "부산수처리장": "부산",
    "부산신항": "부산",
    "부산운동장": "부산",
    "삼척소내": "동해",
    "송당리": "제주",
    "수원환경사업소태양광": "수원",
    "신인천 북측부지": "인천",
    "신인천 주차장": "인천",
    "신인천소내": "인천",
    "신인천전망대": "인천",
    "신인천해수구취수구": "인천",
    "신풍리": "천안",
    "영월본부": "영월",
    "영월철도부지": "영월",
    "용수리": "충주",
    "울산태양광#1": "울산",
    "위미2리": "서귀포",
    "이천D(백사면B)": "이천",
    "이천시 백사면A": "이천",
    "익산 다송리": "군산",
    "인천수산정수장": "인천",
    "하동공설운동장": "진주",
    "하동변전소": "진주",
    "하동보건소": "진주",
    "하동본부": "진주",
    "하동정수장": "진주",
    "하동하수처리장": "진주",
    "화촌주민참여형": "영월"
}

if '지점명' not in df_all6.columns:
    df_all6['지점명'] = df_all6['발전구분'].map(plant_to_location)

# ✅ 4. df_all6에 '일자' 컬럼 생성
df_all6['일자'] = pd.to_datetime(df_all6[['year', 'month', 'day']])

# ✅ 5. df_forecast_updated '일자' 변환
df_forecast_updated['일자'] = pd.to_datetime(df_forecast_updated['일자'])

# ✅ 6. 병합
df_merged = pd.merge(
    df_all6,
    df_forecast_updated,
    on=['지점명', '일자', 'hour'],
    how='left',
    indicator=True
)

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# ✅ 7. 매핑 결과 확인
total = len(df_merged)
matched = (df_merged['_merge'] == 'both').sum()
unmatched = total - matched

print(f"✅ 전체 행 수: {total}")
print(f"✅ 매핑된 행 수: {matched}")
print(f"❌ 매핑 안 된 행 수: {unmatched}")

# 1. 매핑되지 않은 행만 추출
unmatched_rows = df_merged[df_merged['_merge'] != 'both']

# 2. 지점명 기준으로 그룹화 후 전체 출력
if unmatched_rows.empty:
    print("✅ 모든 행이 매핑되었습니다!")
else:
    print(f"❌ 매핑 안 된 전체 행 수: {len(unmatched_rows)}")
    print("\n🔍 지점별 매핑되지 않은 예시:")

    grouped = unmatched_rows.groupby('지점명')

    for location, group in grouped:
        print(f"\n📍 지점명: {location} (총 {len(group)}건)")
        # 🔻 여기에서 .head() 제거하여 전체 출력
        print(group[['발전구분', '지점명', '일자', 'hour']])


# ✅ 9. _merge 컬럼 제거
df_merged = df_merged.drop(columns=['_merge'])

# ✅ 10. 저장 (선택 사항)
# df_merged.to_csv('df_merged_final.csv', index=False, encoding='utf-8-sig')


✅ 전체 행 수: 2822571
✅ 매핑된 행 수: 2815116
❌ 매핑 안 된 행 수: 7455
❌ 매핑 안 된 전체 행 수: 7455

🔍 지점별 매핑되지 않은 예시:

📍 지점명: 광양시 (총 1건)
              발전구분  지점명         일자  hour
1884259  광양항어울림태양광  광양시 2022-05-14    17

📍 지점명: 군산 (총 1건)
           발전구분 지점명         일자  hour
2774271  익산 다송리  군산 2024-01-01    24

📍 지점명: 서귀포 (총 18건)
          발전구분  지점명         일자  hour
4895     남제주소내  서귀포 2018-04-04     1
122501   남제주소내  서귀포 2018-04-04     2
240107   남제주소내  서귀포 2018-04-04     3
357713   남제주소내  서귀포 2018-04-04     4
474240   남제주소내  서귀포 2015-04-21     5
474307   남제주소내  서귀포 2015-06-27     5
475319   남제주소내  서귀포 2018-04-04     5
592925   남제주소내  서귀포 2018-04-04     6
710531   남제주소내  서귀포 2018-04-04     7
828137   남제주소내  서귀포 2018-04-04     8
945743   남제주소내  서귀포 2018-04-04     9
1063349  남제주소내  서귀포 2018-04-04    10
1179891  남제주소내  서귀포 2015-05-06    11
1180955  남제주소내  서귀포 2018-04-04    11
1297497  남제주소내  서귀포 2015-05-06    12
1651168  남제주소내  서귀포 2017-09-05    15
2473737  남제주소내  서귀포 2015-11-02    22
2709833  남제주소내  서귀포 2018

In [143]:
# 1. '일자' 컬럼 삭제
df_merged = df_merged.drop(columns=['일자'], errors='ignore')

# 2. 날짜 컬럼 정리
# 우선 x 우선으로 선택, 없으면 y
df_merged['year'] = df_merged['year_x'].fillna(df_merged['year_y'])
df_merged['month'] = df_merged['month_x'].fillna(df_merged['month_y'])
df_merged['day'] = df_merged['day_x'].fillna(df_merged['day_y'])

# 3. 기존 _x, _y 컬럼 제거
df_merged = df_merged.drop(columns=[
    'year_x', 'month_x', 'day_x', 'weekday_x',
    'year_y', 'month_y', 'day_y', 'weekday_y'
], errors='ignore')


In [145]:
df_merged.head()    

Unnamed: 0,발전구분,호기,hour,발전량(kWh),설비용량(MW),지점명,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),현지기압(hPa),일조(hr),일사(MJ/m2),적설(cm),전운량(10분위),year,month,day
0,감우리,1,1,0.0,0.554,진주,24.5,,0.0,0.0,92.0,1004.9,,,,2.0,2021,8,5
1,감우리,1,1,0.0,0.554,진주,24.4,,0.0,0.0,94.0,1001.6,,,,3.0,2021,8,6
2,감우리,1,1,0.0,0.554,진주,24.4,,0.2,0.0,87.0,999.1,,,,1.0,2021,8,7
3,감우리,1,1,0.0,0.554,진주,23.6,0.4,0.6,360.0,96.0,1000.7,,,,9.0,2021,8,8
4,감우리,1,1,0.0,0.554,진주,24.5,,0.3,0.0,86.0,996.8,,,,8.0,2021,8,9


In [146]:
print("✅ df_merged의 행 개수:", len(df_merged))


✅ df_merged의 행 개수: 2822571


In [149]:
df_merged.to_csv('df_add_farms.csv', index=False, encoding='utf-8-sig')
