In [5]:
import pandas as pd

# Load the two input tables from CSV files in the working directory:
# the solar generation records and the sunshine-duration records.
df_solar = pd.read_csv(filepath_or_buffer='df_solar_final_sorted.csv')
df_sunshine = pd.read_csv(filepath_or_buffer='일조_yearmonthdaydweekday정렬.csv')

In [8]:
# Preview the first rows of the sunshine table to inspect its columns.
df_sunshine.head()

Unnamed: 0.1,Unnamed: 0,지점명,year,month,day,weekday,hour,일조(hr)
0,0,양산시,2023,1,1,6,8,0.0
1,1,양산시,2023,1,1,6,9,0.3
2,2,양산시,2023,1,1,6,10,1.0
3,3,양산시,2023,1,1,6,11,1.0
4,4,양산시,2023,1,1,6,12,1.0


In [11]:
# Relabel the station '세종' as '청주' in the sunshine table; all other station
# names are left unchanged.
# NOTE(review): presumably done so these rows join with solar rows whose 지점명 is '청주' — confirm.
df_sunshine['지점명'] = df_sunshine['지점명'].replace({'세종': '청주'})


In [13]:
# Preview the sunshine table again.
# NOTE(review): the output captured for this cell includes an '일자' column that no
# visible cell creates — presumably it was added by a deleted or out-of-order cell;
# confirm the notebook still reproduces this after Restart & Run All.
df_sunshine.head()

Unnamed: 0.1,Unnamed: 0,지점명,year,month,day,weekday,hour,일조(hr),일자
0,0,양산시,2023,1,1,6,8,0.0,2023-01-01
1,1,양산시,2023,1,1,6,9,0.3,2023-01-01
2,2,양산시,2023,1,1,6,10,1.0,2023-01-01
3,3,양산시,2023,1,1,6,11,1.0,2023-01-01
4,4,양산시,2023,1,1,6,12,1.0,2023-01-01


In [12]:
# Preview the first rows of the solar generation table to inspect its columns.
df_solar.head()

Unnamed: 0,발전구분,지점명,설비용량(MW),연식(년),year,month,day,hour,weekday,일사(MJ/m2),...,방위각,풍속(m/s),풍향(16방위),기온(°C),하늘상태,습도(%),강수량(mm),적설(cm),발전량(kWh),일자
0,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,1,1,6,0.0,...,,2.4,180.0,1.9,1.0,54.0,0.0,0.0,0.0,2023-01-01
1,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,1,2,6,0.0,...,,2.4,200.0,1.5,1.0,54.0,0.0,0.0,0.0,2023-01-01
2,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,1,3,6,0.0,...,,0.8,250.0,-1.2,1.0,72.0,0.0,0.0,0.0,2023-01-01
3,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,1,4,6,0.0,...,,1.5,250.0,-0.5,1.0,70.0,0.0,0.0,0.0,2023-01-01
4,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,1,5,6,0.0,...,,1.9,230.0,-1.4,1.0,73.0,0.0,0.0,0.0,2023-01-01


In [15]:
# Columns shared by both tables that are used as the join key.
merge_keys = ['지점명', 'year', 'month', 'day', 'hour']

# Keep only the join keys and the sunshine value so that no other sunshine
# columns (e.g. the leftover 'Unnamed: *' index columns) end up in the result.
df_sunshine_merge = df_sunshine[merge_keys + ['일조(hr)']]

# Left join: every solar row is kept, and rows with no matching sunshine record
# get NaN in '일조(hr)'.
# validate='many_to_one' makes pandas raise MergeError when the sunshine table
# holds more than one row for the same key, instead of silently duplicating
# solar rows in the merged frame.
df_solar_merged = pd.merge(
    df_solar,
    df_sunshine_merge,
    on=merge_keys,
    how='left',
    validate='many_to_one',
)



In [16]:
# Mean of '일조(hr)' for each value of 'hour' in the merged frame.
# Hours whose sunshine values are all missing come out as NaN.
hourly_irradiation_mean = (
    df_solar_merged['일조(hr)']
    .groupby(df_solar_merged['hour'])
    .mean()
)
print(hourly_irradiation_mean)


hour
1          NaN
2          NaN
3          NaN
4          NaN
5          NaN
6     0.010690
7     0.142932
8     0.297051
9     0.517409
10    0.615908
11    0.656454
12    0.671834
13    0.675737
14    0.666891
15    0.652503
16    0.633317
17    0.548024
18    0.348245
19    0.206838
20    0.024474
21         NaN
22         NaN
23         NaN
24         NaN
Name: 일조(hr), dtype: float64


In [27]:
# Number of rows with a missing '일조(hr)' value for each value of 'hour'.
sunshine_is_missing = df_solar_merged['일조(hr)'].isna()
hourly_nan_count = sunshine_is_missing.groupby(df_solar_merged['hour']).sum()
print(hourly_nan_count)


hour
1     8387
2     8387
3     8387
4     8387
5     8387
6     5356
7     2820
8      214
9      213
10     215
11     214
12     214
13     214
14     217
15     216
16     211
17     212
18     210
19    2713
20    5110
21    8387
22    8387
23    8387
24    8405
Name: 일조(hr), dtype: int64


In [24]:
import pandas as pd

# Lift pandas' display limits so the row listing below is printed in full.
# These are global options and remain in effect for later cells.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Daytime window for the missing-value check, inclusive on both ends (hours 8 to 18).
# NOTE(review): the previous comment said 7 to 18 while the filter used 8 to 18.
# The filter is kept as written; confirm that excluding hour 7 is intended.
DAYTIME_START_HOUR = 8
DAYTIME_END_HOUR = 18

# Rows inside the daytime window whose sunshine value is missing.
daytime_nan_rows = df_solar_merged[
    df_solar_merged['hour'].between(DAYTIME_START_HOUR, DAYTIME_END_HOUR)
    & df_solar_merged['일조(hr)'].isna()
]

# Print the identifying columns of every such row, then the total count.
print(daytime_nan_rows[['지점명', 'year', 'month', 'day', 'hour', '일조(hr)']])
print(f"\n총 낮 시간대 결측치 행 수: {len(daytime_nan_rows)}")


        지점명  year  month  day  hour  일조(hr)
3945    양산시  2023      6   14    10     NaN
3946    양산시  2023      6   14    11     NaN
3947    양산시  2023      6   14    12     NaN
3948    양산시  2023      6   14    13     NaN
3949    양산시  2023      6   14    14     NaN
3950    양산시  2023      6   14    15     NaN
8200    양산시  2023     12    8    17     NaN
8201    양산시  2023     12    8    18     NaN
12705   양산시  2023      6   14    10     NaN
12706   양산시  2023      6   14    11     NaN
12707   양산시  2023      6   14    12     NaN
12708   양산시  2023      6   14    13     NaN
12709   양산시  2023      6   14    14     NaN
12710   양산시  2023      6   14    15     NaN
16960   양산시  2023     12    8    17     NaN
16961   양산시  2023     12    8    18     NaN
26143    청주  2017     12   31     8     NaN
26144    청주  2017     12   31     9     NaN
26145    청주  2017     12   31    10     NaN
26146    청주  2017     12   31    11     NaN
26147    청주  2017     12   31    12     NaN
26148    청주  2017     12   31   

In [25]:
# Pairwise Pearson correlation between the generation, sunshine and irradiation columns.
cols = ['발전량(kWh)', '일조(hr)', '일사(MJ/m2)']
correlation_matrix = df_solar_merged.loc[:, cols].corr(method='pearson')

# Print the heading and the matrix on separate lines.
print("📊 발전량 vs 일조 vs 일사(MJ/m2) 상관관계:", correlation_matrix, sep="\n")


📊 발전량 vs 일조 vs 일사(MJ/m2) 상관관계:
           발전량(kWh)    일조(hr)  일사(MJ/m2)
발전량(kWh)   1.000000  0.382402   0.577385
일조(hr)     0.382402  1.000000   0.739031
일사(MJ/m2)  0.577385  0.739031   1.000000


In [28]:
# Save the merged frame to 'df_solar_sunshine.csv' without writing the row index.
# Encoding is 'utf-8-sig' — presumably chosen so spreadsheet tools read the Korean
# text correctly; confirm.
df_solar_merged.to_csv('df_solar_sunshine.csv', index=False, encoding='utf-8-sig')
