In [2]:
import pandas as pd

# Read the CSV file into `df_solar`; the path is relative to the current working directory.
df_solar = pd.read_csv('df_solar_real_final_yearmonthdaydate.csv')

In [3]:
def summarize_unique_values_full(df, threshold=50):
    """Print a per-column summary of distinct non-null values.

    For every column a header line with the column name, dtype and number of
    distinct non-null values is printed. Columns with more than ``threshold``
    distinct values are only flagged as continuous; for all other columns each
    value is listed with its occurrence count (most frequent first).

    Args:
        df: DataFrame whose columns are summarised.
        threshold: Maximum number of distinct non-null values for which the
            individual values are listed. Defaults to 50.

    Returns:
        None. The summary is written to standard output.
    """
    print("✅ 컬럼별 유니크 값 및 개수 요약\n")

    for col in df.columns:
        series = df[col]
        # nunique() ignores NaN by default, matching the dropna=True used for
        # the per-value counts below.
        nunique = series.nunique()

        print(f"\n🔹 {col} ({series.dtype}) → 고유값 {nunique}개")

        if nunique > threshold:
            # Report the threshold actually in effect; the message previously
            # always said "50" even when a different threshold was passed.
            print(f"   ⮑ 연속형 데이터로 판단됨 (고유값 {threshold}개 초과)\n")
        else:
            value_counts = series.value_counts(dropna=True)
            for val, count in value_counts.items():
                print(f"   • {val} : {count}개")

# Run the summary on df_solar using the function's default threshold.
summarize_unique_values_full(df_solar)

✅ 컬럼별 유니크 값 및 개수 요약


🔹 발전구분 (object) → 고유값 6개
   • 영암에프원태양광b : 61350개
   • 안산연성정수장태양광 : 56886개
   • 세종시폐기물매립장태양광 : 56814개
   • (군산)삼랑진태양광1 : 8760개
   • (군산)삼랑진태양광2 : 8760개
   • 태안#9,10 수상태양광 : 8736개

🔹 year (int64) → 고유값 7개
   • 2023 : 43656개
   • 2020 : 26355개
   • 2018 : 26283개
   • 2019 : 26283개
   • 2021 : 26283개
   • 2022 : 26283개
   • 2017 : 26163개

🔹 month (int64) → 고유값 12개
   • 1 : 17856개
   • 3 : 17856개
   • 5 : 17808개
   • 6 : 17280개
   • 4 : 17136개
   • 12 : 16386개
   • 7 : 16368개
   • 8 : 16368개
   • 10 : 16368개
   • 2 : 16200개
   • 9 : 15840개
   • 11 : 15840개

🔹 day (int64) → 고유값 31개
   • 1 : 6624개
   • 21 : 6624개
   • 19 : 6624개
   • 18 : 6624개
   • 22 : 6624개
   • 2 : 6624개
   • 15 : 6624개
   • 14 : 6624개
   • 13 : 6624개
   • 23 : 6624개
   • 24 : 6624개
   • 25 : 6624개
   • 26 : 6624개
   • 27 : 6624개
   • 28 : 6624개
   • 6 : 6624개
   • 5 : 6624개
   • 4 : 6624개
   • 3 : 6624개
   • 20 : 6624개
   • 16 : 6600개
   • 17 : 6600개
   • 12 : 6600개
   • 11 : 6600개
   • 10 : 6600개
 

In [5]:
# Preview the first rows of df_solar as the cell's displayed output.
df_solar.head()

Unnamed: 0,발전구분,year,month,day,weekday,설비용량(MW),시간,발전량(kWh),연식(년),지점명,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),일사(MJ/m2),적설(cm),하늘상태,태양고도,방위각
0,세종시폐기물매립장태양광,2017,1,1,6,1.628,1,0.0,4.6,청주,2.2,0.0,0.2,0.0,78.0,0.0,0.0,,0.0,
1,안산연성정수장태양광,2017,1,1,6,1.49,1,0.0,4.4,수원,0.1,0.0,0.3,0.0,96.0,0.0,0.0,,0.0,
2,영암에프원태양광b,2017,1,1,6,13.296,1,0.0,4.1,목포,5.0,0.0,1.8,160.0,93.0,0.0,0.0,,0.0,
3,세종시폐기물매립장태양광,2017,1,1,6,1.628,2,0.0,4.6,청주,1.9,0.0,0.5,340.0,80.0,0.0,0.0,,0.0,
4,안산연성정수장태양광,2017,1,1,6,1.49,2,0.0,4.4,수원,-1.1,0.0,0.0,0.0,97.0,0.0,0.0,,0.0,


In [16]:
# Rename the '시간' column to 'hour' so it matches the other English date-part columns.
df_solar = df_solar.rename(columns={'시간': 'hour'})

# Desired leading column order. Per the original author's note, the order
# reflects correlation-based importance and already uses the renamed 'hour'.
sorted_columns = [
    '발전구분',
    '지점명',
    '설비용량(MW)',
    '연식(년)',
    'year',
    'month',
    'day',
    'hour',
    'weekday',
    '일사(MJ/m2)',
    '태양고도',
    '방위각',
    '풍속(m/s)',
    '풍향(16방위)',
    '기온(°C)',
    '하늘상태',
    '습도(%)',
    '강수량(mm)',
    '적설(cm)',
    '발전량(kWh)',
]

# Columns not named above keep their existing relative order and go last.
remaining_columns = list(
    filter(lambda name: name not in sorted_columns, df_solar.columns)
)
final_columns = [*sorted_columns, *remaining_columns]

# Reorder the columns, then sort the rows by plant, date parts and hour,
# and replace the index with a fresh 0..n-1 range.
df_solar = (
    df_solar[final_columns]
    .sort_values(by=['발전구분', 'year', 'month', 'day', 'hour'])
    .reset_index(drop=True)
)

# Write the result to CSV without the index, encoded as UTF-8 with a BOM.
df_solar.to_csv("df_solar_final_sorted.csv", encoding='utf-8-sig', index=False)


In [15]:
# Display the first 80 rows of df_solar.
# NOTE(review): this cell's execution count (In [15]) is lower than that of the
# preceding cell (In [16]), so the stored output may predate the last run of
# that cell — re-run the notebook top to bottom to confirm.
df_solar.head(80)

Unnamed: 0,발전구분,지점명,설비용량(MW),연식(년),year,month,day,hour,weekday,일사(MJ/m2),태양고도,방위각,풍속(m/s),풍향(16방위),기온(°C),하늘상태,습도(%),강수량(mm),적설(cm),발전량(kWh)
0,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,1,1,6,0.00,0.000000,,2.4,180.0,1.9,1.0,54.0,0.0,0.0,0.0
1,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,1,2,6,0.00,0.000000,,2.4,200.0,1.5,1.0,54.0,0.0,0.0,0.0
2,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,1,3,6,0.00,0.000000,,0.8,250.0,-1.2,1.0,72.0,0.0,0.0,0.0
3,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,1,4,6,0.00,0.000000,,1.5,250.0,-0.5,1.0,70.0,0.0,0.0,0.0
4,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,1,5,6,0.00,0.000000,,1.9,230.0,-1.4,1.0,73.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,4,4,2,0.00,0.000000,,0.5,230.0,-1.5,1.0,33.0,0.0,0.0,0.0
76,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,4,5,2,0.00,0.000000,,5.5,320.0,0.6,1.0,29.0,0.0,0.0,0.0
77,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,4,6,2,0.00,0.000000,,4.2,320.0,0.5,1.0,32.0,0.0,0.0,0.0
78,(군산)삼랑진태양광1,양산시,2.0,14.8,2023,1,4,7,2,0.00,0.000000,,1.1,230.0,-1.4,1.0,39.0,0.0,0.0,0.0
