In [None]:
# Library
import chardet
import pandas as pd

In [None]:
# Sniff the text encoding of the raw electricity CSV from its leading bytes.
file = 'data/electricity_consumption_kor.csv'
with open(file, mode='rb') as f:
    # Only a 10,000-byte sample is handed to the detector, not the whole file.
    raw_data = f.read(10000)

# The sample is already in memory, so detection can run after the handle is closed.
result = chardet.detect(raw_data)
print(result)

In [None]:
# Load the Korean electricity consumption data.
# NOTE(review): this comment previously said "EUC-KR" while the call below reads
# the file as UTF-8; the comment now matches the code. If this read ever raises
# UnicodeDecodeError, re-check the file's actual encoding before changing it.
df_electricity = pd.read_csv('data/electricity_consumption_kor.csv', encoding='utf-8')

# The '시군구' (city/county/district) column is discarded before renaming.
df_electricity = df_electricity.drop(columns=['시군구'])

# Rename headers.
# The assignment is positional: the nine names below must line up with the
# order of the columns that remain after the drop (pandas raises ValueError on
# a length mismatch, but cannot detect a wrong order).
df_electricity.columns = [
    "Year",
    "Month",
    "Province",
    "Number of Households",
    "Avg Power Consumption per Household (kWh)",
    # NOTE(review): "Btill" looks like a typo for "Bill". It is kept byte-for-byte
    # because this header is written to the output CSV and downstream readers
    # may already depend on it -- fix here and in all consumers together.
    "Avg Electricity Btill per Household (KRW)",
    "Number of Tropical Nights",
    "Number of Heatwave Days",
    "Number of Coldwave Days",
]

# Translate Korean to English
kor_to_eng = {
    "서울특별시": "Seoul",
    "부산광역시": "Busan",
    "대구광역시": "Daegu",
    "인천광역시": "Incheon",
    "광주광역시": "Gwangju",
    "대전광역시": "Daejeon",
    "울산광역시": "Ulsan",
}
province_kor = df_electricity['Province']
province_eng = province_kor.map(kor_to_eng)

# Series.map() with a dict yields NaN for every value that is not a key, with
# no warning. Report such values so lost province names are visible; the
# resulting column is the same as a plain .map() would produce.
unmapped_provinces = province_kor[province_eng.isna() & province_kor.notna()].unique()
if len(unmapped_provinces) > 0:
    print(f"Warning: {len(unmapped_provinces)} province name(s) have no English "
          f"mapping and become NaN: {list(unmapped_provinces)}")

df_electricity['Province'] = province_eng

# Save the translated csv file in UTF-8 encoding
df_electricity.to_csv('data/electricity_consumption_eng.csv', index=False, encoding='utf-8')

In [None]:
# Load weather forecast data with EUC-KR encoding
df_weather = pd.read_csv('data/weather_forecast_kor.csv', encoding='EUC-KR')

# The '지점' column is discarded before renaming
# (presumably the station identifier -- confirm against the raw file).
df_weather = df_weather.drop(columns=['지점'])

# Rename headers.
# The assignment is positional: the 21 names below must line up with the order
# of the columns that remain after the drop (pandas raises ValueError on a
# length mismatch, but cannot detect a wrong order).
df_weather.columns = [
    "Station Name",
    "Year",
    "Month",
    "Avg Temperature (Celsius)",
    "Avg Max Temperature (Celsius)",
    "Avg Min Temperature (Celsius)",
    "Avg Local Pressure (hPa)",
    "Avg Sea Level Pressure (hPa)",
    "Avg Vapor Pressure (hPa)",
    "Avg Dew Point Temp (Celsius)",
    "Avg Relative Humidity (%)",
    "Monthly Precipitation (mm)",
    "Small Pan Evaporation (mm)",
    "Avg Wind Speed (m/s)",
    "Max Wind Speed (m/s)",
    "Avg Cloud Cover (1/10)",
    "Total Sunshine Hours (hr)",
    "Sunshine Rate (%)",
    "Total Solar Radiation (MJ/m^2)",
    "Avg Min Surface Temp (Celsius)",
    "Avg Ground Temp (Celsius)",
]

# Translate Korean to English
kor_to_eng = {
    "서울": "Seoul",
    "부산": "Busan",
    "대구": "Daegu",
    "인천": "Incheon",
    "광주": "Gwangju",
    "대전": "Daejeon",
    "울산": "Ulsan",
}
station_kor = df_weather['Station Name']
station_eng = station_kor.map(kor_to_eng)

# Series.map() with a dict yields NaN for every value that is not a key, with
# no warning. Report such values so lost station names are visible; the
# resulting column is the same as a plain .map() would produce.
unmapped_stations = station_kor[station_eng.isna() & station_kor.notna()].unique()
if len(unmapped_stations) > 0:
    print(f"Warning: {len(unmapped_stations)} station name(s) have no English "
          f"mapping and become NaN: {list(unmapped_stations)}")

df_weather['Station Name'] = station_eng

# Save the translated csv file in UTF-8 encoding
df_weather.to_csv('data/weather_forecast_eng.csv', index=False, encoding='utf-8')