In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import tqdm

In [2]:
def fahrenheit_to_celsius(fahrenheit):
    """
    Convert a Fahrenheit temperature to Celsius, rounded to one decimal place.

    Parameters:
    fahrenheit (float): temperature in degrees Fahrenheit

    Returns:
    float: temperature in degrees Celsius, rounded to 0.1
    """
    # Same operation order as the classic formula: subtract the offset, scale by 5/9.
    return round((fahrenheit - 32) * 5.0/9.0, 1)

In [41]:
def average_angle_ignore_nan(degrees):
    """
    Compute the circular mean of a collection of angles, skipping NaN entries.

    Each valid angle is mapped to a unit vector, the vector components are
    averaged, and the direction of the averaged vector is returned.

    :param degrees: iterable of angles in degrees (0-360); NaN entries are ignored
    :return: mean angle in degrees, wrapped with modulo 360; NaN if there is no valid angle
    """
    # Keep only the entries that are not missing.
    usable = [angle for angle in degrees if not pd.isna(angle)]

    # Nothing to average: report NaN.
    if len(usable) == 0:
        return np.nan

    # Unit-vector components of every valid angle.
    theta = np.deg2rad(usable)
    mean_cos = np.mean(np.cos(theta))
    mean_sin = np.mean(np.sin(theta))

    # Direction of the mean vector, wrapped by modulo 360.
    return np.rad2deg(np.arctan2(mean_sin, mean_cos)) % 360

In [3]:
# Load the raw observation sheet; header=None tells pandas not to use the first row as column names.
df = pd.read_excel(io="./6_9_cn.xlsx", header=None)

In [4]:
# Label the positional columns: station id, station name, time, temperature, dew point,
# humidity, wind direction, wind speed, gust speed, station pressure, precipitation.
df.columns = [
    "지점번호",
    "지점명",
    "시간",
    "기온",
    "이슬점",
    "습도",
    "풍향",
    "풍속",
    "GUST 풍속",
    "현지기압",
    "강수량",
]

In [5]:
# The gust-speed column is not used downstream; remove it.
df = df.drop(columns=["GUST 풍속"])

In [6]:
# Drop the last two characters of each cell, parse the rest as an integer and convert to Celsius.
# NOTE(review): presumably the cells look like "<int>°F" — confirm against the sheet.
df["기온"] = df["기온"].map(
    lambda raw_text: fahrenheit_to_celsius(int(raw_text[:-2]))
)

In [7]:
# Same parsing as the temperature column: strip two trailing characters, parse int, convert to Celsius.
# NOTE(review): presumably the cells look like "<int>°F" — confirm against the sheet.
df["이슬점"] = df["이슬점"].map(
    lambda raw_text: fahrenheit_to_celsius(int(raw_text[:-2]))
)

In [8]:
# Strip the last four characters, parse the integer speed and scale by 0.44704
# (the mph -> m/s factor), rounding to a whole number.
# NOTE(review): presumably the stripped suffix is " mph" — confirm against the sheet.
df["풍속"] = df["풍속"].map(
    lambda raw_text: round(int(raw_text[:-4])*0.44704, 0)
)

In [9]:
# Overwrite the whole precipitation column with 0.
# NOTE(review): this discards whatever the sheet contained in this column — confirm that is intended.
df["강수량"] = 0

In [10]:
# Strip the last two characters of each cell and parse the remainder as a float.
# NOTE(review): presumably the stripped suffix is a percent marker — confirm against the sheet.
df["습도"] = df["습도"].map(lambda raw_text: float(raw_text[:-2]))

In [11]:
# Compass-point abbreviation -> bearing in degrees, clockwise from north in 22.5-degree steps.
wd_dict = dict(
    zip(
        ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE",
         "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"],
        [0, 22.5, 45, 67.5, 90, 112.5, 135, 157.5,
         180, 202.5, 225, 247.5, 270, 292.5, 315, 337.5],
    )
)
# Calm and variable winds have no defined bearing.
wd_dict.update({"CALM": np.nan, "VAR": np.nan})

In [12]:
# Translate compass abbreviations to degrees; an abbreviation missing from wd_dict raises KeyError.
df["풍향"] = df["풍향"].map(wd_dict.__getitem__)

In [13]:
# Snap every bearing to a multiple of 10 degrees (ndigits=-1) using Python's built-in round.
df["풍향"] = df["풍향"].map(lambda bearing: round(bearing, -1))

In [14]:
# Strip the last three characters, parse the float reading and scale by 33.8639
# (the inHg -> hPa factor), keeping one decimal.
# NOTE(review): presumably the stripped suffix is an inches-of-mercury unit marker — confirm.
df["현지기압"] = df["현지기압"].map(
    lambda raw_text: round(float(raw_text[:-3]) * 33.8639, 1)
)

In [15]:
# Display the frame after the unit conversions for a visual check.
df

Unnamed: 0,지점번호,지점명,시간,기온,이슬점,습도,풍향,풍속,현지기압,강수량
0,50745,치치하르,02:00:00,11.7,9.4,87.0,340.0,1.0,987.1,0
1,50745,치치하르,05:00:00,11.7,8.9,83.0,340.0,1.0,987.5,0
2,50745,치치하르,08:00:00,16.1,8.3,60.0,320.0,2.0,988.1,0
3,50745,치치하르,11:00:00,20.6,7.2,43.0,320.0,3.0,987.1,0
4,50745,치치하르,14:00:00,23.3,7.8,36.0,340.0,3.0,986.5,0
...,...,...,...,...,...,...,...,...,...,...
274,59287,광조우,21:30:00,26.1,26.1,100.0,70.0,2.0,1007.1,0
275,59287,광조우,22:00:00,26.1,26.1,100.0,,1.0,1007.1,0
276,59287,광조우,22:30:00,26.1,26.1,100.0,,1.0,1007.1,0
277,59287,광조우,23:00:00,26.1,26.1,100.0,70.0,2.0,1007.1,0


In [63]:
# Target time slots: every three hours from 00:00 to 21:00 on 2024-06-09 (both ends included).
time_list = pd.date_range("2024-06-09 00:00", "2024-06-09 21:00", freq="3H")

In [64]:
# Display the generated 3-hourly slots.
time_list

DatetimeIndex(['2024-06-09 00:00:00', '2024-06-09 03:00:00',
               '2024-06-09 06:00:00', '2024-06-09 09:00:00',
               '2024-06-09 12:00:00', '2024-06-09 15:00:00',
               '2024-06-09 18:00:00', '2024-06-09 21:00:00'],
              dtype='datetime64[ns]', freq='3H')

In [65]:
# Build the skeleton of the output table: the full list of time slots repeated once per station.
# The station count comes from the data rather than a hard-coded 13, so the skeleton always
# has exactly the rows that the per-station fill loops expect.
station_count = len(df["지점번호"].unique())

time_list2 = [slot for _ in range(station_count) for slot in time_list]

# A single unnamed column (label 0) holding the repeated timestamps.
time_df = pd.DataFrame(time_list2)

In [66]:
# Placeholder column (label 1) that a later cell fills with station ids.
time_df[1] = 0

In [67]:
# Placeholder column (label 2) that a later cell fills with station names.
time_df[2] = 0

In [68]:
# Display the skeleton table before the station columns are filled in.
time_df

Unnamed: 0,0,1,2
0,2024-06-09 00:00:00,0,0
1,2024-06-09 03:00:00,0,0
2,2024-06-09 06:00:00,0,0
3,2024-06-09 09:00:00,0,0
4,2024-06-09 12:00:00,0,0
...,...,...,...
99,2024-06-09 09:00:00,0,0
100,2024-06-09 12:00:00,0,0
101,2024-06-09 15:00:00,0,0
102,2024-06-09 18:00:00,0,0


In [69]:
# Write each station id into its block of 8 consecutive rows (one row per time slot),
# in the order the ids first appear in df.
for station_pos, loc in enumerate(df["지점번호"].unique()):
    for index in range(station_pos * 8, (station_pos + 1) * 8):
        time_df.loc[index, 1] = loc
    

In [70]:
# Column 2 was created holding the integer 0. Cast it to object before writing station names
# so the string assignments below do not rely on pandas' deprecated silent dtype upcast.
time_df[2] = time_df[2].astype(object)

index = 0

# Write each station name into its block of 8 consecutive rows (one row per time slot),
# in the order the names first appear in df.
for loc in df["지점명"].unique():
    for _ in range(8):
        time_df.loc[index, 2] = loc
        index += 1

  time_df.loc[index, 2] = loc


In [71]:
# Name the three skeleton columns: timestamp, station id, station name.
time_df.columns = ["일시", "지점번호", "지점명"]

In [72]:
# Index the skeleton by timestamp; every timestamp repeats once per station, so the index is not unique.
time_df = time_df.set_index(keys="일시")

In [26]:
# Calendar date that is prefixed to every time-of-day value in df below.
date = "2024-06-09"

In [27]:
# Turn each time-of-day value into a "<date> <time>" string.
df["시간"] = df["시간"].map(lambda clock: f"{date} {clock}")

In [28]:
# Parse the combined "<date> <time>" strings into datetime objects.
df["시간"] = df["시간"].map(
    lambda stamp: datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")
)

In [29]:
# Move the time column (position 2) to the front; the remaining columns keep their order.
df = df.iloc[:, [2, 0, 1, *range(3, 10)]]

In [30]:
# Relabel after the reorder; the time column is now called "일시" (timestamp).
df.columns = [
    '일시',
    '지점번호',
    '지점명',
    '기온',
    '이슬점',
    '습도',
    '풍향',
    '풍속',
    '현지기압',
    '강수량',
]

In [31]:
# Index the observations by timestamp so they can be filtered by time window.
df = df.set_index(keys="일시")

In [32]:
# Display the timestamp-indexed observations.
df

Unnamed: 0_level_0,지점번호,지점명,기온,이슬점,습도,풍향,풍속,현지기압,강수량
일시,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-06-09 02:00:00,50745,치치하르,11.7,9.4,87.0,340.0,1.0,987.1,0
2024-06-09 05:00:00,50745,치치하르,11.7,8.9,83.0,340.0,1.0,987.5,0
2024-06-09 08:00:00,50745,치치하르,16.1,8.3,60.0,320.0,2.0,988.1,0
2024-06-09 11:00:00,50745,치치하르,20.6,7.2,43.0,320.0,3.0,987.1,0
2024-06-09 14:00:00,50745,치치하르,23.3,7.8,36.0,340.0,3.0,986.5,0
...,...,...,...,...,...,...,...,...,...
2024-06-09 21:30:00,59287,광조우,26.1,26.1,100.0,70.0,2.0,1007.1,0
2024-06-09 22:00:00,59287,광조우,26.1,26.1,100.0,,1.0,1007.1,0
2024-06-09 22:30:00,59287,광조우,26.1,26.1,100.0,,1.0,1007.1,0
2024-06-09 23:00:00,59287,광조우,26.1,26.1,100.0,70.0,2.0,1007.1,0


In [74]:
# Give time_df one NaN-filled column for every column of df after the first two
# (station id and station name).
for column in df.columns[2:].tolist():
    time_df[column] = np.nan

In [75]:
# Display the skeleton with its still-empty measurement columns.
time_df

Unnamed: 0_level_0,지점번호,지점명,기온,이슬점,습도,풍향,풍속,현지기압,강수량
일시,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-06-09 00:00:00,50745,치치하르,,,,,,,
2024-06-09 03:00:00,50745,치치하르,,,,,,,
2024-06-09 06:00:00,50745,치치하르,,,,,,,
2024-06-09 09:00:00,50745,치치하르,,,,,,,
2024-06-09 12:00:00,50745,치치하르,,,,,,,
...,...,...,...,...,...,...,...,...,...
2024-06-09 09:00:00,59287,광조우,,,,,,,
2024-06-09 12:00:00,59287,광조우,,,,,,,
2024-06-09 15:00:00,59287,광조우,,,,,,,
2024-06-09 18:00:00,59287,광조우,,,,,,,


In [77]:
# List the measurement columns that the aggregation loop fills.
time_df.columns[2:]

Index(['기온', '이슬점', '습도', '풍향', '풍속', '현지기압', '강수량'], dtype='object')

In [106]:
# Aggregate the raw observations into one row of statistics per row of time_df
# (one station at one 3-hour slot). `data` is built in time_df's row order.
data = []

for time, loc in zip(time_df.index, time_df["지점번호"]):
    # Each slot covers the window (time - 3h, time]: start excluded, end included.
    start = time - timedelta(hours = 3)
    end = time

    temp_df = df[(df.index > start) & (df.index <= end) & (df["지점번호"] == loc)]

    # For a window with no observations every mean is NaN and the precipitation sum is 0.
    temperature = round(temp_df.loc[:, "기온"].mean(), 1)
    dew_point = round(temp_df.loc[:, "이슬점"].mean(), 1)
    humidity = round(temp_df.loc[:, "습도"].mean(), 1)
    # Wind direction is averaged as an angle, then rounded to a multiple of 10 degrees.
    wind_dir = round(average_angle_ignore_nan(temp_df.loc[:, "풍향"]), -1)
    wind_speed = round(temp_df.loc[:, "풍속"].mean(), 1)
    air_press = round(temp_df.loc[:, "현지기압"].mean(), 1)
    percip = round(temp_df.loc[:, "강수량"].sum(), 1)

    value_list =  [temperature, dew_point, humidity, wind_dir, wind_speed, air_press, percip]

    data.append(value_list)

    print(value_list)

# Write the results back by row position. time_df's index repeats every timestamp once per
# station, so a label-based write such as `time_df.loc[time, column] = value` would hit the
# rows of all stations sharing that timestamp and leave every station with the last
# station's values.
if data:
    time_df.iloc[:, 2:] = data

[nan, nan, nan, nan, nan, nan, 0]
[11.7, 9.4, 87.0, 340.0, 1.0, 987.1, 0]
[11.7, 8.9, 83.0, 340.0, 1.0, 987.5, 0]
[16.1, 8.3, 60.0, 320.0, 2.0, 988.1, 0]
[20.6, 7.2, 43.0, 320.0, 3.0, 987.1, 0]
[23.3, 7.8, 36.0, 340.0, 3.0, 986.5, 0]
[24.4, 6.7, 31.0, 340.0, 3.0, 986.1, 0]
[20.0, 8.9, 48.0, nan, 0.0, 986.5, 0]
[23.9, 3.9, 27.0, nan, 1.0, 935.7, 0]
[24.1, 4.1, 27.2, 230.0, 3.2, 936.2, 0]
[23.3, 6.7, 33.8, 200.0, 2.7, 936.8, 0]
[27.3, 7.8, 29.0, 280.0, 2.8, 937.9, 0]
[30.4, 7.7, 24.0, 10.0, 2.3, 937.6, 0]
[32.1, 7.8, 22.2, 360.0, 3.2, 936.0, 0]
[32.8, 7.5, 20.5, 340.0, 4.2, 935.0, 0]
[30.4, 8.0, 24.8, 290.0, 3.5, 935.9, 0]
[nan, nan, nan, nan, nan, nan, 0]
[20.0, 9.4, 51.0, 220.0, 2.0, 885.9, 0]
[18.9, 9.4, 54.0, 200.0, 1.0, 885.5, 0]
[21.7, 10.6, 49.0, 200.0, 2.0, 886.6, 0]
[26.1, 11.1, 38.0, 180.0, 2.0, 886.6, 0]
[29.4, 3.9, 20.0, 180.0, 3.0, 885.5, 0]
[30.0, 5.6, 21.0, 200.0, 2.0, 883.2, 0]
[25.6, 9.4, 36.0, nan, 0.0, 883.2, 0]
[nan, nan, nan, nan, nan, nan, 0]
[16.1, 7.2, 54.0, 220.0

In [110]:
# Assign the collected rows by position. `data` was appended while iterating time_df's rows in
# order, and time_df's timestamp index is not unique, so positional assignment is what keeps
# each station's values on its own row.
time_df.iloc[:, 2:] = data

In [111]:
# Display the aggregated table.
time_df

Unnamed: 0_level_0,지점번호,지점명,기온,이슬점,습도,풍향,풍속,현지기압,강수량
일시,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-06-09 00:00:00,50745,치치하르,,,,,,,0.0
2024-06-09 03:00:00,50745,치치하르,11.7,9.4,87.0,340.0,1.0,987.1,0.0
2024-06-09 06:00:00,50745,치치하르,11.7,8.9,83.0,340.0,1.0,987.5,0.0
2024-06-09 09:00:00,50745,치치하르,16.1,8.3,60.0,320.0,2.0,988.1,0.0
2024-06-09 12:00:00,50745,치치하르,20.6,7.2,43.0,320.0,3.0,987.1,0.0
...,...,...,...,...,...,...,...,...,...
2024-06-09 09:00:00,59287,광조우,25.4,25.4,100.0,70.0,2.0,1005.4,0.0
2024-06-09 12:00:00,59287,광조우,26.4,26.3,99.1,100.0,2.0,1006.4,0.0
2024-06-09 15:00:00,59287,광조우,27.6,27.0,95.2,100.0,2.8,1005.8,0.0
2024-06-09 18:00:00,59287,광조우,27.8,26.5,90.7,100.0,2.7,1005.1,0.0


In [113]:
# Spot-check the aggregated rows of a single station (station name "상하이").
time_df[time_df["지점명"] == "상하이"]

Unnamed: 0_level_0,지점번호,지점명,기온,이슬점,습도,풍향,풍속,현지기압,강수량
일시,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-06-09 00:00:00,58362,상하이,22.8,18.9,78.0,160.0,2.0,1007.8,0.0
2024-06-09 03:00:00,58362,상하이,22.3,19.1,83.0,180.0,2.0,1007.8,0.0
2024-06-09 06:00:00,58362,상하이,22.2,20.0,88.0,190.0,1.8,1008.0,0.0
2024-06-09 09:00:00,58362,상하이,23.1,20.5,84.7,190.0,2.5,1009.3,0.0
2024-06-09 12:00:00,58362,상하이,28.0,18.4,56.5,210.0,3.7,1009.4,0.0
2024-06-09 15:00:00,58362,상하이,31.1,16.8,42.0,220.0,2.3,1008.5,0.0
2024-06-09 18:00:00,58362,상하이,30.2,16.1,42.3,190.0,4.3,1008.1,0.0
2024-06-09 21:00:00,58362,상하이,26.3,14.6,49.3,190.0,4.0,1009.3,0.0


In [114]:
# Export the aggregated table; the timestamp index is written as the first CSV column.
time_df.to_csv(path_or_buf="./china_weather.csv")

In [89]:
# Reload the exported CSV for inspection.
# NOTE(review): this cell's execution count (89) is lower than the export cell's (114), so the
# check_df shown in the saved outputs may predate the final export — re-run to confirm.
check_df = pd.read_csv(filepath_or_buffer="./china_weather.csv")

In [92]:
# Inspect the raw observations of the station named "YUSHU".
df[df["지점명"] == "YUSHU"]

Unnamed: 0_level_0,지점번호,지점명,기온,이슬점,습도,풍향,풍속,현지기압,강수량
일시,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-06-09 00:00:00,56029,YUSHU,7.2,5.0,87.0,290.0,1.0,650.2,0
2024-06-09 03:00:00,56029,YUSHU,7.8,6.7,93.0,110.0,1.0,649.8,0
2024-06-09 06:00:00,56029,YUSHU,8.3,7.2,93.0,,0.0,650.2,0
2024-06-09 09:00:00,56029,YUSHU,10.0,6.1,78.0,200.0,1.0,650.9,0
2024-06-09 12:00:00,56029,YUSHU,13.3,6.1,63.0,200.0,1.0,648.8,0


In [103]:
# Inspect the reloaded CSV rows of the station named "상하이".
check_df[check_df["지점명"] == "상하이"]

Unnamed: 0,일시,지점번호,지점명,기온,이슬점,습도,풍향,풍속,현지기압,강수량
80,2024-06-09 00:00:00,58362,상하이,26.1,26.1,100.0,140.0,2.0,1005.1,0.0
81,2024-06-09 03:00:00,58362,상하이,26.1,26.1,100.0,80.0,2.0,1004.4,0.0
82,2024-06-09 06:00:00,58362,상하이,25.0,25.0,100.0,70.0,2.0,1004.1,0.0
83,2024-06-09 09:00:00,58362,상하이,25.4,25.4,100.0,70.0,2.0,1005.4,0.0
84,2024-06-09 12:00:00,58362,상하이,26.4,26.3,99.1,100.0,2.0,1006.4,0.0
85,2024-06-09 15:00:00,58362,상하이,27.6,27.0,95.2,100.0,2.8,1005.8,0.0
86,2024-06-09 18:00:00,58362,상하이,27.8,26.5,90.7,100.0,2.7,1005.1,0.0
87,2024-06-09 21:00:00,58362,상하이,26.7,26.7,100.0,70.0,2.0,1006.4,0.0


In [96]:
# Debug filter: observations of station 56029 in the window (15:00, 18:00].
# Station ids are compared as text so the filter matches whether the column holds integers or
# strings; comparing an integer column directly with the string "56029" never matches.
temp_df = df[(df.index > "2024-06-09 15:00:00") & (df.index <= "2024-06-09 18:00:00") & (df["지점번호"].astype(str) == "56029")]

In [99]:
# Display the filtered debug window.
temp_df

Unnamed: 0_level_0,지점번호,지점명,기온,이슬점,습도,풍향,풍속,현지기압,강수량
일시,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1


In [102]:
# Mean temperature of the debug window, rounded to one decimal; NaN when the window is empty.
round(temp_df.loc[:, "기온"].mean(), 1)

nan

In [93]:
# Debug aid: print the raw observations that fall into each (slot, station) window.
for time, loc in zip(time_df.index, time_df["지점번호"]):
    # Window is (time - 3h, time]: start excluded, end included.
    start, end = time - timedelta(hours = 3), time

    in_window = (df.index > start) & (df.index <= end)
    temp_df = df[in_window & (df["지점번호"] == loc)]

    print(temp_df)

Empty DataFrame
Columns: [지점번호, 지점명, 기온, 이슬점, 습도, 풍향, 풍속, 현지기압, 강수량]
Index: []
                      지점번호   지점명    기온  이슬점    습도     풍향   풍속   현지기압  강수량
일시                                                                       
2024-06-09 02:00:00  50745  치치하르  11.7  9.4  87.0  340.0  1.0  987.1    0
                      지점번호   지점명    기온  이슬점    습도     풍향   풍속   현지기압  강수량
일시                                                                       
2024-06-09 05:00:00  50745  치치하르  11.7  8.9  83.0  340.0  1.0  987.5    0
                      지점번호   지점명    기온  이슬점    습도     풍향   풍속   현지기압  강수량
일시                                                                       
2024-06-09 08:00:00  50745  치치하르  16.1  8.3  60.0  320.0  2.0  988.1    0
                      지점번호   지점명    기온  이슬점    습도     풍향   풍속   현지기압  강수량
일시                                                                       
2024-06-09 11:00:00  50745  치치하르  20.6  7.2  43.0  320.0  3.0  987.1    0
                      지점번호   지점명 

                      지점번호  지점명    기온   이슬점     습도     풍향   풍속    현지기압  강수량
일시                                                                         
2024-06-09 00:30:00  59287  광조우  26.1  26.1  100.0  110.0  2.0  1005.1    0
2024-06-09 01:00:00  59287  광조우  26.1  26.1  100.0   70.0  2.0  1005.1    0
2024-06-09 01:30:00  59287  광조우  26.1  26.1  100.0   90.0  2.0  1004.1    0
2024-06-09 02:00:00  59287  광조우  26.1  26.1  100.0   70.0  2.0  1004.1    0
2024-06-09 02:30:00  59287  광조우  26.1  26.1  100.0   70.0  2.0  1004.1    0
2024-06-09 03:00:00  59287  광조우  26.1  26.1  100.0   90.0  2.0  1004.1    0
                      지점번호  지점명    기온   이슬점     습도    풍향   풍속    현지기압  강수량
일시                                                                        
2024-06-09 03:30:00  59287  광조우  25.0  25.0  100.0  90.0  3.0  1004.1    0
2024-06-09 04:00:00  59287  광조우  25.0  25.0  100.0  70.0  2.0  1004.1    0
2024-06-09 04:30:00  59287  광조우  25.0  25.0  100.0  90.0  2.0  1004.1    0
2024-06-09 05:00: