# 관측 지점별·연도-계절별 기상 통계(평균, 표준편차, 최댓값, 최솟값) 계산

In [1]:
import pandas as pd

In [2]:
# Load the labelled weather observations; the CSV is read with the CP949
# (Korean Windows) encoding.
weather_data = pd.read_csv(
    "./data/weather_fire_label.csv",
    encoding='cp949',
)

In [3]:
# Print a summary of the loaded frame: row count, column dtypes and memory usage.
weather_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16418972 entries, 0 to 16418971
Data columns (total 10 columns):
 #   Column   Dtype  
---  ------   -----  
 0   지점       int64  
 1   지점명      object 
 2   일시       object 
 3   기온(°C)   float64
 4   풍향(deg)  float64
 5   풍속(m/s)  float64
 6   강수량(mm)  float64
 7   습도(%)    float64
 8   실효습도     float64
 9   산불       int64  
dtypes: float64(6), int64(2), object(2)
memory usage: 1.2+ GB


In [4]:
# The '일시' column is loaded as plain strings (object dtype); convert it to
# datetimes so that year and month can be read from each value.
weather_data['일시'] = weather_data['일시'].pipe(pd.to_datetime)

In [5]:
def get_year_season(date):
    """Return a "<year>년 <season>" label for the given date.

    Seasons are assigned by month: March-May is '봄' (spring), June-August
    is '여름' (summer), September-November is '가을' (autumn) and every
    other month is '겨울' (winter).

    A winter spans two calendar years, so January and February are labelled
    with the previous year, i.e. the year in which that winter's December
    falls.

    Args:
        date: Any object exposing ``year`` and ``month`` attributes.

    Returns:
        The label string, e.g. ``"2021년 겨울"``.
    """
    season_by_month = {
        3: '봄', 4: '봄', 5: '봄',
        6: '여름', 7: '여름', 8: '여름',
        9: '가을', 10: '가을', 11: '가을',
    }
    # Months missing from the table (12, 1, 2) are winter.
    season = season_by_month.get(date.month, '겨울')

    # January/February belong to the winter that started the previous December.
    season_year = date.year - 1 if season == '겨울' and date.month != 12 else date.year

    return f"{season_year}년 {season}"

In [6]:
# Label every observation with its "<year>년 <season>" bucket; this column is
# the grouping key for the seasonal statistics.
weather_data['년도_계절'] = (
    weather_data['일시']
    .apply(get_year_season)
)

In [7]:
# Measurement columns to summarise.
cols_to_agg = [
    '기온(°C)',
    '풍속(m/s)',
    '실효습도',
]
# Statistics computed for each of the columns above.
aggregations = [
    'mean',
    'std',
    'max',
    'min',
]

In [8]:
# One row per (station id, station name, year-season); every selected column
# gets each of the requested statistics, yielding two-level column labels.
seasonal_yearly_stats = (
    weather_data
    .groupby(['지점', '지점명', '년도_계절'])[cols_to_agg]
    .agg(aggregations)
)

In [9]:
# Collapse the two-level (column, statistic) labels into flat names such as
# '기온(°C)_mean'.
seasonal_yearly_stats.columns = [
    '_'.join(label_parts).strip()
    for label_parts in seasonal_yearly_stats.columns
]

In [10]:
# Move the group keys out of the index so they become ordinary columns.
seasonal_yearly_stats_df = seasonal_yearly_stats.reset_index()

In [11]:
# Display the aggregated statistics table.
seasonal_yearly_stats_df

Unnamed: 0,지점,지점명,년도_계절,기온(°C)_mean,기온(°C)_std,기온(°C)_max,기온(°C)_min,풍속(m/s)_mean,풍속(m/s)_std,풍속(m/s)_max,풍속(m/s)_min,실효습도_mean,실효습도_std,실효습도_max,실효습도_min
0,12,안면도(감),2021년 겨울,0.220056,3.161141,9.0,-6.7,2.779096,1.284620,7.8,0.0,30.969174,5.990328,48.24,10.77
1,12,안면도(감),2022년 가을,13.710425,4.644870,27.2,-3.0,2.510518,1.580296,9.9,0.0,35.789795,7.446029,50.49,15.30
2,12,안면도(감),2022년 겨울,0.997446,3.941468,13.8,-12.7,2.667400,1.446731,9.8,0.0,33.156813,7.001269,50.49,15.51
3,12,안면도(감),2022년 봄,11.239909,4.818339,21.9,-0.4,2.704444,1.575340,9.0,0.0,36.345782,7.697753,50.49,12.63
4,12,안면도(감),2022년 여름,23.586866,3.066409,32.0,15.4,3.030118,2.091135,11.0,0.0,42.881834,5.871551,50.49,14.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6934,996,화동,2023년 겨울,1.364429,5.596744,17.3,-14.3,3.272318,1.803542,10.5,0.0,38.482983,8.475648,49.98,13.74
6935,996,화동,2024년 가을,15.997287,8.941231,32.2,-3.1,2.538547,1.526818,7.5,0.0,41.225579,8.306353,50.49,12.24
6936,996,화동,2024년 겨울,-0.070430,3.626753,13.9,-9.0,3.244489,1.216883,6.8,0.0,33.311653,7.918415,49.47,12.06
6937,996,화동,2024년 봄,5.271505,4.995941,18.5,-9.1,3.367070,1.619432,10.5,0.0,30.925242,11.510561,49.98,7.14


In [14]:
# Sort seasons chronologically within each station. The '년도_계절' label
# sorts alphabetically, so it is split into a numeric year and a season
# rank (spring < summer < autumn < winter) to serve as sort keys.
season_order = {'봄': 1, '여름': 2, '가을': 3, '겨울': 4}

year_season_labels = seasonal_yearly_stats_df['년도_계절']
seasonal_yearly_stats_df['년도'] = year_season_labels.str.extract(r'(\d{4})').astype(int)
seasonal_yearly_stats_df['계절명'] = year_season_labels.str.extract(r'\d{4}년\s(.+)')
seasonal_yearly_stats_df['계절순서'] = seasonal_yearly_stats_df['계절명'].map(season_order)

# The helper columns are only needed as sort keys; drop them from the result.
sort_helper_columns = ['년도', '계절명', '계절순서']
seasonal_yearly_stats_df_sorted = (
    seasonal_yearly_stats_df
    .sort_values(by=['지점', '년도', '계절순서'])
    .drop(columns=sort_helper_columns)
)

In [16]:
# Preview the first 10 rows of the sorted table.
seasonal_yearly_stats_df_sorted.head(10)

Unnamed: 0,지점,지점명,년도_계절,기온(°C)_mean,기온(°C)_std,기온(°C)_max,기온(°C)_min,풍속(m/s)_mean,풍속(m/s)_std,풍속(m/s)_max,풍속(m/s)_min,실효습도_mean,실효습도_std,실효습도_max,실효습도_min
0,12,안면도(감),2021년 겨울,0.220056,3.161141,9.0,-6.7,2.779096,1.28462,7.8,0.0,30.969174,5.990328,48.24,10.77
3,12,안면도(감),2022년 봄,11.239909,4.818339,21.9,-0.4,2.704444,1.57534,9.0,0.0,36.345782,7.697753,50.49,12.63
4,12,안면도(감),2022년 여름,23.586866,3.066409,32.0,15.4,3.030118,2.091135,11.0,0.0,42.881834,5.871551,50.49,14.1
1,12,안면도(감),2022년 가을,13.710425,4.64487,27.2,-3.0,2.510518,1.580296,9.9,0.0,35.789795,7.446029,50.49,15.3
2,12,안면도(감),2022년 겨울,0.997446,3.941468,13.8,-12.7,2.6674,1.446731,9.8,0.0,33.156813,7.001269,50.49,15.51
7,12,안면도(감),2023년 봄,11.912164,5.196702,24.9,-0.2,2.807078,1.704865,10.7,0.0,35.259397,9.775676,50.49,7.65
8,12,안면도(감),2023년 여름,22.466086,3.279589,32.4,14.9,2.512671,1.656223,10.1,0.1,43.747551,5.10428,50.49,25.71
5,12,안면도(감),2023년 가을,15.984112,6.782929,28.6,-2.7,2.685943,1.509575,11.0,0.1,36.776854,7.76619,50.49,15.48
6,12,안면도(감),2023년 겨울,3.626669,4.334911,14.7,-9.5,2.926386,1.573438,9.4,0.1,37.840975,6.951576,50.49,20.1
11,12,안면도(감),2024년 봄,10.108382,5.142093,22.8,-4.5,2.547143,1.529943,9.8,0.0,36.957619,8.369293,50.49,9.15


In [17]:
# Save the sorted statistics to CSV; index=False leaves out the row index.
seasonal_yearly_stats_df_sorted.to_csv('seasonal_yearly_weather_stats_sorted.csv', index=False)