## 연도별 기온 분포: 일부 지정한 월 (시각화2)
+ 작성: 임경호

### 1. DB에 저장된 기온 데이터 가져오기

In [32]:
import pandas as pd

# user-defined modules
import sys
# Directory that holds the project-local helper modules imported below
# (dbconnect, myutil).
# Raw string: the Windows path is full of backslashes, and sequences such as
# "\P" or "\d" are invalid escapes in a normal string literal (they trigger a
# warning on current Python versions).
module_path = r"D:\PythonProject\data-gatherer\common"
# Only register the directory once so re-running the cell does not keep
# growing sys.path with duplicates.
if module_path not in sys.path:
    sys.path.append(module_path)
import dbconnect
import myutil

# Query window: the first temperature date stored in the DB, up to the
# previous day as reported by myutil.get_previous_day().
beg_date = '20120101'
end_date = myutil.get_previous_day()

# Daily temperature columns from the weather table, ordered by date.
query = """SELECT date_id, ta_avg, ta_max, ta_min, ta_diff \
             FROM weather \
            WHERE date_id between %(date1)s and %(date2)s \
            ORDER BY date_id;"""
engine = dbconnect.db_engine("DEMO_DW")

# Run the SQL with bound parameters rather than string formatting.
date_window = {'date1': beg_date, 'date2': end_date}
df_weather = pd.read_sql(sql=query, con=engine, params=date_window)

# Outlier removal: drop every row whose average temperature is -50 or lower.
is_outlier = df_weather['ta_avg'].le(-50)
outlier_rows = df_weather.loc[is_outlier].index
df_weather = df_weather.drop(index=outlier_rows)

In [33]:
# Display the (rows, columns) size of df_weather.
df_weather.shape

(4215, 5)

In [34]:
# Display df_weather (daily temperature rows) for a visual check.
df_weather

Unnamed: 0,date_id,ta_avg,ta_max,ta_min,ta_diff
0,20120101,1.2,5.6,-3.1,8.7
1,20120102,-1.0,4.5,-4.4,8.9
2,20120103,-0.1,3.8,-2.6,6.4
3,20120104,-3.2,-0.4,-4.8,4.4
4,20120105,-2.1,3.1,-5.6,8.7
...,...,...,...,...,...
4212,20230714,25.6,27.8,24.4,3.4
4213,20230715,27.0,29.9,24.0,5.9
4214,20230716,24.4,27.8,22.8,5.0
4215,20230717,26.7,32.4,23.1,9.3


### 2. 기온 데이터 연월 평균 분포 구성

In [35]:
# Derive 'year' and 'month' from the date_id text (characters 0-3 and 4-5)
# and place them at column positions 1 and 2.
# DataFrame.insert raises ValueError when the column already exists, so
# columns that are already present are skipped; this keeps the cell safe to
# re-run without reloading df_weather first.
for position, column, span in ((1, 'year', slice(0, 4)),
                               (2, 'month', slice(4, 6))):
    if column not in df_weather.columns:
        df_weather.insert(position, column, df_weather['date_id'].str[span])

In [36]:
# Display df_weather again to check the year and month columns.
df_weather

Unnamed: 0,date_id,year,month,ta_avg,ta_max,ta_min,ta_diff
0,20120101,2012,01,1.2,5.6,-3.1,8.7
1,20120102,2012,01,-1.0,4.5,-4.4,8.9
2,20120103,2012,01,-0.1,3.8,-2.6,6.4
3,20120104,2012,01,-3.2,-0.4,-4.8,4.4
4,20120105,2012,01,-2.1,3.1,-5.6,8.7
...,...,...,...,...,...,...,...
4212,20230714,2023,07,25.6,27.8,24.4,3.4
4213,20230715,2023,07,27.0,29.9,24.0,5.9
4214,20230716,2023,07,24.4,27.8,22.8,5.0
4215,20230717,2023,07,26.7,32.4,23.1,9.3


In [37]:
# Year-month temperature averages: one row per (year, month) group, with the
# mean of each daily measure stored in a column named mean_<measure>.
measures = ['ta_avg', 'ta_max', 'ta_min', 'ta_diff']
monthly_means = {f'mean_{name}': (name, 'mean') for name in measures}
df_weather_ym = df_weather.groupby(['year', 'month']).agg(**monthly_means)
df_weather_ym

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_ta_avg,mean_ta_max,mean_ta_min,mean_ta_diff
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012,01,0.841935,5.112903,-2.819355,7.932258
2012,02,1.075862,5.937931,-3.175862,9.113793
2012,03,7.674194,12.712903,3.493548,9.219355
2012,04,15.080000,21.316667,9.533333,11.783333
2012,05,20.116129,26.154839,15.070968,11.083871
...,...,...,...,...,...
2023,03,11.751613,18.783871,5.374194,13.409677
2023,04,14.843333,20.906667,8.773333,12.133333
2023,05,19.280645,25.061290,13.751613,11.309677
2023,06,24.033333,29.586667,19.283333,10.303333


In [38]:
# Move the current index levels of df_weather_ym back into ordinary columns
# and renumber the rows from 0.
df_weather_ym = df_weather_ym.reset_index()
df_weather_ym

Unnamed: 0,year,month,mean_ta_avg,mean_ta_max,mean_ta_min,mean_ta_diff
0,2012,01,0.841935,5.112903,-2.819355,7.932258
1,2012,02,1.075862,5.937931,-3.175862,9.113793
2,2012,03,7.674194,12.712903,3.493548,9.219355
3,2012,04,15.080000,21.316667,9.533333,11.783333
4,2012,05,20.116129,26.154839,15.070968,11.083871
...,...,...,...,...,...,...
134,2023,03,11.751613,18.783871,5.374194,13.409677
135,2023,04,14.843333,20.906667,8.773333,12.133333
136,2023,05,19.280645,25.061290,13.751613,11.309677
137,2023,06,24.033333,29.586667,19.283333,10.303333


### 3. 기온 데이터 분포 Graph

In [41]:
# plotly package
#   pip install plotly
# If the plotly code does not work properly, run this on the command line:
#   pip install --upgrade nbformat
import plotly.express as px
import webbrowser

# Months to compare, written as zero-padded strings so they match the text
# values in the 'month' column.
month = ['05', '06', '07']
beg_year = beg_date[0:4]
end_year = end_date[0:4]

# Keep only the selected months; "@month" makes query() read the Python
# variable defined above.
df_weather_m = df_weather_ym.query('month in @month')
graph_title = f'Yearly Weather <Month: {month}> (from {beg_year} to {end_year})'

# One coloured line per month: x = year, y = monthly mean of ta_avg.
# All columns are exposed in the hover tooltip.
fig = px.line(
    df_weather_m,
    title=graph_title,
    x='year',
    y='mean_ta_avg',
    hover_data=df_weather_m.columns,
    color='month',
)
fig.show()

# Raw string instead of an f-string: there are no placeholders to format, and
# the backslashes of the Windows path ("\P", "\d", "\k", "\o") are invalid
# escape sequences in a normal literal.
file_path = r"D:\PythonProject\data-gatherer\kma\output\연도별 기온분포(line).html"
fig.write_html(file_path)
# Open the saved HTML file in a new browser tab.
webbrowser.open_new_tab(file_path)

True