In [53]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv('weather.csv', encoding='CP949')

In [54]:
data.columns

Index(['지점', '지점명', '일시', '평균기온(°C)', '최저기온(°C)', '최저기온 시각(hhmi)', '최고기온(°C)',
       '최고기온 시각(hhmi)', '일강수량(mm)', '최대 풍속(m/s)', '최대 풍속 풍향(16방위)',
       '평균 풍속(m/s)', '풍정합(100m)', '최다풍향(16방위)', '평균 이슬점온도(°C)', '평균 상대습도(%)',
       '평균 증기압(hPa)', '평균 현지기압(hPa)', '최고 해면기압(hPa)', '최저 해면기압(hPa)',
       '평균 해면기압(hPa)', '합계 일조시간(hr)', '합계 일사량(MJ/m2)', '평균 전운량(1/10)',
       '평균 중하층운량(1/10)', '평균 지면온도(°C)', '최저 초상온도(°C)', '평균 30cm 지중온도(°C)',
       '1.5m 지중온도(°C)', '합계 대형증발량(mm)'],
      dtype='object')

In [55]:
data.head()

Unnamed: 0,지점,지점명,일시,평균기온(°C),최저기온(°C),최저기온 시각(hhmi),최고기온(°C),최고기온 시각(hhmi),일강수량(mm),최대 풍속(m/s),...,평균 해면기압(hPa),합계 일조시간(hr),합계 일사량(MJ/m2),평균 전운량(1/10),평균 중하층운량(1/10),평균 지면온도(°C),최저 초상온도(°C),평균 30cm 지중온도(°C),1.5m 지중온도(°C),합계 대형증발량(mm)
0,90,속초,2011-01-01,-0.3,-5.0,100.0,1.7,1534.0,1.8,6.3,...,1021.3,0.0,,9.0,7.3,-1.9,-5.7,,,
1,90,속초,2011-01-02,0.9,-2.3,731.0,6.1,1353.0,1.4,4.1,...,1026.4,7.6,,4.4,4.1,-0.8,-5.0,,,
2,90,속초,2011-01-03,-0.6,-4.3,2351.0,3.8,1309.0,8.8,3.3,...,1023.0,3.3,,7.3,6.8,-0.1,-4.6,,,
3,90,속초,2011-01-04,0.2,-4.3,611.0,6.3,1340.0,,3.9,...,1018.9,8.3,,1.9,0.0,-0.8,-7.0,,,
4,90,속초,2011-01-05,-0.6,-3.5,2339.0,3.8,1255.0,,7.7,...,1017.0,7.3,,1.6,0.5,-1.3,-7.3,,,


In [56]:
data_columns = {'지점': 'location', 
                '지점명': 'location_name', 
                '일시': 'date', 
                '평균기온(°C)': 'avg_temp', 
                '최저기온(°C)': 'low_temp', 
                '최저기온 시각(hhmi)': 'low_temp_time', 
                '최고기온(°C)': 'high_temp',
                '최고기온 시각(hhmi)': 'high_temp_time', 
                '일강수량(mm)': 'rainfall', 
                '최대 풍속(m/s)': 'max_wind_speed', 
                '최대 풍속 풍향(16방위)': 'max_wind_direction',
                '평균 풍속(m/s)': 'avg_wind_speed',
                '풍정합(100m)': 'wind_sum',
                '최다풍향(16방위)': 'max_wind_direction', 
                '평균 이슬점온도(°C)': 'avg_dew_point_temp', 
                '평균 상대습도(%)': 'avg_humidity',
                '평균 증기압(hPa)': 'avg_vapor_pressure', 
                '평균 현지기압(hPa)': 'avg_pressure', 
                '최고 해면기압(hPa)': 'max_sea_pressure', 
                '최저 해면기압(hPa)': 'min_sea_pressure',
                '평균 해면기압(hPa)': 'avg_sea_pressure', 
                '합계 일조시간(hr)': 'total_sunlight', 
                '합계 일사량(MJ/m2)': 'total_solar_radiation',
                '평균 전운량(1/10)': 'avg_cloud',
                '평균 중하층운량(1/10)': 'avg_mid_low_cloud', 
                '평균 지면온도(°C)': 'avg_ground_temp', 
                '최저 초상온도(°C)': 'min_high_temp', # minimum grass temperature 땅 위에 접해 있는 풀 위의 공기온도를 말한다. 야간복사냉각에 의한 서리의 발생을 예상하는 가늠이 된다.
                '평균 30cm 지중온도(°C)': 'avg_30cm_ground_temp',
                '1.5m 지중온도(°C)': '1.5m_ground_temp', 
                '합계 대형증발량(mm)': 'total_evaporation',
            }

In [57]:
data.rename(columns=data_columns, inplace=True)
data.sample(10)

Unnamed: 0,location,location_name,date,avg_temp,low_temp,low_temp_time,high_temp,high_temp_time,rainfall,max_wind_speed,...,avg_sea_pressure,total_sunlight,total_solar_radiation,avg_cloud,avg_mid_low_cloud,avg_ground_temp,min_high_temp,avg_30cm_ground_temp,1.5m_ground_temp,total_evaporation
10350,98,동두천,2015-01-31,-5.8,-11.6,730.0,1.7,1552.0,,3.0,...,1032.2,9.6,,,,-2.2,-13.8,,,
289776,271,봉화,2015-04-10,6.4,-1.7,431.0,16.0,1253.0,,4.1,...,1022.8,7.8,,,,9.1,-3.9,,,
143430,174,순천,2019-08-20,25.1,19.7,516.0,30.1,1301.0,,3.8,...,1009.0,7.5,,4.8,1.6,29.3,19.8,,,
143452,174,순천,2019-09-11,25.0,22.0,359.0,30.4,1451.0,0.7,3.7,...,1013.8,3.0,,8.0,5.0,28.4,21.3,,,
252753,256,주암,2011-11-22,4.2,-3.2,659.0,10.7,1349.0,0.2,1.9,...,1027.3,2.6,,,,6.4,-6.8,,,
213988,238,금산,2017-05-04,19.6,10.4,543.0,27.2,1552.0,,3.3,...,1016.0,7.2,,,,21.7,8.8,,,
25074,102,백령도,2015-05-23,17.4,14.5,29.0,23.5,1325.0,,6.2,...,1011.8,13.2,,2.4,1.5,26.1,13.6,,,
78804,135,추풍령,2012-06-22,23.5,16.6,545.0,30.1,1627.0,,5.6,...,1009.1,9.5,22.64,,,28.7,16.4,,,
312080,279,구미,2016-05-02,19.5,11.1,541.0,26.6,1413.0,0.9,3.4,...,1013.2,,,,,22.4,6.1,,,
131242,169,흑산도,2016-01-09,4.0,2.5,1756.0,6.5,1338.0,0.0,6.2,...,1025.9,3.1,6.86,6.1,6.1,5.5,0.2,,,


In [58]:
data.groupby('location_name')['location'].count()

location_name
강릉     3653
강진군    3651
강화     3652
거제     3652
거창     3653
       ... 
합천     3653
해남     3653
홍성     1885
홍천     3653
흑산도    3653
Name: location, Length: 98, dtype: int64

In [59]:
data['location_name'].nunique()

98

In [60]:
data.groupby('location_name')['location'].count().sort_values(ascending=False)

location_name
강릉       3653
장수       3653
인천       3653
인제       3653
이천       3653
         ... 
홍성       1885
북춘천      1553
대구(기)     619
세종        581
주암        540
Name: location, Length: 98, dtype: int64

In [61]:
data[data['location_name']=='서울']['location'].count()

3653

In [62]:
len(data)

343008

In [63]:
seoul = data[data['location_name']=='서울']
seoul.head(15)

Unnamed: 0,location,location_name,date,avg_temp,low_temp,low_temp_time,high_temp,high_temp_time,rainfall,max_wind_speed,...,avg_sea_pressure,total_sunlight,total_solar_radiation,avg_cloud,avg_mid_low_cloud,avg_ground_temp,min_high_temp,avg_30cm_ground_temp,1.5m_ground_temp,total_evaporation
38083,108,서울,2011-01-01,-6.8,-10.4,154.0,-2.9,1457.0,,4.7,...,1023.0,8.8,9.45,3.1,1.3,-3.5,-18.1,-0.1,10.0,
38084,108,서울,2011-01-02,-5.4,-8.5,621.0,-1.2,1455.0,,4.2,...,1027.6,8.9,9.73,0.0,0.0,-3.4,-15.3,-0.1,9.7,
38085,108,서울,2011-01-03,-4.5,-8.5,631.0,-0.3,1525.0,,4.8,...,1023.3,7.1,8.24,3.6,0.0,-3.1,-15.1,-0.1,9.6,
38086,108,서울,2011-01-04,-3.9,-7.4,648.0,-1.7,1355.0,,4.8,...,1022.1,0.6,5.93,5.8,5.4,-2.7,-16.1,-0.1,9.5,
38087,108,서울,2011-01-05,-4.0,-7.7,2353.0,-1.8,1208.0,,8.0,...,1021.5,5.0,8.24,5.5,5.5,-1.7,-10.1,-0.1,9.4,
38088,108,서울,2011-01-06,-7.6,-9.8,753.0,-4.6,1607.0,,5.1,...,1026.6,9.0,10.7,0.0,0.0,-4.0,-13.6,-0.1,9.3,
38089,108,서울,2011-01-07,-7.0,-10.6,753.0,-2.9,1502.0,,4.5,...,1028.9,9.1,10.84,0.0,0.0,-5.0,-16.6,-0.3,9.2,
38090,108,서울,2011-01-08,-3.6,-8.2,243.0,0.1,1713.0,0.2,4.8,...,1024.9,1.6,5.35,7.5,6.0,-3.4,-15.0,-0.5,9.1,
38091,108,서울,2011-01-09,-7.0,-10.3,2247.0,-0.3,0.0,0.0,6.5,...,1026.2,8.9,10.98,1.9,0.9,-3.1,-14.7,-0.2,9.1,
38092,108,서울,2011-01-10,-8.2,-11.8,538.0,-4.6,1443.0,,5.4,...,1026.7,8.0,10.89,3.1,0.5,-5.8,-18.5,-0.6,8.9,


In [64]:
seoul = seoul.drop(['location', 'location_name'], axis=1)
seoul.reset_index(drop=True, inplace=True)
seoul.head(15)

Unnamed: 0,date,avg_temp,low_temp,low_temp_time,high_temp,high_temp_time,rainfall,max_wind_speed,max_wind_direction,avg_wind_speed,...,avg_sea_pressure,total_sunlight,total_solar_radiation,avg_cloud,avg_mid_low_cloud,avg_ground_temp,min_high_temp,avg_30cm_ground_temp,1.5m_ground_temp,total_evaporation
0,2011-01-01,-6.8,-10.4,154.0,-2.9,1457.0,,4.7,290.0,1.9,...,1023.0,8.8,9.45,3.1,1.3,-3.5,-18.1,-0.1,10.0,
1,2011-01-02,-5.4,-8.5,621.0,-1.2,1455.0,,4.2,270.0,1.9,...,1027.6,8.9,9.73,0.0,0.0,-3.4,-15.3,-0.1,9.7,
2,2011-01-03,-4.5,-8.5,631.0,-0.3,1525.0,,4.8,270.0,2.1,...,1023.3,7.1,8.24,3.6,0.0,-3.1,-15.1,-0.1,9.6,
3,2011-01-04,-3.9,-7.4,648.0,-1.7,1355.0,,4.8,270.0,2.2,...,1022.1,0.6,5.93,5.8,5.4,-2.7,-16.1,-0.1,9.5,
4,2011-01-05,-4.0,-7.7,2353.0,-1.8,1208.0,,8.0,250.0,3.8,...,1021.5,5.0,8.24,5.5,5.5,-1.7,-10.1,-0.1,9.4,
5,2011-01-06,-7.6,-9.8,753.0,-4.6,1607.0,,5.1,270.0,2.9,...,1026.6,9.0,10.7,0.0,0.0,-4.0,-13.6,-0.1,9.3,
6,2011-01-07,-7.0,-10.6,753.0,-2.9,1502.0,,4.5,290.0,2.1,...,1028.9,9.1,10.84,0.0,0.0,-5.0,-16.6,-0.3,9.2,
7,2011-01-08,-3.6,-8.2,243.0,0.1,1713.0,0.2,4.8,270.0,2.5,...,1024.9,1.6,5.35,7.5,6.0,-3.4,-15.0,-0.5,9.1,
8,2011-01-09,-7.0,-10.3,2247.0,-0.3,0.0,0.0,6.5,270.0,4.1,...,1026.2,8.9,10.98,1.9,0.9,-3.1,-14.7,-0.2,9.1,
9,2011-01-10,-8.2,-11.8,538.0,-4.6,1443.0,,5.4,270.0,2.4,...,1026.7,8.0,10.89,3.1,0.5,-5.8,-18.5,-0.6,8.9,


In [65]:
seoul.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3653 entries, 0 to 3652
Data columns (total 28 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   date                   3653 non-null   object 
 1   avg_temp               3653 non-null   float64
 2   low_temp               3653 non-null   float64
 3   low_temp_time          3653 non-null   float64
 4   high_temp              3652 non-null   float64
 5   high_temp_time         3652 non-null   float64
 6   rainfall               1412 non-null   float64
 7   max_wind_speed         3651 non-null   float64
 8   max_wind_direction     3651 non-null   float64
 9   avg_wind_speed         3650 non-null   float64
 10  wind_sum               3650 non-null   float64
 11  max_wind_direction     3648 non-null   float64
 12  avg_dew_point_temp     3653 non-null   float64
 13  avg_humidity           3653 non-null   float64
 14  avg_vapor_pressure     3653 non-null   float64
 15  avg_

In [66]:
time_data = pd.to_datetime(seoul['date'], format='%Y-%m-%d')

In [67]:
seoul['year'] = time_data.dt.year
seoul['month'] = time_data.dt.month
seoul['day'] = time_data.dt.day
seoul

Unnamed: 0,date,avg_temp,low_temp,low_temp_time,high_temp,high_temp_time,rainfall,max_wind_speed,max_wind_direction,avg_wind_speed,...,avg_cloud,avg_mid_low_cloud,avg_ground_temp,min_high_temp,avg_30cm_ground_temp,1.5m_ground_temp,total_evaporation,year,month,day
0,2011-01-01,-6.8,-10.4,154.0,-2.9,1457.0,,4.7,290.0,1.9,...,3.1,1.3,-3.5,-18.1,-0.1,10.0,,2011,1,1
1,2011-01-02,-5.4,-8.5,621.0,-1.2,1455.0,,4.2,270.0,1.9,...,0.0,0.0,-3.4,-15.3,-0.1,9.7,,2011,1,2
2,2011-01-03,-4.5,-8.5,631.0,-0.3,1525.0,,4.8,270.0,2.1,...,3.6,0.0,-3.1,-15.1,-0.1,9.6,,2011,1,3
3,2011-01-04,-3.9,-7.4,648.0,-1.7,1355.0,,4.8,270.0,2.2,...,5.8,5.4,-2.7,-16.1,-0.1,9.5,,2011,1,4
4,2011-01-05,-4.0,-7.7,2353.0,-1.8,1208.0,,8.0,250.0,3.8,...,5.5,5.5,-1.7,-10.1,-0.1,9.4,,2011,1,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3648,2020-12-27,5.8,1.4,1.0,10.0,1412.0,0.0,2.8,50.0,1.8,...,7.5,4.9,1.4,-0.9,1.7,10.3,1.2,2020,12,27
3649,2020-12-28,6.7,4.2,826.0,11.4,1344.0,1.3,3.1,290.0,1.4,...,5.3,2.4,2.7,-0.6,2.1,10.1,1.3,2020,12,28
3650,2020-12-29,0.1,-6.2,2356.0,4.3,6.0,0.2,6.1,320.0,2.9,...,8.5,5.5,-0.1,-6.6,2.7,10.0,0.8,2020,12,29
3651,2020-12-30,-10.9,-12.9,2222.0,-6.2,1.0,,6.2,270.0,4.1,...,0.8,0.8,-4.4,-14.0,2.4,9.9,0.9,2020,12,30


In [69]:
seoul = seoul[seoul.columns.tolist()[-3:]+seoul.columns.tolist()[1:-3]]
seoul

Unnamed: 0,year,month,day,avg_temp,low_temp,low_temp_time,high_temp,high_temp_time,rainfall,max_wind_speed,...,avg_sea_pressure,total_sunlight,total_solar_radiation,avg_cloud,avg_mid_low_cloud,avg_ground_temp,min_high_temp,avg_30cm_ground_temp,1.5m_ground_temp,total_evaporation
0,2011,1,1,-6.8,-10.4,154.0,-2.9,1457.0,,4.7,...,1023.0,8.8,9.45,3.1,1.3,-3.5,-18.1,-0.1,10.0,
1,2011,1,2,-5.4,-8.5,621.0,-1.2,1455.0,,4.2,...,1027.6,8.9,9.73,0.0,0.0,-3.4,-15.3,-0.1,9.7,
2,2011,1,3,-4.5,-8.5,631.0,-0.3,1525.0,,4.8,...,1023.3,7.1,8.24,3.6,0.0,-3.1,-15.1,-0.1,9.6,
3,2011,1,4,-3.9,-7.4,648.0,-1.7,1355.0,,4.8,...,1022.1,0.6,5.93,5.8,5.4,-2.7,-16.1,-0.1,9.5,
4,2011,1,5,-4.0,-7.7,2353.0,-1.8,1208.0,,8.0,...,1021.5,5.0,8.24,5.5,5.5,-1.7,-10.1,-0.1,9.4,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3648,2020,12,27,5.8,1.4,1.0,10.0,1412.0,0.0,2.8,...,1021.6,5.9,8.12,7.5,4.9,1.4,-0.9,1.7,10.3,1.2
3649,2020,12,28,6.7,4.2,826.0,11.4,1344.0,1.3,3.1,...,1023.1,8.0,9.12,5.3,2.4,2.7,-0.6,2.1,10.1,1.3
3650,2020,12,29,0.1,-6.2,2356.0,4.3,6.0,0.2,6.1,...,1021.9,0.0,1.98,8.5,5.5,-0.1,-6.6,2.7,10.0,0.8
3651,2020,12,30,-10.9,-12.9,2222.0,-6.2,1.0,,6.2,...,1023.8,8.3,11.36,0.8,0.8,-4.4,-14.0,2.4,9.9,0.9


In [73]:
seoul.isnull().sum()

year                        0
month                       0
day                         0
avg_temp                    0
low_temp                    0
low_temp_time               0
high_temp                   1
high_temp_time              1
rainfall                 2241
max_wind_speed              2
max_wind_direction          2
max_wind_direction          5
avg_wind_speed              3
wind_sum                    3
max_wind_direction          2
max_wind_direction          5
avg_dew_point_temp          0
avg_humidity                0
avg_vapor_pressure          0
avg_pressure                1
max_sea_pressure            1
min_sea_pressure            2
avg_sea_pressure            1
total_sunlight             11
total_solar_radiation      24
avg_cloud                   0
avg_mid_low_cloud          21
avg_ground_temp             1
min_high_temp               3
avg_30cm_ground_temp       25
1.5m_ground_temp            9
total_evaporation         852
dtype: int64

In [74]:
data['rainfall'].isnull().sum()/len(data)

0.6347607052896725