- [CODiS](https://e-service.cwb.gov.tw/HistoryDataQuery/index.jsp)  (更新時間為每日12:00)
- [內湖站氣象觀測日報表](https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0A9F0&stname=%25E5%2585%25A7%25E6%25B9%2596&datepicker=2020-05-05)

In [1]:
import requests
import datetime
import time
import random
from pyquery import PyQuery as pq
from selenium import webdriver
import urllib.parse
import numpy as np
import pandas as pd 

In [2]:
# import datetime
def get_dates(start_date = datetime.date(2019, 1, 1), 
              end_date = datetime.date(2019, 1, 31) , 
              day_delta = datetime.timedelta(days=1)):
    '''
    Return the ISO-format (YYYY-MM-DD) date strings from start_date up to and
    including end_date, advancing by day_delta each step.

    start_date: datetime.date, first date in the list, e.g. datetime.date(2019, 1, 1)
    end_date: datetime.date, last date that may appear, e.g. datetime.date(2019, 1, 31)
    day_delta: datetime.timedelta, step between consecutive dates, e.g.
               datetime.timedelta(days=1); expected to be a whole number of days

    Returns a list of str. The list is empty when end_date cannot be reached
    from start_date in the direction of day_delta (e.g. end_date < start_date
    with a positive step).

    Raises ValueError if day_delta is zero.
    '''
    if not day_delta:
        raise ValueError("day_delta must be non-zero")

    # Count steps of size day_delta, not calendar days, so that steps larger
    # than one day never run past end_date. timedelta // timedelta floors to
    # an int; a negative count simply yields an empty range.
    n_dates = (end_date - start_date) // day_delta + 1
    return [(start_date + i * day_delta).isoformat() for i in range(n_dates)]
    
# dates_str_list = []
# for i in range((end_date - start_date).days + 1):
#     dates_str_list.append(str(start_date + i*day_delta))
#     print(dates_str_list)

# while start_date <= end_date:
#     print(start_date)
#     start_date += delta

# for i in range((end_date - start_date).days):
#     print(start_date + i*day_delta)

In [3]:
# from pyquery import PyQuery as pq
def get_cwb_weather_info(date:str) -> tuple:
    '''
    Fetch the CWB daily report page of station C0A9F0 for the given date and
    return (pressure in hPa, temperature in Celsius, relative humidity in %).

    date: str, eg. 2020-01-31 (YYYY-MM-DD)

    Each value is a float, or None when the corresponding table cell cannot be
    converted to a float (e.g. the cell holds a non-numeric marker such as 'X');
    in that case the error is printed together with the field name.
    '''
    cwb_url = f"https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do?command=viewMain&station=C0A9F0&stname=%25E5%2585%25A7%25E6%25B9%2596&datepicker={date}"

    # (field name, CSS selector) pairs, listed in the order of the returned tuple.
    # All three cells are read from the same table row (the 22nd <tr>).
    field_selectors = (
        ("pres_hpa", "tr:nth-child(22) td:nth-child(2)"),
        ("temp_cels", "tr:nth-child(22) td:nth-child(4)"),
        ("rh_percent", "tr:nth-child(22) td:nth-child(6)"),
    )

    # pq() downloads and parses the page when given a URL
    page = pq(cwb_url)

    readings = []
    for field_name, css in field_selectors:
        try:
            reading = float(page(css).text())
        except Exception as ex:
            # keep going: one unreadable cell must not discard the other fields
            print(ex, f"at {field_name}")
            reading = None
        readings.append(reading)

    return tuple(readings)

In [4]:
# import time
# import random
# getting weather info from the designated dates and save it to a dict
import os

time_start = datetime.datetime.now()
print("time_start: ", time_start)

# Create the output folder before scraping: the retrieval loop takes minutes,
# so a missing folder should not be discovered only when the CSV is written.
output_dir = '../output'
os.makedirs(output_dir, exist_ok=True)

# date string (YYYY-MM-DD) -> (pres_hpa, temp_cels, rh_percent)
weather_info_dict = dict()
for d in get_dates(start_date = datetime.date(2020, 1, 1),
                   end_date = datetime.date(2020, 6, 30)):
    print("retrieving", d, "...")
    weather_info = get_cwb_weather_info(d)
    # pause a random 1-4 seconds between requests to avoid hammering the server
    sleep_secs = random.randint(1, 4)
    time.sleep(sleep_secs)
    weather_info_dict[d] = weather_info

time_rtrv_end = datetime.datetime.now()
print("time_rtrv_end: ", time_rtrv_end)

# reading and exporting the retrieved weather info
# (one row per date; the date string becomes the index)
df = pd.DataFrame.from_dict(weather_info_dict, orient='index', columns=['pres_hpa', 'temp_cels', 'rh_percent'])
print(df.head())

# formatting the file name with the date & time suffix of the retrieval
fmt = "%Y%m%dT%H%M%S"
t = time.localtime()
df.to_csv(f'{output_dir}/weather_info_2020_rtrv{time.strftime(fmt, t)}.csv', index=True,) 

time_savefile_end = datetime.datetime.now()
print("time_savefile_end: ", time_savefile_end)

# # if you'd like to export zip file containing that csv
# compression_opts = dict(method='zip',
#                         archive_name='out.csv')  

# df.to_csv('../output/weather_info.zip', index=False,
#           compression=compression_opts) 
print("===============")
print("data retrieval:", (time_rtrv_end - time_start))
print("file saving:", (time_savefile_end - time_rtrv_end))
print("total time:", (time_savefile_end - time_start))

time_start:  2020-07-21 18:47:14.949744
retrieving 2020-01-01 ...
could not convert string to float: 'X' at rh_percent
retrieving 2020-01-02 ...
could not convert string to float: 'X' at rh_percent
retrieving 2020-01-03 ...
retrieving 2020-01-04 ...
retrieving 2020-01-05 ...
retrieving 2020-01-06 ...
retrieving 2020-01-07 ...
retrieving 2020-01-08 ...
retrieving 2020-01-09 ...
retrieving 2020-01-10 ...
retrieving 2020-01-11 ...
retrieving 2020-01-12 ...
retrieving 2020-01-13 ...
retrieving 2020-01-14 ...
retrieving 2020-01-15 ...
retrieving 2020-01-16 ...
retrieving 2020-01-17 ...
retrieving 2020-01-18 ...
retrieving 2020-01-19 ...
retrieving 2020-01-20 ...
retrieving 2020-01-21 ...
retrieving 2020-01-22 ...
retrieving 2020-01-23 ...
retrieving 2020-01-24 ...
retrieving 2020-01-25 ...
retrieving 2020-01-26 ...
retrieving 2020-01-27 ...
retrieving 2020-01-28 ...
retrieving 2020-01-29 ...
retrieving 2020-01-30 ...
retrieving 2020-01-31 ...
retrieving 2020-02-01 ...
retrieving 2020-02-02 

#### Reference:
- [pandas.DataFrame.from_dict](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.from_dict.html)
- [pandas.DataFrame.to_csv](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html)