## Yahoo天気から過去の天気データをダウンロードする

https://weather.yahoo.co.jp/weather/jp/past/

In [207]:
from bs4 import BeautifulSoup as BS
import requests   

class WeatherScraper:
    """Scrape one day's past weather data from the Yahoo! Japan weather site.

    The lookup walks three pages: the index page (``url``), the page linked
    from the matching ``<dd>`` entry on the index, and finally the
    ``detail.html`` page of the matching zone for the requested date.

    State is kept per instance (``date``, ``weather``, ``zone_url``) and the
    index ``url`` is never overwritten, so one instance can be reused for any
    number of queries without results or URLs leaking between calls.
    """

    # Index page of the past-weather section; hrefs found on it are appended.
    url = "https://weather.yahoo.co.jp/weather/jp/past/"
    # Prefix prepended to the zone hrefs found on the second-level page.
    site_root = "https://weather.yahoo.co.jp"
    # Seconds to wait for each HTTP request before giving up.
    timeout = 10

    def __init__(self):
        # Date string of the most recent query ("YYYY-M-D"), or None.
        self.date = None
        # Result of the most recent query: attribute name -> value text.
        self.weather = {}
        # URL of the zone page selected by get_zone(), or None.
        self.zone_url = None

    def get_weather(self, target_pref, target_zone, date):
        """Return the weather of a place on a given date.

        Args:
            target_pref: Link text of the index-page entry to follow; compared
                for equality (after stripping whitespace) with each ``<dd>``
                link on the index page.
            target_zone: Zone name; the first ``<li>`` on the next page whose
                text contains this string is used.
            date: Date as a "YYYY-M-D" string, e.g. "2017-7-2".

        Returns:
            A dict with a "date" key plus one key per row scraped from the
            detail table. If no index entry matches ``target_pref`` the dict
            contains only "date".

        Raises:
            ValueError: If no zone matches ``target_zone``.
            RuntimeError: If an expected page element is missing.
        """
        self.date = date
        # Start from a fresh dict so values of a previous query cannot leak.
        self.weather = {"date": date}
        self.zone_url = None

        for pref in self.get_prefs():
            for section in pref.find_all("dd"):
                link = section.find("a")
                if link is None or link.get_text().strip() != target_pref:
                    continue
                self.get_zone(target_zone, self.url + link.get("href"))
                self.get_detail_weather()
                # Stop at the first match instead of scanning the other lists.
                return self.weather

        return self.weather

    def get_zone(self, target_zone, next_url):
        """Find the zone page for ``target_zone`` and remember its URL.

        Args:
            target_zone: Substring to look for in each ``<li>`` text.
            next_url: URL of the page that holds the ``zonelst`` element.

        Returns:
            The zone page URL, which is also stored in ``self.zone_url``.

        Raises:
            ValueError: If no list item contains ``target_zone``.
            RuntimeError: If the page has no element with id ``zonelst``.
        """
        soup = self._fetch_soup(next_url)
        zonelst = soup.find(id="zonelst")
        if zonelst is None:
            raise RuntimeError("zone list not found at {}".format(next_url))

        # Process the page zone by zone.
        for zone in zonelst.find_all("li"):
            link = zone.find("a")
            if link is not None and target_zone in zone.get_text().strip():
                self.zone_url = self.site_root + link.get("href")
                return self.zone_url

        raise ValueError("zone not found: {!r}".format(target_zone))

    def get_detail_weather(self):
        """Fetch the detail page for ``self.zone_url`` and ``self.date``.

        Every table row with two or three ``<td>`` cells is stored in
        ``self.weather`` as ``{second-to-last cell text: last cell text}``.

        Raises:
            RuntimeError: If no zone or date has been set, or if the detail
                page has no weather table.
        """
        if self.zone_url is None or self.date is None:
            raise RuntimeError(
                "zone and date must be set first; call get_weather() or "
                "get_zone() before get_detail_weather()"
            )

        # Build the detail URL from the zone URL and the date.
        detail_url = self._build_detail_url(self.zone_url, self.date)
        soup = self._fetch_soup(detail_url)

        # Get the table that holds the weather information.
        table = soup.find("table", class_="yjw_table", id=None)
        if table is None:
            raise RuntimeError("weather table not found at {}".format(detail_url))

        for row in table.find_all("tr"):
            tds = row.find_all("td")
            # Rows have 2 cells, or 3 when an extra leading cell is present
            # (merged cells); in both cases the name/value pair is the last
            # two cells. Any other row shape carries no pair and is skipped
            # rather than reusing values from the previous row.
            if len(tds) not in (2, 3):
                continue
            self.weather[tds[-2].get_text()] = tds[-1].get_text()
        return

    def print_preflist(self):
        """Print the list of places for which data can be fetched."""
        preflist = self.get_prefs()
        print("Belows are places where you can get data.")
        for pref in preflist:
            heading = pref.find("dt")
            if heading is not None:
                print(heading.text)  # group heading
            for section in pref.find_all("dd"):
                link = section.find("a")
                if link is not None:
                    print("\t", link.get_text())  # entry under the heading
        print("------EOL------")
        return

    def get_prefs(self):
        """Return the ``<dl>`` elements inside the index page's ``preflst``.

        Raises:
            RuntimeError: If the index page has no element with id
                ``preflst``.
        """
        soup = self._fetch_soup(self.url)
        preflst = soup.find(id="preflst")
        if preflst is None:
            raise RuntimeError("prefecture list not found at {}".format(self.url))
        return preflst.find_all("dl")

    @staticmethod
    def _build_detail_url(zone_url, date):
        """Turn a zone page URL and a "YYYY-M-D" date into the detail URL."""
        year, month, day = date.split("-")
        query = "/detail.html?c={}&m={}&d={}".format(year, month, day)
        return zone_url.replace(".html", query)

    def _fetch_soup(self, url):
        """Download ``url`` and return it parsed with the lxml parser."""
        response = requests.get(url, timeout=self.timeout)
        # Fail on HTTP errors instead of parsing an error page.
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        return BS(response.text, "lxml")

In [208]:
# Look up the weather for zone "東京" under the "東京" entry on 2017-7-2.
# The call stays the last expression so the notebook displays its result.
ws = WeatherScraper()
ws.get_weather(target_pref="東京", target_zone="東京", date="2017-7-2")

{'date': '2017-7-2',
 '天気': '晴れ',
 '日の入り': '19:01',
 '日の出': '4:29',
 '最低気温': '20.9℃',
 '最高気温': '31.9℃',
 '海面気圧': '1007.9hPa',
 '湿度': '56％',
 '現地気圧': '1005.2hPa',
 '積雪深': '---',
 '降水量': '0.0mm',
 '露点温度': '21.1℃',
 '風向': '南南東',
 '風速': '3m/s'}