In [None]:
!pip install geocoder
!pip install pyephem

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.6/98.6 KB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Collecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyephem
  Downloading pyephem-9.99.tar.gz (1.4 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyephem
  Building wheel for pyephem (setup.py) ... [?25l[?25hdone
  Created wheel for pyephem: filename=pyephem-9.99-py3-none-any.whl size=1568 sha256=d44e5ab70547011339feb8af50241c1b8897f3411663d9cb5e23f6d9af9863da
  Stored in directory: /root/.cache/pip/wheels/fa/

In [None]:
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import numpy as np
import pandas as pd
import seaborn as sns
import datetime as dt
from pathlib import Path
import warnings
import os
import random

import geocoder
import ephem

# MS Gothic is a Japanese font; presumably chosen so Japanese text renders in
# figures. NOTE(review): confirm it is installed on the machine running this.
plt.rcParams.update({'font.family': "MS Gothic"})
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

In [None]:
def fix_all_seeds(seed):
    """Seed NumPy's global RNG and the stdlib ``random`` module with ``seed``.

    Also exports ``PYTHONHASHSEED`` with the same value.
    NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
    presumably only influences processes launched afterwards -- confirm.
    """
    for seeder in (np.random.seed, random.seed):
        seeder(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)


fix_all_seeds(0)

In [None]:
# Set to True to pickle the final sunrise/sunset table to SAVE_DF_PATH.
SAVE_DF = False
# Destination file for that pickle.
SAVE_DF_PATH = "/content/drive/MyDrive/Kaggle/花粉飛散量（ProbSpace）/proc/20221210/20220108_sunset.pkl"

In [None]:
# Load the raw train and test CSV files into DataFrames.
train = pd.read_csv("/content/drive/MyDrive/Kaggle/花粉飛散量（ProbSpace）/raw/train_v2.csv")
test = pd.read_csv("/content/drive/MyDrive/Kaggle/花粉飛散量（ProbSpace）/raw/test_v2.csv")

In [None]:
def fetch_datetime_features(df):
    """Add calendar columns parsed from ``df["datetime"]`` and index by timestamp.

    Each ``datetime`` value is read as text: characters 0-3 are the year,
    4-5 the month, 6-7 the day and everything from position 8 onwards the hour.

    The frame is modified in place and also returned. Columns added:
    ``year``, ``month``, ``day``, ``hour``, ``date_int`` (the first eight
    characters as an integer), ``date`` (``"YYYY/MM/DD"`` text) and
    ``dayOfYear``. The assembled timestamp ``datetime2`` becomes the index.

    Note: the data contains hour 24 (e.g. ``2020033124``). In this notebook's
    displayed output ``pd.to_datetime`` rolled such a row over to 00:00 of the
    following day, so ``date``/``dayOfYear`` describe the next day while
    ``day``/``date_int`` keep the original one.
    """
    # Convert to text once and slice with the vectorised .str accessor instead
    # of running a Python lambda per row for every derived column.
    stamp = df["datetime"].astype(str)
    df["year"] = stamp.str[:4].astype("int64")
    df["month"] = stamp.str[4:6].astype("int64")
    df["day"] = stamp.str[6:8].astype("int64")
    df["hour"] = stamp.str[8:].astype("int64")
    df["datetime2"] = pd.to_datetime(df[["year", "month", "day", "hour"]])
    df["date_int"] = stamp.str[:8].astype("int64")
    df["date"] = df["datetime2"].dt.strftime("%Y/%m/%d")

    df["dayOfYear"] = df["datetime2"].dt.dayofyear

    # Use the timestamp as the index (in place, so the caller's frame changes too).
    df.set_index("datetime2", inplace=True)
    return df

# Add the derived date/time columns to both splits. fetch_datetime_features
# also modifies the frame it is given, so the reassignment is for clarity.
train = fetch_datetime_features(train)
test = fetch_datetime_features(test)

# Report the resulting shapes, then preview the last training rows.
print("train:", train.shape)
print("test:", test.shape)
train.tail()

train: (12240, 23)
test: (336, 23)


Unnamed: 0_level_0,datetime,precipitation_utsunomiya,precipitation_chiba,precipitation_tokyo,temperature_utsunomiya,temperature_chiba,temperature_tokyo,winddirection_utsunomiya,winddirection_chiba,winddirection_tokyo,...,pollen_utsunomiya,pollen_chiba,pollen_tokyo,year,month,day,hour,date_int,date,dayOfYear
datetime2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-03-31 20:00:00,2020033120,0.0,0.0,0,10.0,11.5,9.4,16,2,16,...,8.0,8.0,4.0,2020,3,31,20,20200331,2020/03/31,91
2020-03-31 21:00:00,2020033121,0.0,0.0,0,10.1,11.3,8.9,15,15,14,...,8.0,4.0,4.0,2020,3,31,21,20200331,2020/03/31,91
2020-03-31 22:00:00,2020033122,0.0,0.0,0,9.8,11.3,8.8,3,15,15,...,0.0,4.0,0.0,2020,3,31,22,20200331,2020/03/31,91
2020-03-31 23:00:00,2020033123,0.5,0.0,0,9.7,10.9,8.9,16,16,1,...,0.0,0.0,0.0,2020,3,31,23,20200331,2020/03/31,91
2020-04-01 00:00:00,2020033124,0.0,0.0,0,9.7,10.7,8.9,16,1,16,...,0.0,8.0,0.0,2020,3,31,24,20200331,2020/04/01,92


In [None]:
# Stack train and test so the date list covers every day present in either split.
train_test = pd.concat((train, test))
# Distinct "YYYY/MM/DD" strings, in order of first appearance.
date_list = train_test["date"].unique()
date_list

array(['2017/02/01', '2017/02/02', '2017/02/03', '2017/02/04',
       '2017/02/05', '2017/02/06', '2017/02/07', '2017/02/08',
       '2017/02/09', '2017/02/10', '2017/02/11', '2017/02/12',
       '2017/02/13', '2017/02/14', '2017/02/15', '2017/02/16',
       '2017/02/17', '2017/02/18', '2017/02/19', '2017/02/20',
       '2017/02/21', '2017/02/22', '2017/02/23', '2017/02/24',
       '2017/02/25', '2017/02/26', '2017/02/27', '2017/02/28',
       '2017/03/01', '2017/03/02', '2017/03/03', '2017/03/04',
       '2017/03/05', '2017/03/06', '2017/03/07', '2017/03/08',
       '2017/03/09', '2017/03/10', '2017/03/11', '2017/03/12',
       '2017/03/13', '2017/03/14', '2017/03/15', '2017/03/16',
       '2017/03/17', '2017/03/18', '2017/03/19', '2017/03/20',
       '2017/03/21', '2017/03/22', '2017/03/23', '2017/03/24',
       '2017/03/25', '2017/03/26', '2017/03/27', '2017/03/28',
       '2017/03/29', '2017/03/30', '2017/03/31', '2017/04/01',
       '2017/04/02', '2017/04/03', '2017/04/04', '2017/

# 経度と緯度

In [None]:
# Facilities to geocode, one per city (Utsunomiya, Chiba, Tokyo).
loc_list = ['宇都宮市中央生涯学習センター', '千葉県環境研究センター', '東京都多摩小平保健所']

# Query the OSM geocoder for each name and print the coordinates and the
# resolved address so the match can be checked by eye.
for place_name in loc_list:
    loc = geocoder.osm(place_name, timeout=5.0)
    print(loc.latlng)
    print(loc.address)

[36.5594462, 139.88265145]
宇都宮市 中央生涯学習センター, 中央通り, 中央一丁目, 宇都宮市, 栃木県, 320-0802, 日本
[35.633642, 140.077749]
千葉県環境研究センター, 海浜松風通り, 高浜五丁目, 美浜区, 千葉市, 千葉県, 261-0004, 日本
[35.7298652, 139.51664115548698]
東京都多摩小平保健所, 青梅街道, 芝久保町, 小平市, 東京都, 188-0014, 日本


# 日の出と日の入り

In [None]:
# One rising and one setting column per city.
columns = [
    "utsunomiya_rising",
    "utsunomiya_setting",
    "tokyo_rising",
    "tokyo_setting",
    "chiba_rising",
    "chiba_setting"
]

# Empty table with those columns; rows are added per date by the cells below.
df_result = pd.DataFrame(columns=columns)

## 宇都宮

In [None]:
# Observer at the Utsunomiya coordinates. The values are passed as strings
# because PyEphem reads string angles as degrees (floats would be radians).
location = ephem.Observer()
location.lat, location.lon = '36.5594462', '139.88265145'
for date in date_list:
    # A "YYYY/MM/DD" string with no time is midnight UTC, i.e. 09:00 JST on
    # that date: looking back finds that JST day's sunrise, looking forward
    # finds its sunset.
    location.date = date
    # Date.datetime() is UTC on every host, so +9 h always yields JST.
    # ephem.localtime() returned the host's local time instead, which made the
    # +9 h shift correct only on machines whose clock is set to UTC.
    rising_time = location.previous_rising(ephem.Sun()).datetime() + dt.timedelta(hours=9)
    df_result.loc[date, "utsunomiya_rising"] = rising_time
    setting_time = location.next_setting(ephem.Sun()).datetime() + dt.timedelta(hours=9)
    df_result.loc[date, "utsunomiya_setting"] = setting_time

## 千葉

In [None]:
# Observer at the Chiba coordinates. The values are passed as strings because
# PyEphem reads string angles as degrees (floats would be radians).
location = ephem.Observer()
location.lat, location.lon = "35.633642", "140.077749"
for date in date_list:
    # A "YYYY/MM/DD" string with no time is midnight UTC, i.e. 09:00 JST on
    # that date: looking back finds that JST day's sunrise, looking forward
    # finds its sunset.
    location.date = date
    # Date.datetime() is UTC on every host, so +9 h always yields JST.
    # ephem.localtime() returned the host's local time instead, which made the
    # +9 h shift correct only on machines whose clock is set to UTC.
    rising_time = location.previous_rising(ephem.Sun()).datetime() + dt.timedelta(hours=9)
    df_result.loc[date, "chiba_rising"] = rising_time
    setting_time = location.next_setting(ephem.Sun()).datetime() + dt.timedelta(hours=9)
    df_result.loc[date, "chiba_setting"] = setting_time

## 東京

In [None]:
# Observer at the Tokyo coordinates. The values are passed as strings because
# PyEphem reads string angles as degrees (floats would be radians).
location = ephem.Observer()
location.lat, location.lon = "35.730059", "139.51648"
for date in date_list:
    # A "YYYY/MM/DD" string with no time is midnight UTC, i.e. 09:00 JST on
    # that date: looking back finds that JST day's sunrise, looking forward
    # finds its sunset.
    location.date = date
    # Date.datetime() is UTC on every host, so +9 h always yields JST.
    # ephem.localtime() returned the host's local time instead, which made the
    # +9 h shift correct only on machines whose clock is set to UTC.
    rising_time = location.previous_rising(ephem.Sun()).datetime() + dt.timedelta(hours=9)
    df_result.loc[date, "tokyo_rising"] = rising_time
    setting_time = location.next_setting(ephem.Sun()).datetime() + dt.timedelta(hours=9)
    df_result.loc[date, "tokyo_setting"] = setting_time

In [None]:
# Display the assembled table: one row per date, one rising/setting pair per city.
df_result

Unnamed: 0,utsunomiya_rising,utsunomiya_setting,tokyo_rising,tokyo_setting,chiba_rising,chiba_setting
2017/02/01,2017-02-01 06:42:05.475367,2017-02-01 17:06:21.115344,2017-02-01 06:42:00.215238,2017-02-01 17:09:21.504374,2017-02-01 06:39:34.890847,2017-02-01 17:07:17.318950
2017/02/02,2017-02-02 06:41:16.850386,2017-02-02 17:07:25.473495,2017-02-02 06:41:13.364561,2017-02-02 17:10:24.074528,2017-02-02 06:38:48.246192,2017-02-02 17:08:19.684317
2017/02/03,2017-02-03 06:40:26.642622,2017-02-03 17:08:29.734702,2017-02-03 06:40:24.951378,2017-02-03 17:11:26.527629,2017-02-03 06:38:00.041340,2017-02-03 17:09:21.930353
2017/02/04,2017-02-04 06:39:34.880252,2017-02-04 17:09:33.871768,2017-02-04 06:39:35.002941,2017-02-04 17:12:28.837442,2017-02-04 06:37:10.303422,2017-02-04 17:10:24.030927
2017/02/05,2017-02-05 06:38:41.593090,2017-02-05 17:10:37.860118,2017-02-05 06:38:43.548150,2017-02-05 17:13:30.980278,2017-02-05 06:36:19.061233,2017-02-05 17:11:25.962458
...,...,...,...,...,...,...
2020/04/11,2020-04-11 05:12:06.071993,2020-04-11 18:11:30.654094,2020-04-11 05:14:21.816851,2020-04-11 18:12:09.459380,2020-04-11 05:12:12.742902,2020-04-11 18:09:49.038476
2020/04/12,2020-04-12 05:10:43.030675,2020-04-12 18:12:22.405748,2020-04-12 05:13:00.816138,2020-04-12 18:12:59.168018,2020-04-12 05:10:51.975535,2020-04-12 18:10:38.512459
2020/04/13,2020-04-13 05:09:20.564797,2020-04-13 18:13:14.226176,2020-04-13 05:11:40.385452,2020-04-13 18:13:48.951129,2020-04-13 05:09:31.777514,2020-04-13 18:11:28.061581
2020/04/14,2020-04-14 05:07:58.705032,2020-04-14 18:14:06.119349,2020-04-14 05:10:20.555154,2020-04-14 18:14:38.813012,2020-04-14 05:08:12.179146,2020-04-14 18:12:17.690171


In [None]:
# Write the table to disk only when the SAVE_DF switch is enabled.
if SAVE_DF:
    df_result.to_pickle(SAVE_DF_PATH)