In [7]:
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import numpy as np
import pandas as pd
import seaborn as sns
import datetime as dt
from pathlib import Path
import warnings
import os
import random
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tqdm.notebook import tqdm
from folium import Map, Marker, Icon
from folium.plugins import MarkerCluster

# Install a blanket "ignore" filter: with no category argument this applies to
# every warning category for the rest of the kernel session.
# NOTE(review): this also hides warnings that point at real problems (e.g.
# pandas' SettingWithCopyWarning, or libraries complaining about invalid
# arguments) — consider narrowing it to specific categories or modules.
warnings.simplefilter('ignore')

In [2]:
def fix_all_seeds(seed):
    """Seed the random number generators used by this notebook.

    Seeds Python's ``random`` module and NumPy's legacy global generator, and
    stores the seed (as a string) in the ``PYTHONHASHSEED`` environment
    variable.

    Note: Python reads ``PYTHONHASHSEED`` at interpreter start-up, so the
    assignment below influences processes launched afterwards, not the hash
    randomisation of the kernel that is already running.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)


fix_all_seeds(0)

In [5]:
# Directory that receives the HTML maps saved by the visualisation cells below.
# NOTE(review): nothing in this notebook creates the folder, and the path is an
# absolute Google Drive mount — it presumably has to exist before the maps are saved.
GRAPH_DIR = Path("/content/drive/MyDrive/Kaggle/BlueCarbon/proc/EDA_20230415_loc")

# データ読み込み

In [3]:
# Locations of the pickled train / test DataFrames.
train_pickle_path = "/content/drive/MyDrive/Kaggle/BlueCarbon/proc/train.pkl"
test_pickle_path = "/content/drive/MyDrive/Kaggle/BlueCarbon/proc/test.pkl"

# Unpickling can execute arbitrary code, so only load files produced by a
# trusted pipeline.
train = pd.read_pickle(train_pickle_path)
test = pd.read_pickle(test_pickle_path)

In [4]:
# Keep only the columns needed for the location maps.
# .copy() gives each frame its own data: train_loc receives an extra column in
# a later cell, and without the copy that assignment is a write to a selection
# of `train`, which pandas reports as SettingWithCopyWarning.
train_loc = train[["year", "lat", "lon", "cover"]].copy()
test_loc = test[["year", "lat", "lon"]].copy()

# 可視化

## Train

In [6]:
# Marker colours, one per cover class; each entry is paired by position with
# the label at the same index in list_cover.
list_colors = [
    "darkblue",
    "blue",
    "cadetblue",  # was misspelled "cabetblue", which is not a folium Icon colour
    "green",
    "orange",
    "red",
    "darkred"
]

# Labels of the cover classes; the leading digit is the class index.
list_cover = [
    "0 (cover=0)",
    "1 (0<cover<0.2)",
    "2 (0.2<=cover<0.4)",
    "3 (0.4<=cover<0.6)",
    "4 (0.6<=cover<0.8)",
    "5 (0.8<=cover<1)",
    "6 (cover=1)"
]

# Classify every row in one pass. np.select returns the label of the FIRST
# condition that holds, so the rules are listed from the highest lower bound
# down; this gives the same result as overwriting the label once per threshold
# in ascending order.
cover_values = train_loc["cover"]
cover_rules = [
    (cover_values >= 1, list_cover[6]),
    (cover_values >= 0.8, list_cover[5]),
    (cover_values >= 0.6, list_cover[4]),
    (cover_values >= 0.4, list_cover[3]),
    (cover_values >= 0.2, list_cover[2]),
    (cover_values > 0, list_cover[1]),
]
rule_masks = [mask for mask, _ in cover_rules]
rule_labels = [label for _, label in cover_rules]

# Rows matching no rule (cover <= 0, or NaN because every comparison is False)
# fall back to class 0, exactly as before.
# assign() builds a new frame instead of writing into a selection of `train`,
# so the cell can be re-run safely and does not trigger SettingWithCopyWarning.
train_loc = train_loc.assign(
    cover_mod=np.select(rule_masks, rule_labels, default=list_cover[0])
)
train_loc.head()

Unnamed: 0,year,lat,lon,cover,cover_mod
0,2011.0,24.98914,125.243164,0.05,1 (0<cover<0.2)
1,2009.0,26.996172,127.912025,0.725,4 (0.6<=cover<0.8)
2,2009.0,26.363556,127.735138,0.025,1 (0<cover<0.2)
3,2009.0,26.991255,127.9123,0.575,3 (0.4<=cover<0.6)
4,2009.0,26.988255,127.917252,0.225,2 (0.2<=cover<0.4)


In [8]:
# Build one clustered marker map per year of the training data and save each
# as a standalone HTML file in GRAPH_DIR.

# Make sure the output folder exists so the save at the end of each iteration
# cannot fail on a missing directory.
GRAPH_DIR.mkdir(parents=True, exist_ok=True)

# A NaN year never compares equal to anything and cannot be passed to int(),
# so such rows are left out instead of crashing the loop.
list_year = train_loc["year"].dropna().unique()

for year in list_year:
    marker_cluster = MarkerCluster()
    # Deliberately not called `map`: that name would shadow the built-in
    # map() for every cell executed afterwards.
    year_map = Map(location=[25.5, 127], zoom_start=10)  # initial centre (lat, lon) and zoom
    train_loc_yearly = train_loc[train_loc["year"] == year]
    # One pass per cover class so that every marker gets its class colour.
    for color, cover in zip(list_colors, list_cover):
        class_rows = train_loc_yearly[train_loc_yearly["cover_mod"] == cover]
        for lat, lon in zip(class_rows["lat"].values, class_rows["lon"].values):
            Marker(
                location=[lat, lon],
                icon=Icon(color=color)
            ).add_to(marker_cluster)
    marker_cluster.add_to(year_map)
    year_map.save(GRAPH_DIR / f"map_ocean_train_loc_{int(year)}.html")

## Test

In [9]:
# Build one clustered marker map per year of the test data (all markers gray,
# since the test frame has no cover column) and save each as a standalone
# HTML file in GRAPH_DIR.

# Make sure the output folder exists so the save at the end of each iteration
# cannot fail on a missing directory.
GRAPH_DIR.mkdir(parents=True, exist_ok=True)

# A NaN year never compares equal to anything and cannot be passed to int(),
# so such rows are left out instead of crashing the loop.
list_year = test_loc["year"].dropna().unique()

for year in list_year:
    marker_cluster = MarkerCluster()
    # Deliberately not called `map`: that name would shadow the built-in
    # map() for every cell executed afterwards.
    year_map = Map(location=[25.5, 127], zoom_start=10)  # initial centre (lat, lon) and zoom
    test_loc_yearly = test_loc[test_loc["year"] == year]
    for lat, lon in zip(test_loc_yearly["lat"].values, test_loc_yearly["lon"].values):
        Marker(
            location=[lat, lon],
            icon=Icon(color="gray")
        ).add_to(marker_cluster)
    marker_cluster.add_to(year_map)
    year_map.save(GRAPH_DIR / f"map_ocean_test_loc_{int(year)}.html")