<a href="https://colab.research.google.com/github/tak34/atmacup-15/blob/main/FE1_3_anime_mod.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 概要

anime.csvで特徴抽出を行う。
- membersなどの人数に関するものは、適当に比率をとる。
- type, source, ratingはlabel encoding。
- episodesは数字にする。
- airedは開始年と終了年を抽出する。
- genres, licensorsはone-hot encoding。
- producers, studiosは、ディスカッションに上がっていたSVDによる次元削減を流用。その際にproducersは類似のものを事前に集約しておく。

In [None]:
!pip install Levenshtein

Collecting Levenshtein
  Downloading Levenshtein-0.21.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (172 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m172.5/172.5 kB[0m [31m518.2 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting rapidfuzz<4.0.0,>=2.3.0 (from Levenshtein)
  Downloading rapidfuzz-3.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz, Levenshtein
Successfully installed Levenshtein-0.21.1 rapidfuzz-3.1.2


In [None]:
import matplotlib.pyplot as plt
# from matplotlib.gridspec import GridSpec
import numpy as np
import pandas as pd
import seaborn as sns
import datetime as dt
from pathlib import Path
import warnings
import os
import random
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots
from tqdm.notebook import tqdm
from collections import deque
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import DisjointSet
import Levenshtein
from itertools import chain, combinations

warnings.simplefilter('ignore')

In [None]:
# Config
# SAVE: when True, the processed `anime` frame is pickled by the save cell at the end of the notebook.
SAVE = True
# Directory the pickle is written to (a path under /content/drive — presumably a mounted Google Drive; confirm).
SAVE_DIR = Path("/content/drive/MyDrive/Kaggle/atmacup#15/proc/fe")
# File name of the pickle without the ".pkl" extension.
SAVE_FILE_NAME = "FE1_3_anime_20230722"

# データ読み込み

In [None]:
anime = pd.read_csv("/content/drive/MyDrive/Kaggle/atmacup#15/raw/anime.csv")

# 特徴抽出

In [None]:
# Names of the columns likely to be used as explanatory variables are collected
# in this list as they are created, and the list is output at the end.
features = []

In [None]:
def label_encoding(df, col):
    """Return a copy of ``df`` with an integer-coded ``label_<col>`` column added.

    Codes are assigned in order of first appearance of each distinct value in
    ``df[col]`` (0, 1, 2, ...). The input frame is not modified.

    Args:
        df: Source DataFrame.
        col: Name of the column to encode.

    Returns:
        A new DataFrame containing every original column plus ``label_<col>``.
    """
    encoded = df.copy()
    code_by_value = {value: code for code, value in enumerate(encoded[col].unique())}
    encoded[f"label_{col}"] = encoded[col].map(code_by_value)
    return encoded

## 人数の列

In [None]:
# The six head-count columns used for feature extraction.
anime_use_cols = [
 'members',
 'watching',
 'completed',
 'on_hold',
 'dropped',
 'plan_to_watch',
]

In [None]:
# Simple count-based features.
# 1) Each status count as a fraction of the total member count.
ratio_source_cols = [c for c in anime_use_cols if c != "members"]
for col in ratio_source_cols:
    ratio_name = f"{col}_per_members"
    anime[ratio_name] = anime[col] / anime["members"]
    features.append(ratio_name)

# 2) log1p-transformed, then standardized, versions of the two raw counts.
for col in ("members", "completed"):
    scaled_name = f"{col}_scaled"
    log_counts = np.log1p(anime[col].values).reshape(-1, 1)
    anime[scaled_name] = StandardScaler().fit_transform(log_counts)
    features.append(scaled_name)

anime.head(2)

Unnamed: 0,anime_id,genres,japanese_name,type,episodes,aired,producers,licensors,studios,source,...,on_hold,dropped,plan_to_watch,watching_per_members,completed_per_members,on_hold_per_members,dropped_per_members,plan_to_watch_per_members,members_scaled,completed_scaled
0,000ba7f7e34e107e7544,"Comedy, Sci-Fi, Seinen, Slice of Life, Space",宇宙兄弟,TV,99,"Apr 1, 2012 to Mar 22, 2014","Aniplex, Dentsu, YTV, Trinity Sound",Sentai Filmworks,A-1 Pictures,Manga,...,13009,6948,76685,0.110033,0.24752,0.08648,0.046188,0.509779,-0.190714,-0.51002
1,00427279d72064e7fb69,"Adventure, Slice of Life, Mystery, Historical,...",蟲師,TV,26,"Oct 23, 2005 to Jun 19, 2006","Avex Entertainment, Marvelous, SKY Perfect Wel...",Funimation,Artland,Manga,...,42786,20017,267080,0.089381,0.379181,0.068928,0.032247,0.430263,1.601196,0.610288


In [None]:
features

['watching_per_members',
 'completed_per_members',
 'on_hold_per_members',
 'dropped_per_members',
 'plan_to_watch_per_members',
 'members_scaled',
 'completed_scaled']

## type

In [None]:
print("●type")
print("unique data:", len(anime["type"].unique()))
anime["type"].unique()

●type
unique data: 7


array(['TV', 'Special', 'Movie', 'Unknown', 'ONA', 'OVA', 'Music'],
      dtype=object)

In [None]:
# label encoding
anime = label_encoding(anime, "type")
anime.head(2)

Unnamed: 0,anime_id,genres,japanese_name,type,episodes,aired,producers,licensors,studios,source,...,dropped,plan_to_watch,watching_per_members,completed_per_members,on_hold_per_members,dropped_per_members,plan_to_watch_per_members,members_scaled,completed_scaled,label_type
0,000ba7f7e34e107e7544,"Comedy, Sci-Fi, Seinen, Slice of Life, Space",宇宙兄弟,TV,99,"Apr 1, 2012 to Mar 22, 2014","Aniplex, Dentsu, YTV, Trinity Sound",Sentai Filmworks,A-1 Pictures,Manga,...,6948,76685,0.110033,0.24752,0.08648,0.046188,0.509779,-0.190714,-0.51002,0
1,00427279d72064e7fb69,"Adventure, Slice of Life, Mystery, Historical,...",蟲師,TV,26,"Oct 23, 2005 to Jun 19, 2006","Avex Entertainment, Marvelous, SKY Perfect Wel...",Funimation,Artland,Manga,...,20017,267080,0.089381,0.379181,0.068928,0.032247,0.430263,1.601196,0.610288,0


In [None]:
print("unique data:", len(anime["label_type"].unique()))
anime["label_type"].unique()

unique data: 7


array([0, 1, 2, 3, 4, 5, 6])

In [None]:
features.append("label_type")

## episodes

In [None]:
# Non-numeric episode counts become the sentinel -100.
episodes_numeric = pd.to_numeric(anime["episodes"], errors="coerce")
# Titles with more than 100 episodes are rare, so cap the count at 100.
anime["episodes"] = episodes_numeric.fillna(-100).astype(int).clip(upper=100)
anime.head(2)

Unnamed: 0,anime_id,genres,japanese_name,type,episodes,aired,producers,licensors,studios,source,...,dropped,plan_to_watch,watching_per_members,completed_per_members,on_hold_per_members,dropped_per_members,plan_to_watch_per_members,members_scaled,completed_scaled,label_type
0,000ba7f7e34e107e7544,"Comedy, Sci-Fi, Seinen, Slice of Life, Space",宇宙兄弟,TV,99,"Apr 1, 2012 to Mar 22, 2014","Aniplex, Dentsu, YTV, Trinity Sound",Sentai Filmworks,A-1 Pictures,Manga,...,6948,76685,0.110033,0.24752,0.08648,0.046188,0.509779,-0.190714,-0.51002,0
1,00427279d72064e7fb69,"Adventure, Slice of Life, Mystery, Historical,...",蟲師,TV,26,"Oct 23, 2005 to Jun 19, 2006","Avex Entertainment, Marvelous, SKY Perfect Wel...",Funimation,Artland,Manga,...,20017,267080,0.089381,0.379181,0.068928,0.032247,0.430263,1.601196,0.610288,0


In [None]:
features.append("episodes")

## aired

In [None]:
# `aired` looks like "<start date> to <end date>". A value without " to "
# splits into a one-element list, so its start and end are the same text.
anime["aired_from_to"] = anime["aired"].str.split(" to ")

# Take the last four characters of the first / last part as the year.
# `.str[...]` indexing propagates NaN, so a missing `aired` value flows into
# the -100 sentinel below instead of raising inside a lambda.
for year_col, part_pos in (("aired_from", 0), ("aired_to", -1)):
    year_text = anime["aired_from_to"].str[part_pos].str[-4:]
    # Anything that is not a number (e.g. "?" or "Unknown") becomes NaN here.
    anime[year_col] = pd.to_numeric(year_text, errors="coerce")

# Computed before the sentinel fill so a missing year yields a missing duration.
anime["aired_duration"] = anime["aired_to"] - anime["aired_from"]

# Missing values are encoded as -100.
for year_col in ("aired_from", "aired_to", "aired_duration"):
    anime[year_col] = anime[year_col].fillna(-100).astype(int)

anime.head(2)

Unnamed: 0,anime_id,genres,japanese_name,type,episodes,aired,producers,licensors,studios,source,...,on_hold_per_members,dropped_per_members,plan_to_watch_per_members,members_scaled,completed_scaled,label_type,aired_from_to,aired_from,aired_to,aired_duration
0,000ba7f7e34e107e7544,"Comedy, Sci-Fi, Seinen, Slice of Life, Space",宇宙兄弟,TV,99,"Apr 1, 2012 to Mar 22, 2014","Aniplex, Dentsu, YTV, Trinity Sound",Sentai Filmworks,A-1 Pictures,Manga,...,0.08648,0.046188,0.509779,-0.190714,-0.51002,0,"[Apr 1, 2012, Mar 22, 2014]",2012,2014,2
1,00427279d72064e7fb69,"Adventure, Slice of Life, Mystery, Historical,...",蟲師,TV,26,"Oct 23, 2005 to Jun 19, 2006","Avex Entertainment, Marvelous, SKY Perfect Wel...",Funimation,Artland,Manga,...,0.068928,0.032247,0.430263,1.601196,0.610288,0,"[Oct 23, 2005, Jun 19, 2006]",2005,2006,1


In [None]:
features += ["aired_from", "aired_to", "aired_duration"]

## source

In [None]:
print("●source")
print("unique data:", len(anime["source"].unique()))
anime["source"].unique()

●source
unique data: 13


array(['Manga', 'Light novel', 'Original', 'Visual novel', 'Game',
       'Novel', 'Web manga', '4-koma manga', 'Other', 'Card game',
       'Digital manga', 'Book', 'Music'], dtype=object)

In [None]:
# label encoding
anime = label_encoding(anime, "source")
anime.head(2)

Unnamed: 0,anime_id,genres,japanese_name,type,episodes,aired,producers,licensors,studios,source,...,dropped_per_members,plan_to_watch_per_members,members_scaled,completed_scaled,label_type,aired_from_to,aired_from,aired_to,aired_duration,label_source
0,000ba7f7e34e107e7544,"Comedy, Sci-Fi, Seinen, Slice of Life, Space",宇宙兄弟,TV,99,"Apr 1, 2012 to Mar 22, 2014","Aniplex, Dentsu, YTV, Trinity Sound",Sentai Filmworks,A-1 Pictures,Manga,...,0.046188,0.509779,-0.190714,-0.51002,0,"[Apr 1, 2012, Mar 22, 2014]",2012,2014,2,0
1,00427279d72064e7fb69,"Adventure, Slice of Life, Mystery, Historical,...",蟲師,TV,26,"Oct 23, 2005 to Jun 19, 2006","Avex Entertainment, Marvelous, SKY Perfect Wel...",Funimation,Artland,Manga,...,0.032247,0.430263,1.601196,0.610288,0,"[Oct 23, 2005, Jun 19, 2006]",2005,2006,1,0


In [None]:
print("unique data:", len(anime["label_source"].unique()))
anime["label_source"].unique()

unique data: 13


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [None]:
features.append("label_source")

## rating

In [None]:
print("●rating")
print("unique data:", len(anime["rating"].unique()))
anime["rating"].unique()

●rating
unique data: 7


array(['PG-13 - Teens 13 or older', 'R+ - Mild Nudity',
       'R - 17+ (violence & profanity)', 'G - All Ages', 'PG - Children',
       'Rx - Hentai', 'Unknown'], dtype=object)

In [None]:
# label encoding
anime = label_encoding(anime, "rating")
anime.head(2)

Unnamed: 0,anime_id,genres,japanese_name,type,episodes,aired,producers,licensors,studios,source,...,plan_to_watch_per_members,members_scaled,completed_scaled,label_type,aired_from_to,aired_from,aired_to,aired_duration,label_source,label_rating
0,000ba7f7e34e107e7544,"Comedy, Sci-Fi, Seinen, Slice of Life, Space",宇宙兄弟,TV,99,"Apr 1, 2012 to Mar 22, 2014","Aniplex, Dentsu, YTV, Trinity Sound",Sentai Filmworks,A-1 Pictures,Manga,...,0.509779,-0.190714,-0.51002,0,"[Apr 1, 2012, Mar 22, 2014]",2012,2014,2,0,0
1,00427279d72064e7fb69,"Adventure, Slice of Life, Mystery, Historical,...",蟲師,TV,26,"Oct 23, 2005 to Jun 19, 2006","Avex Entertainment, Marvelous, SKY Perfect Wel...",Funimation,Artland,Manga,...,0.430263,1.601196,0.610288,0,"[Oct 23, 2005, Jun 19, 2006]",2005,2006,1,0,0


In [None]:
print("unique data:", len(anime["label_rating"].unique()))
anime["label_rating"].unique()

unique data: 7


array([0, 1, 2, 3, 4, 5, 6])

In [None]:
features.append("label_rating")

## genres, producers, licensors, studios

- producersだけ、前処理として類似のものをまとめる。手法は以下のアニメ原作でグループ化する方法を応用する。
https://www.guruguru.science/competitions/21/discussions/04b127cc-c527-4f9d-9057-109ea54a05eb/

### producersの修正

In [None]:
# One list of producer names per anime.
list_producers_2d = anime["producers"].map(lambda x: x.split(", ")).tolist()
# Sort the distinct names: the iteration order of a set of strings depends on
# the per-process hash seed, so list(set(...)) gave a different row order (and
# therefore different group representatives downstream) on every kernel start.
list_producers = sorted(set(chain.from_iterable(list_producers_2d)))
df_producers = pd.DataFrame(list_producers, columns=["producer"])
df_producers

Unnamed: 0,producer
0,Hochi Shimbun
1,Sakura Create
2,Bulls Eye
3,Audio Highs
4,Yomiuri TV Enterprise
...,...
590,Pazzy Entertainment
591,I was a Ballerina
592,Yomiko Advertising
593,Barnum Studio


In [None]:
def _producer_match_key(name, key_len=10):
    """Return the short string used to compare producer names, or None if the name is missing."""
    if pd.isna(name):
        return None
    if name.startswith("Studio"):
        # "Studio ..." names are common and the shared prefix is noise:
        # skip the six-character prefix and keep the next `key_len` characters.
        return name[6:6 + key_len]
    if "Studio" in name:
        # "Studio" elsewhere in the name (e.g. "Barnum Studio"): remove the
        # word itself rather than blindly dropping the first six characters.
        return name.replace("Studio", "")[:key_len]
    return name[:key_len]


def get_producer_name(df, threshold=0.15):
    """Group similar producer names and attach one representative name per group.

    Every pair of rows is compared on a short key derived from the name (see
    ``_producer_match_key``). The pair distance is the mean of
    ``1 - Levenshtein.ratio`` and ``1 - Levenshtein.jaro_winkler``; pairs whose
    distance is below ``threshold`` are merged in a disjoint-set, so grouping
    is transitive.

    Args:
        df: DataFrame with a ``producer`` column. It is modified in place.
        threshold: Pairs with a distance strictly below this value are merged.

    Returns:
        The same ``df`` object with a new ``producer_mod`` column holding the
        name of one member of each row's group.
    """
    _feature = df["producer"].tolist()
    _n = len(_feature)
    # Keys depend on a single name only, so compute them once instead of
    # re-deriving them for every pair inside the O(n^2) loop.
    _keys = [_producer_match_key(name) for name in _feature]

    _disjoint_set = DisjointSet(list(range(_n)))
    # combinations() has no length; pass the pair count so tqdm can show progress.
    _n_pairs = _n * (_n - 1) // 2
    for i, j in tqdm(combinations(range(_n), 2), total=_n_pairs):
        key_i, key_j = _keys[i], _keys[j]
        if key_i is None or key_j is None:
            # A missing name gets a neutral distance of 0.5.
            lv_dist, jw_dist = 0.5, 0.5
        else:
            lv_dist = 1 - Levenshtein.ratio(key_i, key_j)
            jw_dist = 1 - Levenshtein.jaro_winkler(key_i, key_j)
        _d = (lv_dist + jw_dist) / 2

        if _d < threshold:
            _disjoint_set.merge(i, j)

    _labels = [None] * _n
    for subset in _disjoint_set.subsets():
        # The first member yielded by the subset names the whole group.
        label = _feature[next(iter(subset))]
        for element in subset:
            _labels[element] = label
    df["producer_mod"] = _labels

    return df


# Group similar producer names, then report how many distinct names remain.
producer_mod = get_producer_name(df_producers)
print(f"raw - producer nunique: {producer_mod['producer'].nunique()}")
print(f"processed - producer nunique: {producer_mod['producer_mod'].nunique()}")
producer_mod.head()

0it [00:00, ?it/s]

raw - japanese_name nunique: 595
processed - japanese_name nunique: 542


Unnamed: 0,producer,producer_mod
0,Hochi Shimbun,Hochi Shimbun
1,Sakura Create,Sakura Create
2,Bulls Eye,Bulls Eye
3,Audio Highs,Audio Highs
4,Yomiuri TV Enterprise,Yomiuri Telecasting


In [None]:
aaa = producer_mod.set_index("producer")
dict_producer_mod = aaa.to_dict()["producer_mod"]

def modify_producer(x, dict_producer=dict_producer_mod):
    """Replace each producer in a ", "-separated string with its mapped name.

    Args:
        x: Producer names joined by ", ".
        dict_producer: Mapping from raw producer name to replacement name.
            Names that are not keys of the mapping are kept unchanged.

    Returns:
        List of the distinct resulting names, in order of first appearance.
    """
    mapped_names = (dict_producer.get(name, name) for name in x.split(", "))
    # dict.fromkeys removes duplicates while keeping first-seen order;
    # list(set(...)) returned the names in an order that varies between sessions.
    return list(dict.fromkeys(mapped_names))

anime["producers_mod"] = anime["producers"].apply(modify_producer)
anime.head(2)

Unnamed: 0,anime_id,genres,japanese_name,type,episodes,aired,producers,licensors,studios,source,...,members_scaled,completed_scaled,label_type,aired_from_to,aired_from,aired_to,aired_duration,label_source,label_rating,producers_mod
0,000ba7f7e34e107e7544,"Comedy, Sci-Fi, Seinen, Slice of Life, Space",宇宙兄弟,TV,99,"Apr 1, 2012 to Mar 22, 2014","Aniplex, Dentsu, YTV, Trinity Sound",Sentai Filmworks,A-1 Pictures,Manga,...,-0.190714,-0.51002,0,"[Apr 1, 2012, Mar 22, 2014]",2012,2014,2,0,0,"[Trinity Sound, Aniplex, Dentsu, YTV]"
1,00427279d72064e7fb69,"Adventure, Slice of Life, Mystery, Historical,...",蟲師,TV,26,"Oct 23, 2005 to Jun 19, 2006","Avex Entertainment, Marvelous, SKY Perfect Wel...",Funimation,Artland,Manga,...,1.601196,0.610288,0,"[Oct 23, 2005, Jun 19, 2006]",2005,2006,1,0,0,"[Delfi Sound, Avex Entertainment, Marvelous AQ..."


### 特徴抽出

- これをそのままパクる  
https://www.guruguru.science/competitions/21/discussions/7885664e-2acd-4191-833f-e1b21d34afc4/  

In [None]:
multilabel_cols = ["genres", "producers_mod", "licensors", "studios"]
multilabel_dfs = []
n_components = 10
for c in multilabel_cols:
    if c=="producers_mod":
        # Already holds a list of names per row.
        list_srs = anime[c].tolist()
    else:
        # Raw columns hold the labels as one ", "-separated string per row.
        list_srs = anime[c].map(lambda x: x.split(", ")).tolist()
    # MultiLabelBinarizer turns the per-row label lists into a multi-hot matrix.
    mlb = MultiLabelBinarizer()
    ohe_srs = mlb.fit_transform(list_srs)
    # The new frames are built on anime.index because they are later
    # concatenated to `anime` column-wise, which aligns rows by index.
    if c == "genres" or c == "licensors":
        # Few distinct labels: keep the one-hot representation as is.
        col_df = pd.DataFrame(
            ohe_srs,
            columns=[f"ohe_{c}_{name}" for name in mlb.classes_],
            index=anime.index,
        )
    else:
        # Many distinct labels: compress the multi-hot matrix with SVD.
        svd = TruncatedSVD(n_components=n_components, random_state=0)
        svd_arr = svd.fit_transform(ohe_srs)
        col_df = pd.DataFrame(
            svd_arr,
            columns=[f"svd_{c}_{ix}" for ix in range(n_components)],
            index=anime.index,
        )
    multilabel_dfs.append(col_df)

multilabel_df = pd.concat(multilabel_dfs, axis=1)
print(multilabel_df.shape)
multilabel_df

(2000, 111)


Unnamed: 0,ohe_genres_Action,ohe_genres_Adventure,ohe_genres_Cars,ohe_genres_Comedy,ohe_genres_Dementia,ohe_genres_Demons,ohe_genres_Drama,ohe_genres_Ecchi,ohe_genres_Fantasy,ohe_genres_Game,...,svd_studios_0,svd_studios_1,svd_studios_2,svd_studios_3,svd_studios_4,svd_studios_5,svd_studios_6,svd_studios_7,svd_studios_8,svd_studios_9
0,0,0,0,1,0,0,0,0,0,0,...,3.006844e-04,9.972524e-01,-7.521772e-03,2.341072e-04,-9.118615e-05,6.501412e-05,2.321149e-04,7.356040e-05,-3.424357e-04,-1.178398e-04
1,0,1,0,0,0,0,0,0,1,0,...,-1.900829e-06,2.161973e-06,-1.064836e-06,-7.268059e-05,-2.491330e-05,-5.347830e-06,3.490673e-05,-4.766511e-05,-6.814018e-05,9.884891e-05
2,0,1,0,0,0,0,0,0,1,0,...,-1.900829e-06,2.161973e-06,-1.064836e-06,-7.268059e-05,-2.491330e-05,-5.347830e-06,3.490673e-05,-4.766511e-05,-6.814018e-05,9.884891e-05
3,0,0,0,1,0,0,0,1,1,0,...,1.113427e-09,1.976065e-09,2.009354e-09,-1.630943e-08,-2.148815e-08,4.001086e-08,1.533465e-08,4.618116e-09,-8.325818e-08,5.740754e-08
4,0,0,0,1,0,0,0,0,0,0,...,-6.485317e-06,1.526225e-05,7.588316e-05,1.685114e-03,-2.961845e-04,3.883152e-04,1.291952e-04,-5.326035e-05,-6.660888e-04,5.398084e-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0,1,0,0,0,0,1,0,0,0,...,-6.485317e-06,1.526225e-05,7.588316e-05,1.685114e-03,-2.961845e-04,3.883152e-04,1.291952e-04,-5.326035e-05,-6.660888e-04,5.398084e-04
1996,0,0,0,0,0,0,0,1,0,0,...,1.259131e-03,6.692107e-03,9.960652e-01,-7.874581e-02,-2.696650e-05,-9.713901e-04,5.441507e-05,8.761767e-07,6.634946e-05,-1.304064e-04
1997,1,0,0,0,0,0,0,0,1,0,...,2.340272e-04,8.823870e-06,8.817722e-05,-2.808847e-04,-8.325917e-04,-5.316008e-04,-3.630619e-03,2.509415e-04,-3.205714e-03,3.595235e-03
1998,1,1,0,0,0,0,0,0,1,0,...,-2.155101e-05,2.229841e-06,-9.901805e-06,1.825667e-04,-2.691561e-04,1.847874e-04,1.391620e-04,1.788966e-04,-6.108575e-04,8.860767e-04


In [None]:
features += multilabel_df.columns.to_list()

In [None]:
# Drop any multilabel columns left from a previous run of this cell first, so
# re-running it does not append a second copy of every column.
anime = pd.concat(
    (anime.drop(columns=multilabel_df.columns, errors="ignore"), multilabel_df),
    axis=1,
)
print(anime.shape)
anime.head()

(2000, 144)


Unnamed: 0,anime_id,genres,japanese_name,type,episodes,aired,producers,licensors,studios,source,...,svd_studios_0,svd_studios_1,svd_studios_2,svd_studios_3,svd_studios_4,svd_studios_5,svd_studios_6,svd_studios_7,svd_studios_8,svd_studios_9
0,000ba7f7e34e107e7544,"Comedy, Sci-Fi, Seinen, Slice of Life, Space",宇宙兄弟,TV,99,"Apr 1, 2012 to Mar 22, 2014","Aniplex, Dentsu, YTV, Trinity Sound",Sentai Filmworks,A-1 Pictures,Manga,...,0.0003006844,0.9972524,-0.007521772,0.0002341072,-9.118615e-05,6.501412e-05,0.0002321149,7.35604e-05,-0.0003424357,-0.0001178398
1,00427279d72064e7fb69,"Adventure, Slice of Life, Mystery, Historical,...",蟲師,TV,26,"Oct 23, 2005 to Jun 19, 2006","Avex Entertainment, Marvelous, SKY Perfect Wel...",Funimation,Artland,Manga,...,-1.900829e-06,2.161973e-06,-1.064836e-06,-7.268059e-05,-2.49133e-05,-5.34783e-06,3.490673e-05,-4.766511e-05,-6.814018e-05,9.884891e-05
2,00444b67aaabdf740a68,"Adventure, Slice of Life, Mystery, Historical,...",蟲師 続章,TV,10,"Apr 5, 2014 to Jun 21, 2014","Aniplex, Kodansha, Delfi Sound",Aniplex of America,Artland,Manga,...,-1.900829e-06,2.161973e-06,-1.064836e-06,-7.268059e-05,-2.49133e-05,-5.34783e-06,3.490673e-05,-4.766511e-05,-6.814018e-05,9.884891e-05
3,00839a3507ab168abe75,"Comedy, Ecchi, Fantasy, School",星刻の竜騎士,TV,12,"Apr 5, 2014 to Jun 21, 2014","Media Factory, AT-X, Sony Music Communications...",Funimation,C-Station,Light novel,...,1.113427e-09,1.976065e-09,2.009354e-09,-1.630943e-08,-2.148815e-08,4.001086e-08,1.533465e-08,4.618116e-09,-8.325818e-08,5.740754e-08
4,0192331235e110fe4f76,"Comedy, Harem, Romance, Sci-Fi, Shounen, Space",天地無用！,TV,26,"Apr 2, 1995 to Sep 24, 1995","TV Tokyo, Pioneer LDC","Funimation, Geneon Entertainment USA",AIC,Original,...,-6.485317e-06,1.526225e-05,7.588316e-05,0.001685114,-0.0002961845,0.0003883152,0.0001291952,-5.326035e-05,-0.0006660888,0.0005398084


# 保存

In [None]:
if SAVE:
    # Create the output directory if needed; to_pickle does not create parent directories.
    SAVE_DIR.mkdir(parents=True, exist_ok=True)
    anime.to_pickle(SAVE_DIR / f"{SAVE_FILE_NAME}.pkl")

In [None]:
features

['watching_per_members',
 'completed_per_members',
 'on_hold_per_members',
 'dropped_per_members',
 'plan_to_watch_per_members',
 'members_scaled',
 'completed_scaled',
 'label_type',
 'episodes',
 'aired_from',
 'aired_to',
 'aired_duration',
 'label_source',
 'label_rating',
 'ohe_genres_Action',
 'ohe_genres_Adventure',
 'ohe_genres_Cars',
 'ohe_genres_Comedy',
 'ohe_genres_Dementia',
 'ohe_genres_Demons',
 'ohe_genres_Drama',
 'ohe_genres_Ecchi',
 'ohe_genres_Fantasy',
 'ohe_genres_Game',
 'ohe_genres_Harem',
 'ohe_genres_Hentai',
 'ohe_genres_Historical',
 'ohe_genres_Horror',
 'ohe_genres_Josei',
 'ohe_genres_Kids',
 'ohe_genres_Magic',
 'ohe_genres_Martial Arts',
 'ohe_genres_Mecha',
 'ohe_genres_Military',
 'ohe_genres_Music',
 'ohe_genres_Mystery',
 'ohe_genres_Parody',
 'ohe_genres_Police',
 'ohe_genres_Psychological',
 'ohe_genres_Romance',
 'ohe_genres_Samurai',
 'ohe_genres_School',
 'ohe_genres_Sci-Fi',
 'ohe_genres_Seinen',
 'ohe_genres_Shoujo',
 'ohe_genres_Shoujo Ai',
