### Setup libs

In [80]:
import json
from datetime import datetime
import pandas as pd
from os import listdir
from os.path import isfile, join

### Reading multiple json raw files (data source)

In [71]:
def concatened_json_files(path):
    directory_path = path
    concatenated_file = []
    file_counter = 0

    file_list = [f for f in listdir(directory_path) if isfile(join(directory_path, f))]

    for file in file_list:
        file_counter+=1
        with open(f'{directory_path}{file}') as f:
            for obj in json.load(f):
                concatenated_file.append(obj)
    
    print(f'Concatened {file_counter} json files into one.')
    return concatenated_file

df = pd.json_normalize(concatened_json_files('../datasets/anime/raw_data/'))

Concatened 517 json files into one.


### Number of Rows

In [12]:
print(len(df.index))

21755


### Dump dataframe as csv for sample analysis

In [8]:
df.to_csv('../datasets/anime/sandbox/anime_dataset.csv',sep='|',index=False)

### Bleach preparation

In [40]:
pd.set_option('display.max_columns', None)
bleach = df[df['id'] == 269]
# bleach.head(1)

prep_bleach = bleach[['id','title','details.Status','details.Aired']]
prep_bleach.head(1)

Unnamed: 0,id,title,details.Status,details.Aired
2430,269,Bleach,Finished Airing,"Oct 5, 2004 to Mar 27, 2012"


In [121]:
prep_bleach_aired = prep_bleach


def convert_date_format(date):
    try:
        date_transformed = datetime.strptime(date, "%b %d, %Y")
        return datetime.strftime(date_transformed, '%Y-%m-%d')
    except:
        return f'Failed: {date}'

    
def get_aired_from_date(df_row):
    if df_row['details.Status'] == 'Not yet aired':
        return None
    else:
        from_date = convert_date_format(df_row['details.Aired'][:12].rstrip())
        if 'to' in from_date or len(from_date) == 9:
            partial_date = from_date.replace('to','').rstrip().lstrip()
            print(f'{partial_date[:3]} 1, {partial_date[5:]}')
            return f'{partial_date[:3]} 1, {partial_date[5:]}'
        else:
            return from_date


def get_aired_to_date(df_row):
    if df_row['details.Status'] == 'Finished Airing':
        return convert_date_format(df_row['details.Aired'][-12:].lstrip())
    else:
        return None

    
def calculate_airing_duration(df_row):
    from_date = df_row['details.Aired'][:12].rstrip()
    to_date = df_row['details.Aired'][-12:].lstrip()

    from_datetime = datetime.strptime(from_date, "%b %d, %Y")
    to_datetime = datetime.strptime(to_date, "%b %d, %Y")

    return int((to_datetime - from_datetime).total_seconds() / 86400)


prep_bleach_aired['aired_from'] = prep_bleach_aired.apply(get_aired_from_date,axis=1)
prep_bleach_aired['aired_to'] = prep_bleach_aired.apply(get_aired_to_date,axis=1)
prep_bleach_aired['days_in_air'] = prep_bleach_aired.apply(calculate_airing_duration,axis=1)
prep_bleach_aired

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prep_bleach_aired['aired_from'] = prep_bleach_aired.apply(get_aired_from_date,axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prep_bleach_aired['aired_to'] = prep_bleach_aired.apply(get_aired_to_date,axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prep_bleach_aired['days_in_air'] = pr

Unnamed: 0,id,title,details.Status,details.Aired,aired_from,aired_to,days_in_air
2430,269,Bleach,Finished Airing,"Oct 5, 2004 to Mar 27, 2012",2004-10-05,2012-03-27,2730


In [65]:
from_date = 'Oct 5, 2004'
to_date = 'Mar 27, 2012'

from_datetime = datetime.strptime(from_date, "%b %d, %Y")
to_datetime = datetime.strptime(to_date, "%b %d, %Y")

print(int((to_datetime - from_datetime).total_seconds() / 86400))

2730


In [43]:
dr_stone = df[df['title'] == 'Dr. Stone: Stone Wars']
dr_stone

Unnamed: 0,id,title,photo,synopsis,background,data_extraction,links.episodes,links.stats,links.characters & staff,details.Synonyms,details.Japanese,details.Type,details.Episodes,details.Status,details.Aired,details.Premiered,details.Broadcast,details.Producers,details.Licensors,details.Studios,details.Source,details.Genres,details.Duration,details.Rating,details.Score,details.ScoredBy,details.Ranked,details.Popularity,details.Members,details.Favorites,details.English,links
15648,40852,Dr. Stone: Stone Wars,https://cdn.myanimelist.net/images/anime/1711/...,Senkuu has made it his goal to bring back two ...,No background information,2021-02-16 22:50:32,https://myanimelist.net/anime/40852/Dr_Stone__...,https://myanimelist.net/anime/40852/Dr_Stone__...,https://myanimelist.net/anime/40852/Dr_Stone__...,"Dr. Stone 2nd Season, Dr. Stone Second Season",ドクターストーン STONE WARS,TV,11,Currently Airing,"Jan 14, 2021 to ?",Winter 2021,Thursdays at 22:30 (JST),"[TOHO animation, Shueisha]",[Funimation],[TMS Entertainment],Manga,"[Sci-Fi, Adventure, Shounen]",24 min. per ep.,PG-13 - Teens 13 or older,8.3,61121,#226,#338,392467,4109,,


In [122]:
df['aired_from'] = df.apply(get_aired_from_date,axis=1)
# df['aired_to'] = df.apply(get_aired_to_date,axis=1)
# df['days_in_air'] = df.apply(calculate_airing_duration,axis=1)

df.sort_values('aired_from',ascending=False).head(15)

Fai 1, d: Feb, 1992
Fai 1, d: Apr, 1994
Fai 1, d: Dec, 2005
Fai 1, d: Oct, 2007
Fai 1, d: Jan, 2011
Fai 1, d: Oct, 2006
Fai 1, d: Sep, 2009
Fai 1, d: 1994  1996
Fai 1, d: Jul, 2008
Fai 1, d: 1989  1991
Fai 1, d: 1995  1997
Fai 1, d: May, 2011
Fai 1, d: Apr, 2011
Fai 1, d: 1959  ?
Fai 1, d: 2006  2007
Fai 1, d: 2011  ?
Fai 1, d: Oct, 1999
Fai 1, d: Nov, 2013
Fai 1, d: Mar, 2004
Fai 1, d: Apr, 2000
Fai 1, d: Sep, 2011
Fai 1, d: 1990  ?
Fai 1, d: 1999  May
Fai 1, d: 2002  2008
Fai 1, d: 1982  1997
Fai 1, d: Sep, 2011
Fai 1, d: 2012  ?
Fai 1, d: Sep, 2010
Fai 1, d: Jun, 2000
Fai 1, d: 2009  ?
Fai 1, d: 1923  ?
Fai 1, d: 1924  ?
Fai 1, d: 1926  ?
Fai 1, d: 1931  ?
Fai 1, d: 1930  ?
Fai 1, d: 1932  ?
Fai 1, d: 1932  ?
Fai 1, d: 1932  ?
Fai 1, d: 1935  ?
Fai 1, d: 1935  ?
Fai 1, d: 1939  ?
Fai 1, d: 1947  ?
Fai 1, d: 1952  ?
Fai 1, d: 1953  ?
Fai 1, d: 2002  Oct
Fai 1, d: Nov, 2007
Fai 1, d: 2013  ?
Fai 1, d: Nov, 2009
Fai 1, d: Feb, 2008
Fai 1, d: Dec, 2008
Fai 1, d: Sep, 2009
Fai 1, d: 1980

Unnamed: 0,id,title,photo,synopsis,background,data_extraction,links.episodes,links.stats,links.characters & staff,details.Synonyms,details.Japanese,details.Type,details.Episodes,details.Status,details.Aired,details.Premiered,details.Broadcast,details.Producers,details.Licensors,details.Studios,details.Source,details.Genres,details.Duration,details.Rating,details.Score,details.ScoredBy,details.Ranked,details.Popularity,details.Members,details.Favorites,details.English,links,aired_from
19897,40495,Tayo-ui Gonglyong Eodeubencheo,https://cdn.myanimelist.net/images/anime/1895/...,"As little buses said goodbye to Tino, they for...",No background information,2021-02-16 22:43:59,,https://myanimelist.net/anime/40495/Tayo-ui_Go...,https://myanimelist.net/anime/40495/Tayo-ui_Go...,Tayo Dino Kingdom,타요의 공룡 어드벤처,Special,1,Finished Airing,"Sep, 2019",,,[add some],[None found],[add some],Original,"[Cars, Historical, Kids]",17 min.,G - All Ages,,,#12405,#16953,86,0,Tayo's Dino Kingdom Adventure,,"Failed: Sep, 2019"
15970,32345,Celebration and Chorale,https://cdn.myanimelist.net/images/anime/9/776...,Yukie Nakauchi's musical work using principal ...,No background information,2021-02-16 16:15:09,,https://myanimelist.net/anime/32345/Celebratio...,https://myanimelist.net/anime/32345/Celebratio...,Shukuten no Chorale,celebration and chorale,Movie,1,Finished Airing,"Sep, 2012",,,[add some],[None found],[add some],Original,[Music],5 min.,G - All Ages,5.62,157.0,#9353,#13298,318,0,celebration and chorale,,"Failed: Sep, 2012"
3894,13971,663114,https://cdn.myanimelist.net/images/anime/2/689...,663114 is a short but hard-hitting monologue o...,The film won the Oofuji Noburou Award at the 6...,2021-02-16 16:31:18,,https://myanimelist.net/anime/13971/663114/stats,https://myanimelist.net/anime/13971/663114/cha...,,663114,Movie,1,Finished Airing,"Sep, 2011",,,[add some],[None found],[add some],Original,[Fantasy],7 min.,G - All Ages,5.3,1785.0,#10121,#7641,3218,2,,,"Failed: Sep, 2011"
17675,31547,Ryouma 30 Seconds,https://cdn.myanimelist.net/images/anime/11/76...,No synopsis information has been added to this...,No background information,2021-02-16 15:53:36,https://myanimelist.net/anime/31547/Ryouma_30_...,https://myanimelist.net/anime/31547/Ryouma_30_...,https://myanimelist.net/anime/31547/Ryouma_30_...,,龍馬 30 Seconds,TV,5,Finished Airing,"Sep, 2009",Summer 2009,Unknown,[add some],[None found],[add some],Unknown,[Historical],30 sec. per ep.,G - All Ages,,,#11760,#15368,172,0,,,"Failed: Sep, 2009"
13876,5199,Seto no Hanayome Fan Disc,https://cdn.myanimelist.net/images/anime/13/22...,Fan discs containing shorts that weren't inclu...,No background information,2021-02-16 16:54:53,https://myanimelist.net/anime/5199/Seto_no_Han...,https://myanimelist.net/anime/5199/Seto_no_Han...,https://myanimelist.net/anime/5199/Seto_no_Han...,,瀬戸の花嫁,Special,2,Finished Airing,"Sep, 2008",,,[AIC],[None found],[Gonzo],Manga,"[Comedy, Parody, School]",6 min. per ep.,PG-13 - Teens 13 or older,7.06,2078.0,#3415,#5602,7180,4,,,"Failed: Sep, 2008"
16766,25271,Shinano Mainichi Shinbun,https://cdn.myanimelist.net/images/anime/5/645...,A 15-second animated commercial for The Shinan...,No background information,2021-02-16 16:54:50,,https://myanimelist.net/anime/25271/Shinano_Ma...,https://myanimelist.net/anime/25271/Shinano_Ma...,Shinano Mainichi Shimbun,信濃毎日新聞,Special,1,Finished Airing,"Sep, 2007",,,[add some],[None found],[add some],Original,[Slice of Life],15 sec.,G - All Ages,5.47,1279.0,#9754,#8129,2633,1,,,"Failed: Sep, 2007"
7174,39384,Kogitsune no Koutsuu Anzen,https://cdn.myanimelist.net/images/anime/1242/...,The three kitsune brothers learn about traffic...,No background information,2021-02-16 19:36:41,,https://myanimelist.net/anime/39384/Kogitsune_...,https://myanimelist.net/anime/39384/Kogitsune_...,,こぎつねの交通安全,OVA,1,Finished Airing,"Sep, 1997",,,[add some],[None found],[add some],Picture book,"[Fantasy, Kids]",16 min.,G - All Ages,,,#15106,#17334,55,0,,,"Failed: Sep, 1997"
15023,28569,Kogitsune no Shouboutai,https://cdn.myanimelist.net/images/anime/9/689...,The three kitsune brothers have moved away fro...,No background information,2021-02-16 18:27:06,,https://myanimelist.net/anime/28569/Kogitsune_...,https://myanimelist.net/anime/28569/Kogitsune_...,,こぎつねの消防隊,OVA,1,Finished Airing,"Sep, 1997",,,[add some],[None found],[add some],Picture book,"[Drama, Fantasy, Kids]",17 min.,G - All Ages,,,#15108,#16529,112,0,,,"Failed: Sep, 1997"
3811,9606,Kaba no Potomasu,https://cdn.myanimelist.net/images/anime/11/26...,Potomas the Hippo has a secret-he can speak li...,No background information,2021-02-16 18:54:45,,https://myanimelist.net/anime/9606/Kaba_no_Pot...,https://myanimelist.net/anime/9606/Kaba_no_Pot...,Potomas the Hippo,かばのポトマス,Movie,1,Finished Airing,"Sep, 1988",,,[add some],[None found],[add some],Unknown,[Kids],25 min.,PG - Children,,,#14740,#15683,154,1,,,"Failed: Sep, 1988"
12319,28035,Kitsune no Home Run Ou,https://cdn.myanimelist.net/images/anime/8/679...,"Two animal baseball teams, The Foxes and The R...",No background information,2021-02-16 18:17:58,,https://myanimelist.net/anime/28035/Kitsune_no...,https://myanimelist.net/anime/28035/Kitsune_no...,Kitsune no Homerun Ou,きつねのホームラン王,Movie,1,Finished Airing,"Sep, 1949",,,[add some],[None found],[add some],Original,"[Kids, Sports]",8 min.,G - All Ages,5.22,152.0,#10293,#12905,364,1,Seventh Inning Stretch,,"Failed: Sep, 1949"
