In [1]:
import numpy as np 
import pandas as pd 

import seaborn as sns
import matplotlib.pyplot as plt

import geopandas as gpd
import pycountry
import pygal.maps
import pygal_maps_world.maps
from pygal.style import Style

In [2]:
df_raw = pd.read_csv('user_mal.csv')
df_raw = df_raw.dropna()
df_raw.head(5)
# 46358130 rows × 3 columns

Unnamed: 0,username,anime_id,my_score
0,karthiga,21,9.0
1,karthiga,59,7.0
2,karthiga,74,7.0
3,karthiga,120,7.0
4,karthiga,178,7.0


In [3]:
df_anime = pd.read_csv('anime.csv')
df_anime = df_anime.dropna()
df_anime.head(5)

Unnamed: 0,anime_id,title,status,aired_string,score,scored_by,rank,popularity,members,genre
0,11013,Inu x Boku SS,Finished Airing,"Jan 13, 2012 to Mar 30, 2012",7.63,139250.0,1274.0,231.0,283882.0,"Comedy, Supernatural, Romance, Shounen"
1,2104,Seto no Hanayome,Finished Airing,"Apr 2, 2007 to Oct 1, 2007",7.89,91206.0,727.0,366.0,204003.0,"Comedy, Parody, Romance, School, Shounen"
2,5262,Shugo Chara!! Doki,Finished Airing,"Oct 4, 2008 to Sep 25, 2009",7.55,37129.0,1508.0,1173.0,70127.0,"Comedy, Magic, School, Shoujo"
3,721,Princess Tutu,Finished Airing,"Aug 16, 2002 to May 23, 2003",8.21,36501.0,307.0,916.0,93312.0,"Comedy, Drama, Magic, Romance, Fantasy"
4,12365,Bakuman. 3rd Season,Finished Airing,"Oct 6, 2012 to Mar 30, 2013",8.67,107767.0,50.0,426.0,182765.0,"Comedy, Drama, Romance, Shounen"


In [4]:
df_country = pd.read_csv('user_country_age.csv')
df_country = df_country.dropna()
df_country.head(5)

Unnamed: 0,username,user_id,gender,country,age
0,karthiga,2255153,Female,India,30
1,RedvelvetDaisuki,1897606,Female,Philippines,26
2,Damonashu,37326,Male,United states,29
3,bskai,228342,Male,Mexico,30
4,terune_uzumaki,327311,Female,Malaysia,22


In [5]:
# use inner join df_anime and df_raw on 'anime_id'
merged_inner = pd.merge(left=df_anime, right=df_raw, left_on='anime_id', right_on='anime_id')
# merged_inner.shape
merged_inner.head()
## spend 1 minutes 

Unnamed: 0,anime_id,title,status,aired_string,score,scored_by,rank,popularity,members,genre,username,my_score
0,11013,Inu x Boku SS,Finished Airing,"Jan 13, 2012 to Mar 30, 2012",7.63,139250.0,1274.0,231.0,283882.0,"Comedy, Supernatural, Romance, Shounen",karthiga,8.0
1,11013,Inu x Boku SS,Finished Airing,"Jan 13, 2012 to Mar 30, 2012",7.63,139250.0,1274.0,231.0,283882.0,"Comedy, Supernatural, Romance, Shounen",thetreedude,7.0
2,11013,Inu x Boku SS,Finished Airing,"Jan 13, 2012 to Mar 30, 2012",7.63,139250.0,1274.0,231.0,283882.0,"Comedy, Supernatural, Romance, Shounen",MistButterfly,7.0
3,11013,Inu x Boku SS,Finished Airing,"Jan 13, 2012 to Mar 30, 2012",7.63,139250.0,1274.0,231.0,283882.0,"Comedy, Supernatural, Romance, Shounen",iLLMaTiCc,6.0
4,11013,Inu x Boku SS,Finished Airing,"Jan 13, 2012 to Mar 30, 2012",7.63,139250.0,1274.0,231.0,283882.0,"Comedy, Supernatural, Romance, Shounen",helenply,9.0


In [6]:
#  use inner join merged_inner and df_country on 'username'

final_merge = pd.merge(left=merged_inner, right=df_country, left_on='username', right_on='username')
final_merge.head(5)

Unnamed: 0,anime_id,title,status,aired_string,score,scored_by,rank,popularity,members,genre,username,my_score,user_id,gender,country,age
0,11013,Inu x Boku SS,Finished Airing,"Jan 13, 2012 to Mar 30, 2012",7.63,139250.0,1274.0,231.0,283882.0,"Comedy, Supernatural, Romance, Shounen",karthiga,8.0,2255153,Female,India,30
1,2104,Seto no Hanayome,Finished Airing,"Apr 2, 2007 to Oct 1, 2007",7.89,91206.0,727.0,366.0,204003.0,"Comedy, Parody, Romance, School, Shounen",karthiga,7.0,2255153,Female,India,30
2,5262,Shugo Chara!! Doki,Finished Airing,"Oct 4, 2008 to Sep 25, 2009",7.55,37129.0,1508.0,1173.0,70127.0,"Comedy, Magic, School, Shoujo",karthiga,7.0,2255153,Female,India,30
3,721,Princess Tutu,Finished Airing,"Aug 16, 2002 to May 23, 2003",8.21,36501.0,307.0,916.0,93312.0,"Comedy, Drama, Magic, Romance, Fantasy",karthiga,7.0,2255153,Female,India,30
4,12365,Bakuman. 3rd Season,Finished Airing,"Oct 6, 2012 to Mar 30, 2013",8.67,107767.0,50.0,426.0,182765.0,"Comedy, Drama, Romance, Shounen",karthiga,8.0,2255153,Female,India,30


### Basic introduction for the merged dataset

In [14]:
# Number of countries in the dataset
country = set(final_merge['country'])
print(len(country))

179


In [15]:
# Number of unique animes in the dataset
animes = set(final_merge['title'])
print(len(animes))

11941


### Get most viewed anime and averaged score from users in dataset by country 

In [12]:
def get_most_viewed_anime_from_country(country, final_merge):
    country_df       = final_merge[final_merge.country.str.contains(country)]
    most_viewed_count = country_df["title"].value_counts()
    result={}
    most_viewed_anime               = most_viewed_count.keys()[0]
    most_viewed_anime_average_score = round(country_df[country_df.title.isin([country_df["title"].value_counts().keys()[0]])].my_score.mean(), 2)
    result[most_viewed_anime]       = most_viewed_anime_average_score
    return result
    
    

In [13]:
get_most_viewed_anime_from_country("India", final_merge)

{'Death Note': 9.14}

In [16]:
# Since it is a large dataset, it takes 20 miniutes to run

most_viewed_by_country = {}
for ct in list(country):
    most_viewed_by_country[ct] = get_most_viewed_anime_from_country(ct ,final_merge)
### Takes 20 mins to get the result

In [17]:
most_viewed_by_country

{'Cayman islands': {'Denpa Onna to Seishun Otoko': 8.5},
 'Qatar': {'Death Note': 9.53},
 'Hungary': {'Death Note': 8.7},
 'Macedonia': {'Death Note': 9.12},
 'Saint lucia': {'Fairy Tail': 9.5},
 'Ecuador': {'Death Note': 8.88},
 'Barbados': {'Vampire Knight': 7.25},
 'Australia': {'Death Note': 8.59},
 'Georgia': {'Death Note': 8.74},
 'Bahrain': {'Death Note': 9.25},
 'Nigeria': {'Bleach': 7.56},
 'Slovakia': {'Death Note': 8.7},
 'Estonia': {'Death Note': 8.85},
 'Albania': {'Death Note': 9.19},
 'Maldives': {'One Piece': 8.33},
 'Jamaica': {'Naruto': 8.07},
 'Gabon': {'Chobits': 7.0},
 'Mali': {'Death Note': 9.0},
 'Indonesia': {'Sword Art Online': 8.23},
 'Costa rica': {'Death Note': 8.89},
 'Thailand': {'Toradora!': 8.6},
 'Tanzania': {'Boku wa Tomodachi ga Sukunai': 7.0},
 'Malaysia': {'Angel Beats!': 8.73},
 'Brazil': {'Death Note': 8.75},
 'Liberia': {'Fullmetal Alchemist': 9.0},
 'Guatemala': {'Death Note': 8.75},
 'Reunion': {'Dragon Ball Z Movie 14: Kami to Kami': 10.0},
 '

In [28]:
# import pycountry
input_countries = list(most_viewed_by_country.keys()) 

countries = {}
for country in pycountry.countries:
    countries[country.name] = country.alpha_2

Coun_codes = [countries.get(country, country).lower() for country in input_countries]

# print(Coun_codes)
most_viewed_by_country1 = most_viewed_by_country
for i in range(len(Coun_codes)):
    most_viewed_by_country[Coun_codes[i]] = most_viewed_by_country.pop(input_countries[i])



In [29]:
#  
country_name_covert = {'marshall islands': 'us',
 'taiwan': 'tw',
 'new zealand': 'nz',
 'el salvador': 'sv',
 'macau': 'mo',
 'syria': 'sy',
 'vatican city': 'va',
 'korea, north': 'kp',
 'cayman islands': 'gb',
 'dominican republic': 'do',
 'united states': 'us',
 'burkina faso': 'bf',
 'kosovo': 'al',
 'san marino': 'sm',
 'saint lucia': '',
 'burma': 'mm',
 'côte d’ivoire': 'fr',
 'south sudan': '',
 'sri lanka': 'lk',
 'isle of man': '',
 'venezuela': 've',
 'south africa': 'za',
 'laos': 'la',
 'united kingdom': 'gb',
 'vietnam': 'vn',
 'bosnia and herzegovina': 'ba',
 'korea, south': 'kr',
 'trinidad and tobago': '',
 'iran': 'ir',
 'french polynesia': 'fr',
 'saint vincent and the grenadines': '',
 'moldova': 'md',
 'french guiana': 'gf',
 'sierra leone': 'sl',
 'russia': 'ru',
 'bolivia': 'bo',
 'saudi arabia': 'sa',
 'solomon islands': '',
 'united arab emirates': 'ae',
 'hong kong': 'hk',
 'bahamas, the': '',
 'swaziland': 'sz',
 'reunion': 're',
 'timor-leste': 'tl',
 'macedonia': 'mk',
 'costa rica': 'cr',
 'faroe islands': 'dk',
 'puerto rico': 'pr',
 'tanzania': 'tz',
 'brunei': 'bn'}

In [33]:
for i in country_name_covert:
     if i in most_viewed_by_country:
            if len(country_name_covert[i]) == 2:
                most_viewed_by_country[country_name_covert[i]] = most_viewed_by_country.pop(i)
            else:
                # Removed the country name which can't find country code  
                print("Unknown country code: " + str(i)) 
                most_viewed_by_country.pop(i)
            
most_viewed_by_country

Unknown country code: saint lucia
Unknown country code: south sudan
Unknown country code: isle of man
Unknown country code: trinidad and tobago
Unknown country code: saint vincent and the grenadines
Unknown country code: solomon islands
Unknown country code: bahamas, the


{'qa': {'Death Note': 9.53},
 'hu': {'Death Note': 8.7},
 'ec': {'Death Note': 8.88},
 'bb': {'Vampire Knight': 7.25},
 'au': {'Death Note': 8.59},
 'ge': {'Death Note': 8.74},
 'bh': {'Death Note': 9.25},
 'ng': {'Bleach': 7.56},
 'sk': {'Death Note': 8.7},
 'ee': {'Death Note': 8.85},
 'al': {'Orange': 7.0},
 'mv': {'One Piece': 8.33},
 'jm': {'Naruto': 8.07},
 'ga': {'Chobits': 7.0},
 'ml': {'Death Note': 9.0},
 'id': {'Sword Art Online': 8.23},
 'th': {'Toradora!': 8.6},
 'my': {'Angel Beats!': 8.73},
 'br': {'Death Note': 8.75},
 'lr': {'Fullmetal Alchemist': 9.0},
 'gt': {'Death Note': 8.75},
 'cn': {'Code Geass: Hangyaku no Lelouch R2': 9.02},
 'ar': {'Death Note': 8.58},
 'si': {'Death Note': 8.93},
 'ma': {'Death Note': 9.42},
 'aw': {'Fairy Tail': 8.14},
 'uz': {'Death Note': 7.86},
 'es': {'Death Note': 8.54},
 'jo': {'Death Note': 8.74},
 'az': {'Death Note': 8.88},
 'dj': {'Psycho-Pass': 8.0},
 'dk': {'Naruto': 7.6},
 'se': {'Death Note': 8.56},
 'kz': {'Death Note': 8.81}

In [36]:
for i in most_viewed_by_country:
    print(list(most_viewed_by_country[i].values())[0])

9.53
8.7
8.88
7.25
8.59
8.74
9.25
7.56
8.7
8.85
7.0
8.33
8.07
7.0
9.0
8.23
8.6
8.73
8.75
9.0
8.75
9.02
8.58
8.93
9.42
8.14
7.86
8.54
8.74
8.88
8.0
7.6
8.56
8.81
7.62
9.26
9.2
8.56
7.0
8.8
8.75
8.83
8.61
8.2
10.0
8.72
9.18
8.67
8.75
8.76
1.0
8.0
7.67
7.5
9.13
8.37
8.63
9.0
6.0
8.85
7.38
9.0
5.5
7.0
8.85
9.53
8.39
9.18
9.02
8.66
7.71
9.14
8.92
9.0
7.75
8.79
9.0
8.73
7.56
9.06
9.14
9.25
9.19
7.0
9.28
9.4
9.0
8.85
8.75
10.0
8.87
8.91
8.95
8.0
9.13
7.86
8.53
8.5
8.95
9.19
8.64
8.94
7.14
8.33
8.71
8.85
6.33
7.5
9.42
4.0
9.1
7.43
9.24
8.0
8.84
8.49
8.6
9.42
6.5
8.23
10.0
8.17
9.5
10.0
9.24
8.85
8.0
9.43
9.18
9.12
8.52
9.0
8.75
9.26
7.0
6.75
8.7
9.19
8.55
6.67
8.0
8.4
8.75
8.69
8.63
8.5
8.59
9.22
8.44
8.59
8.95
7.0
6.0
8.28
8.21
9.22
8.91
8.8
10.0
10.0
7.0
9.12
8.89
8.98
7.0
8.53


In [54]:
country_by_animes = {}
for i in most_viewed_by_country:
    if list(most_viewed_by_country[i].keys())[0] in country_by_animes:
        res = country_by_animes[list(most_viewed_by_country[i].keys())[0]]
        res[i] = list(most_viewed_by_country[i].values())[0]
    else:
        res={}
        res[i] = list(most_viewed_by_country[i].values())[0]
        country_by_animes[list(most_viewed_by_country[i].keys())[0]] = res
    
# Sort country_by_animes dictionary by values
country_by_animes = {k: v for k, v in sorted(country_by_animes.items(), key=lambda item: len(item[1]), reverse=True)}

In [56]:
country_by_animes 

{'Death Note': {'qa': 9.53,
  'hu': 8.7,
  'ec': 8.88,
  'au': 8.59,
  'ge': 8.74,
  'bh': 9.25,
  'sk': 8.7,
  'ee': 8.85,
  'ml': 9.0,
  'br': 8.75,
  'gt': 8.75,
  'ar': 8.58,
  'si': 8.93,
  'ma': 9.42,
  'uz': 7.86,
  'es': 8.54,
  'jo': 8.74,
  'az': 8.88,
  'se': 8.56,
  'kz': 8.81,
  'fi': 8.56,
  'is': 8.8,
  'ad': 8.75,
  'mt': 8.83,
  'ca': 8.61,
  'jp': 8.72,
  'eg': 9.18,
  'mg': 8.67,
  'nl': 8.76,
  'me': 9.13,
  'it': 8.37,
  'ua': 8.63,
  'py': 9.0,
  'mn': 9.0,
  'pe': 8.85,
  'cy': 9.53,
  'cl': 8.39,
  'bd': 9.18,
  'hr': 9.02,
  'be': 8.66,
  'in': 9.14,
  'pt': 8.92,
  'tr': 9.0,
  'gh': 7.75,
  'lb': 8.79,
  'cz': 8.73,
  'lv': 9.06,
  'pk': 9.14,
  'tn': 9.25,
  'lt': 9.19,
  'ro': 9.28,
  'co': 8.85,
  'il': 8.87,
  'at': 8.91,
  'iq': 8.95,
  'hn': 9.13,
  'ch': 8.53,
  'by': 8.5,
  'gr': 8.95,
  'pa': 9.19,
  'ie': 8.64,
  'rs': 8.94,
  'no': 8.71,
  'pl': 8.85,
  'mu': 9.42,
  'ni': 9.1,
  'bg': 9.24,
  'mx': 8.84,
  'uy': 8.6,
  'ly': 9.42,
  'dm': 9.24,
  

In [58]:
# import pygal.maps
# import pygal_maps_world.maps
# from pygal.style import Style
custome_style = Style(legend_font_size = 5, )
worldmap = pygal_maps_world.maps.World(style = custome_style)

  
# set the title of the map 
worldmap.title = 'The most popular anime in each country and its score '
   

for i in country_by_animes:

    worldmap.add(i,country_by_animes[i]) 

worldmap.render_in_browser()



file:///var/folders/_y/9wzsy3zn0mb6k84x6x2csywc0000gn/T/tmpqf2vog9g.html
