# In [None]:
import numpy as np
import pandas as pd
from collections import Counter

# plotting
import seaborn as sns
import matplotlib.pyplot as plt

# Seaborn theme and context are set first; the rcParams overrides below are
# applied after them.
sns.set_style('whitegrid')
sns.set_context('talk')

# Matplotlib defaults shared by every figure: a wide canvas plus 'x-large'
# text for legends, axis labels, titles and tick labels.
params = {'figure.figsize': (30, 10)}
params.update(dict.fromkeys(('legend.fontsize',
                             'axes.labelsize',
                             'axes.titlesize',
                             'xtick.labelsize',
                             'ytick.labelsize'),
                            'x-large'))

plt.rcParams.update(params)


# Load the battles dataset from its CSV file.
battles_df = pd.read_csv('../input/game-of-thrones/battles.csv')

# shape is (rows, columns): one row per battle, one column per attribute.
n_battles, n_attributes = battles_df.shape


# Display sample rows
print(battles_df.head())


# ## Explore raw properties
print("Number of attributes available in the dataset = {}".format(n_attributes))


# View available columns and their data types
print(battles_df.dtypes)


# Analyze properties of numerical columns.
# Printed explicitly: a bare expression is silently discarded unless it is
# the last statement of a notebook cell, and always when run as a script.
print(battles_df.describe())

print("Number of battles fought={}".format(n_battles))


# Battles per year, drawn as horizontal bars.
year_ax = sns.countplot(data=battles_df, y='year')
year_ax.set_title('Battle Distribution over Years')
plt.show()


# Battles per region, drawn as vertical bars.
region_ax = sns.countplot(data=battles_df, x='region')
region_ax.set_title('Battles by Regions')
plt.show()




# Major deaths and major captures per region, drawn on twin y-axes that share
# the region x-axis (deaths on the left axis, captures on the right axis).
f, ax1 = plt.subplots()
ax2 = ax1.twinx()

temp_df = (battles_df
           .groupby('region')[['major_death', 'major_capture']]
           .sum()
           .reset_index())

# Constant hue column. Combined with the mirrored two-entry hue_order lists
# below, 'dummy' is the first hue level on ax1 and the second on ax2, so the
# two bar series sit next to each other instead of on top of each other.
temp_df['dummy'] = 'dummy'

sns.barplot(x="region", y="major_death",
            hue='dummy', hue_order=['dummy', 'other'],
            data=temp_df, estimator=np.sum, ax=ax1)

sns.barplot(x="region", y="major_capture",
            hue='dummy', hue_order=['other', 'dummy'],
            data=temp_df, estimator=np.sum, ax=ax2)

# The hue column is only a layout device; its legends carry no information.
ax1.legend_.remove()
ax2.legend_.remove()

# Render and finish this figure, like every other seaborn plot in the file.
plt.show()


# Share of battles per attacking king.
attacker_king = battles_df.attacker_king.value_counts()
attacker_king.name = ''  # blank Series name -> no y-axis label on the pie
attacker_king.plot.pie(figsize=(6, 6), autopct='%.2f')
# Finish the figure here; otherwise it stays "current" and later pyplot /
# seaborn calls that rely on the current axes would draw into this pie.
plt.show()


# Share of battles per defending king.
defender_king = battles_df.defender_king.value_counts()
defender_king.name = ''  # blank Series name -> no y-axis label on the pie
defender_king.plot.pie(figsize=(6, 6), autopct='%.2f')
plt.show()


# How often each battle type occurs.
battle_type_ax = sns.countplot(data=battles_df, y='battle_type')
battle_type_ax.set_title('Battle Type Distribution')
plt.show()



# How often the attacking side won versus lost.
outcome_ax = sns.countplot(data=battles_df, y='attacker_outcome')
outcome_ax.set_title('Attack Win/Loss Distribution')
plt.show()



# Wins per king, split by whether the king won as attacker or as defender.
#
# rename_axis() + reset_index(name=...) fixes the output column names
# explicitly. Renaming the columns produced by a bare reset_index() is not
# portable: the default names of a value_counts() result changed in
# pandas 2.0, which left the king names in the 'wins' column.

# Battles the attacker won, counted per attacking king.
attacker_won = battles_df['attacker_outcome'] == 'win'
attack_winners = (battles_df.loc[attacker_won, 'attacker_king']
                  .value_counts()
                  .rename_axis('king')
                  .reset_index(name='wins'))
attack_winners['win_type'] = 'attack'

# A lost attack is counted as a win for the defending king.
defender_won = battles_df['attacker_outcome'] == 'loss'
defend_winners = (battles_df.loc[defender_won, 'defender_king']
                  .value_counts()
                  .rename_axis('king')
                  .reset_index(name='wins'))
defend_winners['win_type'] = 'defend'


# ignore_index avoids duplicate row labels in the stacked frame.
sns.barplot(x="king",
            y="wins",
            hue="win_type",
            data=pd.concat([attack_winners, defend_winners],
                           ignore_index=True))
plt.title('Kings and Their Wins')
plt.ylabel('wins')
plt.xlabel('king')
plt.show()


# Number of commanders on each side of every battle.
#
# The names are split on commas: a whitespace split() counts name tokens
# ("Jaime Lannister" -> 2), not commanders.
# NOTE(review): assumes the CSV separates multiple commanders with commas —
# confirm against the data.
# Rows with no commander recorded stay NaN, as before.
battles_df['attack_commander_count'] = (battles_df['attacker_commander']
                                        .str.split(',')
                                        .str.len())
battles_df['defend_commander_count'] = (battles_df['defender_commander']
                                        .str.split(',')
                                        .str.len())

# Compare the two distributions side by side.
battles_df[['attack_commander_count',
            'defend_commander_count']].plot(kind='box')
plt.show()




# Houses taking part on each side: every battle has four attacker and four
# defender slots, and a slot that is not null counts as one house.
attacker_slots = ['attacker_1', 'attacker_2', 'attacker_3', 'attacker_4']
defender_slots = ['defender_1', 'defender_2', 'defender_3', 'defender_4']

battles_df['attacker_house_count'] = (battles_df[attacker_slots]
                                      .notnull()
                                      .sum(axis=1))

battles_df['defender_house_count'] = (battles_df[defender_slots]
                                      .notnull()
                                      .sum(axis=1))

# Total houses involved in the battle, both sides combined.
battles_df['total_involved_count'] = (battles_df['attacker_house_count']
                                      + battles_df['defender_house_count'])

# One descriptive sentence per battle.
battles_df['bubble_text'] = [
    '{} had {} house(s) attacking {} house(s) '.format(battle_name,
                                                        n_attackers,
                                                        n_defenders)
    for battle_name, n_attackers, n_defenders in zip(
        battles_df['name'],
        battles_df['attacker_house_count'],
        battles_df['defender_house_count'])
]


# Battles where the two sides fielded a different number of houses, indexed
# by battle name and shown as grouped bars (attackers vs defenders).
uneven_sides = (battles_df['attacker_house_count']
                != battles_df['defender_house_count'])
house_balance = battles_df.loc[uneven_sides,
                               ['name',
                                'attacker_house_count',
                                'defender_house_count']].set_index('name')
house_balance.plot.bar()


# Attacker vs defender army size, one bubble per battle; only battles with
# both army sizes (and the derived columns) present are plotted.
army_size_df = battles_df.dropna(subset=['total_involved_count',
                                         'attacker_size',
                                         'defender_size',
                                         'bubble_text'])
# The marker size grows with the number of houses involved in the battle.
army_size_df.plot(kind='scatter', x='defender_size', y='attacker_size',
                  s=army_size_df['total_involved_count'] * 150)
# Finish the figure here; otherwise it stays "current" and later pyplot /
# seaborn calls that rely on the current axes would draw on this scatter.
plt.show()


# Count how often each pair of kings faced each other, regardless of which
# one attacked.
# Only battles where both kings are known can contribute a pair.
temp_df = battles_df[["attacker_king", "defender_king"]].dropna()

# sorted() gives every pair one canonical order. tuple(set(pair)) alone is
# not a stable key: set iteration order is arbitrary, so (A, B) and (B, A)
# could be counted as two different rivalries and labels could change
# between runs. Battles where a king fights himself are skipped.
pair_counts = Counter(
    tuple(sorted(set(king_pair)))
    for king_pair in temp_df.values
    if len(set(king_pair)) > 1
)

archenemy_df = pd.DataFrame(list(pair_counts.items()),
                            columns=['king_pair',
                                     'battle_count'])

# Human-readable label for each rivalry, e.g. "<king> Vs <king>".
archenemy_df['versus_text'] = ['{} Vs {}'.format(*king_pair)
                               for king_pair in archenemy_df['king_pair']]

# Most frequent rivalries first.
archenemy_df.sort_values('battle_count',
                         inplace=True,
                         ascending=False)


# Bar chart of how many battles each pair of rival kings fought.
# (The former set_index(..., inplace=True) call was dropped: it acted on a
# temporary column selection and never changed archenemy_df.)
# Draw on a fresh figure so the bars never land on whatever axes an earlier
# plot left as the current one.
archenemy_fig, archenemy_ax = plt.subplots()
sns.barplot(data=archenemy_df,
            x='versus_text',
            y='battle_count',
            ax=archenemy_ax)
plt.xticks(rotation=45)
plt.xlabel('Archenemies')
plt.ylabel('Number of Battles')
plt.title('Archenemies')
plt.show()