# AOE2
This kernel processes a subset of Age of Empires II DE games.

In [None]:
# Basic setup stuff
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
# Show every file found under the Kaggle input directory, one path per output
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    display(input_path)

# Default size applied to every matplotlib figure created afterwards
plt.rcParams.update({'figure.figsize': (12.0, 8.0)})

Read both the matches and match_players data to show the type of data that the files store.
* **matches.csv**: It contains information about the matches played, one per row. Each match is identified by the _token_ field.
* **match_players.csv**: It contains information about the players that were involved in every match. The _token_ field identifies the player, and the _match_ field is a foreign key referencing the _token_ field of _matches.csv_.

In [None]:
# Load the matches table from the Kaggle input directory
matches = pd.read_csv("/kaggle/input/dataset/matches.csv")

# DataFrame.info() prints its report itself and returns None, so it is called
# directly: wrapping it in display() would render an extra "None" in the output.
matches.info()

# The last expression of the cell is rendered as a rich table
matches.head()

In [None]:
# Load the per-match player rows from the Kaggle input directory
players = pd.read_csv("/kaggle/input/dataset/match_players.csv")

# DataFrame.info() prints its report itself and returns None, so it is called
# directly: wrapping it in display() would render an extra "None" in the output.
players.info()

# The last expression of the cell is rendered as a rich table
players.head()

Point 1: Number of games per server that took more than 2 hours (only matches with an average rating above 2000)

In [None]:
# Convert the duration column to timedeltas so it can be compared against a time span
matches["duration"] = pd.to_timedelta(matches["duration"])

# Matches with an average rating above 2000 that lasted longer than two hours
is_high_rated = matches["average_rating"].gt(2000)
is_over_two_hours = matches["duration"].gt(pd.to_timedelta("02:00:00"))
huge_duration_matches = matches[is_high_rated & is_over_two_hours]

# Count the selected matches on each server
long_games = (
    huge_duration_matches
    .groupby(["server"])["token"]
    .count()
    .rename("counts")
    .reset_index()
)
print(long_games[["server", "counts"]])

Point 2: 1v1 games won by the lower-rated player, where the winner's rating is above 1000 and the opponent's rating is more than 30% higher

In [None]:
# Thresholds for an "upset": the lower-rated winner must be rated above
# MIN_RATING, and the opponent's rating must exceed the winner's by more than
# MIN_RATING_DIFF_PCT percent (measured relative to the winner's rating).
MIN_RATING = 1000
MIN_RATING_DIFF_PCT = 30

for match_id, group in players.groupby("match"):
    # Only 1v1 games: the match must have exactly two player rows
    if len(group) != 2:
        continue

    first, second = group.iloc[0], group.iloc[1]

    # Identify the lower-rated player whichever row it is in. The previous
    # version only ever examined the first row, so upsets where the second
    # row held the lower-rated winner were silently missed.
    if first["rating"] < second["rating"]:
        lower, higher = first, second
    elif second["rating"] < first["rating"]:
        lower, higher = second, first
    else:
        # Equal ratings (or ratings that cannot be ordered, e.g. NaN): no underdog
        continue

    # The lower-rated player must have won and be rated above the minimum.
    # `== True` is kept on purpose so that a missing winner flag never counts as a win.
    if not (lower["winner"] == True and lower["rating"] > MIN_RATING):
        continue

    # Percentage by which the higher rating exceeds the lower one
    rating_diff = (higher["rating"] - lower["rating"]) / lower["rating"] * 100

    # Only report the games where the rating gap is above the threshold
    if rating_diff > MIN_RATING_DIFF_PCT:
        print("Match: {} - Player1 rating: {} - Player2 rating: {} - Rating diff: {:.2f}".format(first["match"], first["rating"], second["rating"], rating_diff))

Join both datasets to be able to complete points 3 and 4.

In [None]:
# Drop the columns that the remaining cells do not use, to reduce the memory
# footprint of the join. drop(..., errors="ignore") is used instead of pop()
# so that re-running this cell does not raise KeyError for columns that were
# already removed by a previous run.
matches = matches.drop(
    columns=["map_size", "num_players", "average_rating", "winning_team", "patch"],
    errors="ignore",
)
# The players' own "token" column is removed before the merge, so the only
# "token" column in the joined frame is the one coming from matches.
players = players.drop(columns=["token", "color"], errors="ignore")

# One row per player, enriched with the columns of the match that player took part in
join_matches = pd.merge(players, matches, left_on="match", right_on="token")

# info() prints its report itself and returns None; calling it directly avoids
# the extra "None" that display(...) would render.
join_matches.info()

# Rich display of the first rows, consistent with the loading cells
join_matches.head()

Point 3: Civ win rate in 1v1 games on the arena map (mirror matches excluded)

In [None]:
# Rows from the "RM_1v1" ladder, played on the "arena" map, that are not mirror matches
on_1v1_ladder = join_matches["ladder"].eq("RM_1v1")
not_mirror = join_matches["mirror"].eq(False)
on_arena = join_matches["map"].eq("arena")
join_matches_1v1_no_mirror = join_matches[on_1v1_ladder & not_mirror & on_arena]

# Mean of the winner flag per civ, sorted from highest to lowest
win_per_civ = (
    join_matches_1v1_no_mirror
    .groupby("civ")["winner"]
    .mean()
    .rename("win_rate")
    .reset_index()
    .sort_values("win_rate", ascending=False)
)

# Express the rate as a percentage
win_per_civ["win_rate"] *= 100
print(win_per_civ[["civ", "win_rate"]])


Point 4: Usage rate of every civ used by pro players (rating > 2000) in team matches on the islands map

In [None]:
# Rows from every ladder other than "RM_1v1", with a player rating above 2000,
# played on the "islands" map.
# NOTE(review): despite the "1v1" in its name, this frame holds the rows that
# are NOT on the RM_1v1 ladder; the name is kept because it is a notebook-level variable.
off_1v1_ladder = join_matches["ladder"].ne("RM_1v1")
rated_above_2000 = join_matches["rating"].gt(2000)
on_islands = join_matches["map"].eq("islands")
join_matches_1v1_pros = join_matches[off_1v1_ladder & rated_above_2000 & on_islands]

# Number of rows per civ (rows with a missing "token" are not counted)
civ_counts = (
    join_matches_1v1_pros
    .groupby("civ")["token"]
    .count()
    .rename("counts")
    .reset_index()
)

# Keep the five civs with the highest counts.
# NOTE(review): the section heading asks for a usage rate of every civ, while
# this cell reports raw counts for the top five only - confirm which is intended.
top5_civs = civ_counts[["civ", "counts"]].sort_values(by='counts', ascending=False).head(5)
print(top5_civs[["civ", "counts"]])