# 0. Import Library

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.offsetbox import (OffsetImage,AnnotationBbox)
from matplotlib import transforms
import matplotlib.animation as animation
import time as time

# Apply seaborn's default theme so every matplotlib figure created below uses it.
sns.set_theme()

# 2. Data Understanding
## 2.1. Tổng quan kích thước, kiểu dữ liệu

In [None]:
# Load the dataset into a DataFrame (the path is relative to the working directory).
csv_path = 'data/kill_match_stats_v2.csv'
df = pd.read_csv(csv_path)

# Preview the first five rows.
df.head(n=5)

In [None]:
# Show the DataFrame's dimensions as a (rows, columns) tuple.
df.shape

In [None]:
# Print a per-column summary: dtype, non-null count, plus overall memory usage.
df.info()

## 2.2. Phân bố của dữ liệu trong từng cột
### 2.2.1. Các cột tọa độ

In [None]:
# 2x2 grid of histograms for the coordinate columns.
# Columns of the grid share their x axis, rows share their y axis.
fig, axes = plt.subplots(
    nrows=2,
    ncols=2,
    figsize=(12, 8),
    sharex='col',
    sharey='row',
)
fig.subplots_adjust(wspace=0.1)

# (column name, extra hist styling), listed in row-major panel order.
panels = [
    ('kx', {'edgecolor': 'C0'}),
    ('ky', {'edgecolor': 'C0'}),
    ('vx', {'color': 'C1', 'edgecolor': 'C1'}),
    ('vy', {'color': 'C1', 'edgecolor': 'C1'}),
]

for panel_ax, (column, style) in zip(axes.flat, panels):
    panel_ax.hist(df[column], bins=100, **style)
    panel_ax.set_title(column)

### 2.2.2. Cột `dis`

In [None]:
# Wide figure for the histogram of the `dis` column.
fig = plt.figure(figsize=(12, 6))

# Long-form call: take the `dis` column from the DataFrame, split into 1000 bins.
sns.histplot(data=df, x='dis', bins=1000)

### 2.2.3. Cột `time` và `phase`

In [None]:
# Two side-by-side panels: `time` histogram (left) and `phase` counts (right).
fig, axes = plt.subplots(ncols=2, figsize=(15, 5))
time_ax, phase_ax = axes

# Left panel: distribution of the `time` column.
time_ax.hist(df['time'], bins=1000, edgecolor='C0')
time_ax.set_title('time')

# Right panel: number of non-null `type` entries for each `phase` value.
data = df.groupby('phase')['type'].count().reset_index()
sns.barplot(data=data, x='phase', y='type', color='C1', ax=phase_ax)
phase_ax.set_title('phase')

In [None]:
fig, ax = plt.subplots(figsize=(12, 4))

# Histogram of `time`; only the per-bin counts are needed afterwards.
y = plt.hist(df['time'], bins=1000, edgecolor='C0')[0]

# (start, end) intervals along the time axis -- presumably one per phase.
phase_rect = [
    (0, 720),
    (721, 1060),
    (1061, 1300),
    (1301, 1480),
    (1481, 1650),
    (1651, 1760),
    (1761, 1880),
    (1881, 1970),
    (1971, 2150),
]

# Shade each interval from the baseline up to the tallest bar.
peak = y.max()
for start, end in phase_rect:
    band = patches.Rectangle((start, 0), end - start, peak, color='C1', alpha=0.2)
    ax.add_patch(band)

### 2.2.4. Cột `killed_by` và `type`

In [None]:
# Horizontal bar chart of how often each `killed_by` value occurs, with the
# count printed next to each bar and an image used in place of each tick label.

# Count rows per `killed_by` value. Both output columns are named explicitly:
# pandas >= 2.0 makes value_counts().reset_index() return the columns
# ('killed_by', 'count') instead of ('index', 'killed_by'), which would break
# every lookup below. This form yields ('index', 'killed_by') on all versions.
data = (
    df['killed_by']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='killed_by')
)

fig, ax = plt.subplots(figsize=(8, 40))
# Draw on the axes created above explicitly rather than relying on pyplot's
# "current axes" state.
sns.barplot(data=data, x='killed_by', y='index', color='C0', ax=ax)

# Leave 30% headroom on the right so the count labels fit inside the axes.
ax.set_xlim(0, data['killed_by'].max() * 1.3)
ax.set_yticklabels([])
ax.set_xticklabels([])
ax.set_xlabel('')
ax.set_ylabel('')
ax.grid(visible=False)
ax.set_title('Killed_by count')

for i, (name, count) in enumerate(zip(data['index'], data['killed_by'])):
    # Annotate the bar with its count, slightly right of the bar end.
    ax.text(count + 10000, i, f'{count:,}', va='center', fontsize=11)

    # Use the matching image from asset/item as the tick label.
    img = plt.imread('asset/item/' + name + '.png', format='png')

    imagebox = OffsetImage(img, zoom=0.07)
    imagebox.image.axes = ax

    # Place the image left of x = 0 on row i, right-aligned and vertically
    # centred, with no visible frame.
    ab = AnnotationBbox(
        imagebox,
        (0, 0),
        xybox=(-10000, i),
        xycoords='data',
        box_alignment=(1, 0.5),
        bboxprops={'edgecolor': 'none'},
    )
    ax.add_artist(ab)


In [None]:
# Horizontal bar chart of how often each `type` value occurs, with the count
# printed next to each bar and an icon used in place of each tick label.

# Count rows per `type` value. Both output columns are named explicitly:
# pandas >= 2.0 makes value_counts().reset_index() return the columns
# ('type', 'count') instead of ('index', 'type'), which would break every
# lookup below. This form yields ('index', 'type') on all versions.
data = (
    df['type']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='type')
)

fig, ax = plt.subplots(figsize=(8, 8))
# Draw on the axes created above explicitly rather than relying on pyplot's
# "current axes" state.
sns.barplot(data=data, x='type', y='index', color='C0', ax=ax)

# Leave 30% headroom on the right so the count labels fit inside the axes.
ax.set_xlim(0, data['type'].max() * 1.3)
ax.set_yticklabels([])
ax.set_xticklabels([])
ax.set_xlabel('')
ax.set_ylabel('')
ax.grid(visible=False)
ax.set_title('Type count')

for i, (name, count) in enumerate(zip(data['index'], data['type'])):
    # Annotate the bar with its count, slightly right of the bar end.
    ax.text(count + 100000, i, f'{count:,}', va='center', fontsize=11)

    # Use the matching icon from asset/type_item as the tick label.
    img = plt.imread('asset/type_item/' + name + '.png')

    imagebox = OffsetImage(img, zoom=0.05)
    imagebox.image.axes = ax

    # Place the icon left of x = 0 on row i, right-aligned and vertically
    # centred, with no visible frame.
    ab = AnnotationBbox(
        imagebox,
        (0, 0),
        xybox=(-100000, i),
        xycoords='data',
        box_alignment=(1, 0.5),
        bboxprops={'edgecolor': 'none'},
    )
    ax.add_artist(ab)