Dataset
- https://www.kaggle.com/datasets/filipechavesdemacedo/counter-strike-competitive-data/data

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import gc

In [5]:
# Folder containing the dataset's CSV files; edit this one constant if the
# files are stored somewhere other than ./data.
DATA_DIR = 'data'

# Read the three raw tables. These frames are kept untouched so that later
# cells can derive cleaned copies from them.
df_players = pd.read_csv(os.path.join(DATA_DIR, 'match_players.csv'))
# low_memory=False makes pandas parse the whole file at once instead of in
# chunks, which avoids mixed-dtype inference within a column.
df_results = pd.read_csv(os.path.join(DATA_DIR, 'match_results.csv'), low_memory=False)
df_stats = pd.read_csv(os.path.join(DATA_DIR, 'players_stats.csv'))

In [6]:
# List the column labels of the raw frame loaded from match_players.csv.
df_players.columns

Index(['Unnamed: 0', 'adr', 'assists', 'deaths', 'fkdiff', 'hs', 'kdratio',
       'kills', 'match_id', 'match_link', 'player_id', 'player_nick',
       'players_link', 'rating', 'team_name'],
      dtype='object')

In [7]:
# Build a trimmed copy of the raw players frame without the auto-named
# "Unnamed: 0" column (presumably a row index written into the CSV) and
# without the players_link column. df_players itself is left unmodified.
df_players_clean = df_players.drop(columns=["Unnamed: 0", "players_link"])
# Bare last expression so the notebook renders the resulting frame.
df_players_clean

Unnamed: 0,adr,assists,deaths,fkdiff,hs,kdratio,kills,match_id,match_link,player_id,player_nick,rating,team_name
0,163.2,3,10,1,10,90.0%,32,32227,/stats/matches/mapstatsid/32227/intz-vs-g3x,5736,kNgV-,2.44,g3x
1,81.0,3,6,1,5,75.0%,17,32227,/stats/matches/mapstatsid/32227/intz-vs-g3x,2532,mch,1.55,g3x
2,77.6,3,10,1,11,75.0%,16,32227,/stats/matches/mapstatsid/32227/intz-vs-g3x,7382,steel,1.41,g3x
3,77.0,2,10,-1,6,85.0%,14,32227,/stats/matches/mapstatsid/32227/intz-vs-g3x,5698,pava,1.38,g3x
4,61.2,4,12,4,4,85.0%,10,32227,/stats/matches/mapstatsid/32227/intz-vs-g3x,10563,caike,1.16,g3x
...,...,...,...,...,...,...,...,...,...,...,...,...,...
945860,105.0,7,18,0,11,66.7%,23,111478,/stats/matches/mapstatsid/111478/ago-vs-pact,8327,Furlan,1.28,AGO
945861,81.0,5,18,0,9,62.5%,13,111478,/stats/matches/mapstatsid/111478/ago-vs-pact,19044,F1KU,0.92,AGO
945862,55.5,0,17,-1,7,54.2%,15,111478,/stats/matches/mapstatsid/111478/ago-vs-pact,20289,DGL,0.78,AGO
945863,46.8,4,17,-2,5,62.5%,10,111478,/stats/matches/mapstatsid/111478/ago-vs-pact,8539,leman,0.65,AGO


In [8]:
# List the column labels of the raw frame loaded from match_results.csv.
df_results.columns

Index(['Unnamed: 0', 'data_unix', 'event_name', 'map', 'match_id',
       'match_link', 'offset', 'team_1_id', 'team_1_link', 'team_1_name',
       'team_1_score', 'team_2_id', 'team_2_link', 'team_2_name',
       'team_2_score', 'timestamp', 'hour', 'day', 'week', 'month', 'year',
       'weekday'],
      dtype='object')

In [9]:
# Build a trimmed copy of the raw results frame without the auto-named
# "Unnamed: 0" column (presumably a row index written into the CSV).
# df_results itself is left unmodified.
df_results_clean = df_results.drop(columns=["Unnamed: 0"])
# Bare last expression so the notebook renders the resulting frame.
df_results_clean

Unnamed: 0,data_unix,event_name,map,match_id,match_link,offset,team_1_id,team_1_link,team_1_name,team_1_score,...,team_2_link,team_2_name,team_2_score,timestamp,hour,day,week,month,year,weekday
0,1622397600000,Elisa Invitational Summer 2021 Regionals,Overpass,121306,/stats/matches/mapstatsid/121306/case-vs-copen...,0,10894,/stats/teams/10894/case,Case,8,...,/stats/teams/7461/copenhagen-flames,Copenhagen Flames,16,2021-05-30 15:00:00,15,30,21,5,2021,6
1,1622397600000,Elisa Invitational Summer 2021 Regionals,Inferno,121296,/stats/matches/mapstatsid/121296/copenhagen-fl...,0,7461,/stats/teams/7461/copenhagen-flames,Copenhagen Flames,16,...,/stats/teams/10894/case,Case,3,2021-05-30 15:00:00,15,30,21,5,2021,6
2,1622394900000,ESEA Spring Cash Cup 6 Europe,Inferno,121298,/stats/matches/mapstatsid/121298/mad-lions-vs-...,0,8362,/stats/teams/8362/mad-lions,MAD Lions,16,...,/stats/teams/11103/19cm-gang,19CM GANG,8,2021-05-30 14:15:00,14,30,21,5,2021,6
3,1622394900000,ESEA Spring Cash Cup 6 Europe,Mirage,121292,/stats/matches/mapstatsid/121292/mad-lions-vs-...,0,8362,/stats/teams/8362/mad-lions,MAD Lions,12,...,/stats/teams/11103/19cm-gang,19CM GANG,16,2021-05-30 14:15:00,14,30,21,5,2021,6
4,1622394600000,Ghetto eGames Cup 5,Vertigo,121304,/stats/matches/mapstatsid/121304/contact-light...,0,11104,/stats/teams/11104/contact-light,Contact Light,16,...,/stats/teams/11079/trasko,Trasko,19,2021-05-30 14:10:00,14,30,21,5,2021,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94602,1348254000000,DreamHack Valencia 2012,Dust2_se,12868,/stats/matches/mapstatsid/12868/nip-vs-34united,94600,4411,/stats/teams/4411/nip,NIP,16,...,/stats/teams/4470/34united,34united,2,2012-09-21 16:00:00,16,21,38,9,2012,4
94603,1347562912000,CyberGamer Qualifier Cup #1 by Tt eSPORTS,Dust2_se,12841,/stats/matches/mapstatsid/12841/fmesports-vs-b...,94600,4445,/stats/teams/4445/fmesports,FMESPORTS,7,...,/stats/teams/4444/blight,Blight,16,2012-09-13 16:01:52,16,13,37,9,2012,3
94604,1347562800000,CyberGamer Qualifier Cup #1 by Tt eSPORTS,Inferno_se,12840,/stats/matches/mapstatsid/12840/blight-vs-fmes...,94600,4444,/stats/teams/4444/blight,Blight,16,...,/stats/teams/4445/fmesports,FMESPORTS,2,2012-09-13 16:00:00,16,13,37,9,2012,3
94605,1347562800000,Go4CS:GO Cup #6,Mirage_ce,12839,/stats/matches/mapstatsid/12839/nip-vs-prime,94600,4411,/stats/teams/4411/nip,NIP,16,...,/stats/teams/4443/prime,PRiME,1,2012-09-13 16:00:00,16,13,37,9,2012,3


In [10]:
# List the column labels of the raw frame loaded from players_stats.csv.
df_stats.columns

Index(['Unnamed: 0', 'assists_round', 'deaths_round', 'dmg_round',
       'gnd_dmg_round', 'hs_percentage', 'impact', 'kast', 'kd_ratio',
       'kills_round', 'maps_played', 'player_age', 'player_country',
       'player_id', 'player_link', 'player_nick', 'rating_1', 'rounds_played',
       'save_team_round', 'saved_by_round', 'total_deaths', 'total_kills'],
      dtype='object')

In [11]:
# Build a trimmed copy of the raw player-stats frame without the auto-named
# "Unnamed: 0" column (presumably a row index written into the CSV) and
# without the player_link column. df_stats itself is left unmodified.
df_stats_clean = df_stats.drop(columns=["Unnamed: 0", "player_link"])
# Bare last expression so the notebook renders the resulting frame.
df_stats_clean

Unnamed: 0,assists_round,deaths_round,dmg_round,gnd_dmg_round,hs_percentage,impact,kast,kd_ratio,kills_round,maps_played,player_age,player_country,player_id,player_nick,rating_1,rounds_played,save_team_round,saved_by_round,total_deaths,total_kills
0,0.11,0.71,72.9,2.8,58.3%,0.98,67.2%,0.97,0.68,79,19,Kazakhstan,18549,laser,1.00,2093,0.10,0.09,1477,1433
1,0.12,0.70,76.7,4.1,53.0%,1.12,69.1%,1.01,0.71,189,23,Australia,16531,tensai,1.06,4857,0.11,0.11,3404,3455
2,0.17,0.66,88.8,3.8,19.8%,1.33,74.8%,1.23,0.81,5,-,Brazil,17334,Seris,1.28,131,0.14,0.11,86,106
3,0.06,0.69,54.5,3.0,31.1%,0.44,68.7%,0.79,0.54,3,31,Australia,14508,deefekt,0.76,83,0.06,0.07,57,45
4,0.14,0.74,60.7,2.8,32.4%,0.76,65.9%,0.76,0.56,5,-,Korea,11666,fAwn,0.82,132,0.14,0.13,98,74
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14585,0.17,0.81,65.6,3.2,33.3%,0.60,61.2%,0.61,0.50,4,-,Korea,11541,hide,0.73,103,0.06,0.08,83,51
14586,0.16,0.88,49.0,1.3,40.0%,0.32,53.6%,0.41,0.36,3,22,United States,17463,catez,0.47,56,0.04,0.11,49,20
14587,0.03,0.82,73.6,3.0,50.0%,0.92,52.2%,0.78,0.64,4,-,United States,15772,Vegaborne,0.83,90,0.03,0.08,74,58
14588,0.13,0.71,63.2,2.5,50.4%,0.78,63.9%,0.73,0.52,10,-,Denmark,16701,gudluc4z,0.85,255,0.12,0.12,181,133
