## **Libraries**

In [4]:
import pandas as pd
import numpy as np

## **Extract**

In [5]:
cs_data = pd.read_csv('datasets/raw/csgo_player_stats.csv')

In [6]:
valorant_data = pd.read_csv('datasets/raw/valorant_player_stats.csv')

## **Transform**

In [7]:
cs_data.isnull().sum()

Unnamed: 0      0
name            0
country         0
teams           0
total_maps      0
total_rounds    0
kd_diff         0
kd              0
rating          0
dtype: int64

In [8]:
valorant_data.isnull().sum()

player       0
country      0
team       188
rounds       0
rating       0
ACS          0
K/D          0
ADR          0
KPR          0
DPR          0
APR          0
FBPR         0
FDPR         0
HS%          0
FBSR%        0
dtype: int64

In [9]:
valorant_data.team.fillna('Solo', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  valorant_data.team.fillna('Solo', inplace=True)


In [10]:
valorant_data.isnull().sum()

player     0
country    0
team       0
rounds     0
rating     0
ACS        0
K/D        0
ADR        0
KPR        0
DPR        0
APR        0
FBPR       0
FDPR       0
HS%        0
FBSR%      0
dtype: int64

In [11]:
cs_data.columns

Index(['Unnamed: 0', 'name', 'country', 'teams', 'total_maps', 'total_rounds',
       'kd_diff', 'kd', 'rating'],
      dtype='object')

In [12]:
valorant_data.columns

Index(['player', 'country', 'team', 'rounds', 'rating', 'ACS', 'K/D', 'ADR',
       'KPR', 'DPR', 'APR', 'FBPR', 'FDPR', 'HS%', 'FBSR%'],
      dtype='object')

In [13]:
cs_data.drop(columns=['Unnamed: 0','total_maps', 'kd_diff'], axis=1, inplace=True)

In [14]:
cs_data.rename(columns={'name': 'player_name', 'teams': 'team'}, inplace=True)
cs_data['game'] = 'CSGO'
cs_data.columns

Index(['player_name', 'country', 'team', 'total_rounds', 'kd', 'rating',
       'game'],
      dtype='object')

In [15]:
valorant_data.rename(columns={'player': 'player_name', 'rounds': 'total_rounds', 'K/D': 'kd'}, inplace=True)

In [16]:
valorant_data.drop(columns=['ACS', 'ADR', 'KPR', 'DPR', 'APR', 'FBPR', 'FDPR', 'HS%', 'FBSR%'], inplace=True)

In [17]:
valorant_data=valorant_data[['player_name', 'country', 'team', 'total_rounds', 'kd', 'rating']]
valorant_data['game'] = 'Valorant'
valorant_data.columns

Index(['player_name', 'country', 'team', 'total_rounds', 'kd', 'rating',
       'game'],
      dtype='object')

In [18]:
type(cs_data.team[0])

str

In [19]:
cs_data.team=pd.DataFrame([value.strip('[').strip(']').split(', ')[0].strip("'") for value in cs_data.team])

In [20]:
cs_data.team[0]

'Vitality'

In [21]:
valorant_data.country.unique()

array(['cl', 'ru', 'id', 'br', 'kr', 'cn', 'world', 'be', 'rs', 'de',
       'ca', 'fi', 'us', 'th', 'latam', 'gb', 'ph', 'fr', 'vn', 'eu',
       'nl', 'co', 'ch', 'dk', 'tr', 'sg', 'pl', 'in', 'kw', 'lk', 'eg',
       'la', 'jp', 'pe', 'es', 'my', 'ee', 'se', 'cis', 'tn', 'pk', 'ar',
       'gt', 'ma', 'ro', 'ie', 'do', 'mx', 'cz', 'ae', 'il', 'lv', 'au',
       'asia', 'bd', 'al', 'hu', 'bg', 'hk', 'lt', 'kz', 'at', 'tw', 'it',
       've', 'cr', 'dz', 'mv', 'ec', 'ua', 'jo', 'md', 'by', 'kh', 'sa',
       'qa', 'pr', 'ps', 'sk', 'ba', 'mk', 'za', 'no', 'pt', 'gr', 'wa',
       'hr', 'sy', 'lb', 'mn', 'nz', 'bh', 'si', 'py', 'uy', 'is', 'bn',
       'ir', 'hn', 'oce'], dtype=object)

In [22]:
# Update the country column in valorant_data to full country names
countries_short=['cl', 'ru', 'id', 'br', 'kr', 'cn', 'world', 'be', 'rs', 'de',
       'ca', 'fi', 'us', 'th', 'latam', 'gb', 'ph', 'fr', 'vn', 'eu',
       'nl', 'co', 'ch', 'dk', 'tr', 'sg', 'pl', 'in', 'kw', 'lk', 'eg',
       'la', 'jp', 'pe', 'es', 'my', 'ee', 'se', 'cis', 'tn', 'pk', 'ar',
       'gt', 'ma', 'ro', 'ie', 'do', 'mx', 'cz', 'ae', 'il', 'lv', 'au',
       'asia', 'bd', 'al', 'hu', 'bg', 'hk', 'lt', 'kz', 'at', 'tw', 'it',
       've', 'cr', 'dz', 'mv', 'ec', 'ua', 'jo', 'md', 'by', 'kh', 'sa',
       'qa', 'pr', 'ps', 'sk', 'ba', 'mk', 'za', 'no', 'pt', 'gr', 'wa',
       'hr', 'sy', 'lb', 'mn', 'nz', 'bh', 'si', 'py', 'uy', 'is', 'bn',
       'ir', 'hn', 'oce']
countries_full=['Chile', 'Russia', 'Indonesia', 'Brazil', 'South Korea', 'China', 'World', 'Belgium', 'Serbia', 'Germany',
       'Canada', 'Finland', 'United States', 'Thailand', 'Latin America', 'Great Britain', 'Philippines', 'France', 'Vietnam', 'Europe',
       'Netherlands', 'Colombia', 'Switzerland', 'Denmark', 'Turkey', 'Singapore', 'Poland', 'India', 'Kuwait', 'Sri Lanka', 'Egypt',
       'Latin America', 'Japan', 'Peru', 'Spain', 'Malaysia', 'Estonia', 'Sweden', 'CIS', 'Tunisia', 'Pakistan', 'Argentina',
       'Guatemala', 'Morocco', 'Romania', 'Ireland', 'Dominican Republic', 'Mexico', 'Czech Republic', 'United Arab Emirates', 'Israel', 'Latvia', 'Australia',
       'Asia', 'Bangladesh', 'Albania', 'Hungary', 'Bulgaria', 'Hong Kong', 'Lithuania', 'Kazakhstan', 'Austria', 'Taiwan', 'Italy',
       'Venezuela', 'Costa Rica', 'Algeria', 'Maldives', 'Ecuador', 'Ukraine', 'Jordan', 'Moldova', 'Belarus', 'Cambodia', 'Saudi Arabia',
       'Qatar', 'Puerto Rico', 'Palestine', 'Slovakia', 'Bosnia and Herzegovina', 'North Macedonia', 'South Africa', 'Norway', 'Portugal', 'Greece', 'Wales',
       'Croatia', 'Syria', 'Lebanon', 'Mongolia', 'New Zealand', 'Bahrain', 'Slovenia', 'Paraguay', 'Uruguay', 'Iceland', 'Brunei',
       'Iran', 'Honduras', 'Oceania']

country_dict = dict(zip(countries_short, countries_full))

In [23]:
len(countries_short)

100

In [24]:
len(countries_full)

100

In [25]:
len(country_dict)

100

In [26]:
valorant_data['country'] = valorant_data['country'].map(country_dict)

In [27]:
valorant_data.country.nunique()

99

In [28]:
cs_data.columns

Index(['player_name', 'country', 'team', 'total_rounds', 'kd', 'rating',
       'game'],
      dtype='object')

In [29]:
valorant_data.columns

Index(['player_name', 'country', 'team', 'total_rounds', 'kd', 'rating',
       'game'],
      dtype='object')

In [31]:
cs_data.to_csv('datasets/cleaned/esports/csgo/player_stats.csv', index=False)

In [32]:
valorant_data.to_csv('datasets/cleaned/esports/valorant/player_stats.csv', index=False)