In [None]:
#패키지 불러오기
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error
import gc
import lightgbm as lgb
import os

In [None]:
# Load the training and test tables
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/pubg-finish-placement-prediction/train_V2.csv',
        '../input/pubg-finish-placement-prediction/test_V2.csv',
    )
)

In [None]:
# Reduce memory usage
def reduce_mem_usage(df, use_float16=True):
    """Downcast the numeric columns of ``df`` to the narrowest dtype that holds their range.

    The frame is modified in place and also returned.

    * Signed integer columns become the smallest of int8/int16/int32/int64
      whose (inclusive) limits contain the column's min and max.
    * Unsigned integer columns are treated the same way with the uint* types,
      so they are no longer routed into the float branch.
    * Float columns become float16 (when allowed), float32 or float64.
    * Everything else (bool, object, datetime, pandas extension dtypes such as
      category or nullable integers) is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are downcast.
    use_float16 : bool, default True
        When False, float columns are never narrowed below float32.
        float16 has an 11-bit significand, so it cannot represent every
        integer above 2048 exactly; pass False when that rounding matters.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    signed_types = (np.int8, np.int16, np.int32, np.int64)
    unsigned_types = (np.uint8, np.uint16, np.uint32, np.uint64)
    float_types = (np.float16, np.float32) if use_float16 else (np.float32,)

    for col in df.columns:
        col_type = df[col].dtype

        # Extension dtypes (category, nullable ints, strings, ...) are not
        # plain NumPy dtypes and may not support min()/astype() as used below.
        if not isinstance(col_type, np.dtype):
            continue
        kind = col_type.kind
        if kind not in 'iuf':
            continue

        if kind in 'iu':
            # An integer column cannot hold NaN, so min/max are only
            # undefined when there are no rows at all.
            if df[col].empty:
                continue
            c_min = int(df[col].min())
            c_max = int(df[col].max())
            candidates = signed_types if kind == 'i' else unsigned_types
            for candidate in candidates:
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            c_min = df[col].min()
            c_max = df[col].max()
            # NaN or infinite extremes fail every comparison below and
            # therefore fall through to float64.
            target = np.float64
            for candidate in float_types:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    target = candidate
                    break
            df[col] = df[col].astype(target)

    return df

In [None]:
# Downcast both frames, training first
df_train, df_test = (reduce_mem_usage(frame) for frame in (df_train, df_test))

In [None]:
# Display the training frame
df_train

In [None]:
# Display the test frame
df_test

In [None]:
# Missing values per column in the test frame
df_test.isna().sum()

In [None]:
# Missing values per column in the training frame
df_train.isna().sum()

In [None]:
# Training rows whose target is missing
df_train.loc[df_train["winPlacePerc"].isna()]

In [None]:
# Drop every training row that contains a missing value
df_train = df_train.dropna()

In [None]:
# Player id next to its target value
df_train.loc[:, ['Id', 'winPlacePerc']]

In [None]:
# Training data: column dtypes, non-null counts and memory usage
df_train.info()

In [None]:
# Test data: column dtypes, non-null counts and memory usage
df_test.info()

In [None]:
# Row/column counts of both frames
print("Train :",df_train.shape)
print("Test :",df_test.shape)

In [None]:
# Rows whose rankPoints equals -1, shown with the id and the three point columns
df_train.loc[df_train['rankPoints'].eq(-1), ["Id", "winPoints", "rankPoints", "killPoints"]]

In [None]:
# Summary statistics (without the count row), one feature per row
df_train.describe().drop(index='count').transpose()

In [None]:
# Frequency of each numGroups value
df_train["numGroups"].value_counts()

In [None]:
# Pairwise correlations. The frame still contains string columns at this
# point (Id, groupId, matchId, matchType), and DataFrame.corr() raises on
# non-numeric data in pandas >= 2.0, so restrict it to numeric columns first.
corr = df_train.select_dtypes(include='number').corr()

# Keep only the rows whose correlation with the target exceeds 0.3 in magnitude
high = corr[corr["winPlacePerc"].abs() > 0.3]
plt.figure(figsize = (25, 7))
sns.heatmap(data = high, annot = True, fmt = '.2f', linewidths = .5, cmap = 'Blues')

In [None]:
# Rows of the correlation matrix with |corr to winPlacePerc| above 0.3
corr[corr["winPlacePerc"].abs() > 0.3]

In [None]:
# Heatmap of the complete correlation matrix
plt.figure(figsize=(20, 15))
sns.heatmap(
    corr,
    cmap='Blues',
    linewidths=.5,
    fmt='.2f',
    annot=True,
)

In [None]:
# Target values of the rows with a positive maxPlace
df_train.loc[df_train["maxPlace"].gt(0), "winPlacePerc"]

In [None]:
# Feature combinations
# NOTE(review): these sums/ratios are evaluated in the columns' current dtypes.
# reduce_mem_usage has already run on this frame, so those may be narrow
# (int8/float16) and could overflow -- confirm against the data ranges.
df_train['totalDistance'] = (
    df_train['rideDistance'].add(df_train['walkDistance']).add(df_train['swimDistance'])
)
df_train['healitem'] = df_train['heals'].add(df_train['boosts'])
# NOTE(review): a maxPlace of 0 would make this ratio non-finite.
df_train['killPlace_over_maxPlace'] = df_train['killPlace'].div(df_train['maxPlace'])
df_train['onekill'] = df_train['headshotKills'].add(df_train['roadKills'])
df_train['kills_assists'] = df_train['kills'].add(df_train['assists'])

df_train['ELO'] = (
    df_train['killPoints'].add(df_train['rankPoints']).add(df_train['winPoints'])
)
df_train['item'] = df_train['boosts'].add(df_train['weaponsAcquired'])

# Target overrides keyed on maxPlace / numGroups
df_train.loc[df_train['maxPlace'].eq(0), 'winPlacePerc'] = 0
df_train.loc[df_train['maxPlace'].eq(1), 'winPlacePerc'] = 1

df_train.loc[df_train['maxPlace'].gt(1) & df_train['numGroups'].eq(1), 'winPlacePerc'] = 0

# Force the target into [0, 1]
df_train.loc[df_train['winPlacePerc'].lt(0), 'winPlacePerc'] = 0
df_train.loc[df_train['winPlacePerc'].gt(1), 'winPlacePerc'] = 1

# fpp (first-person perspective / 1인칭):

- rank : squad-fpp / duo-fpp / solo-fpp
- normal : normal-squad-fpp / normal-duo-fpp / normal-solo-fpp
- event mode : crashfpp / flarefpp


# tpp (third-person perspective / 3인칭):

- rank : squad / duo / solo
- normal : normal-squad / normal-duo / normal-solo
- event mode : crashtpp / flaretpp

In [None]:
# Convert matchType to a number
# solo - 1 / duo - 2 / squad - 4
def mapper(x):
    """Map a matchType string to 1 (solo), 2 (duo or crash) or 4 (anything else)."""
    if 'solo' in x:
        return 1
    if 'duo' in x or 'crash' in x:
        return 2
    return 4

# A single pass yields the same codes as labelling first and encoding second
df_train['matchType'] = df_train['matchType'].apply(mapper)

In [None]:
# Run the downcast again so the columns added since the first pass are narrowed too
df_train = reduce_mem_usage(df_train)
df_train

In [None]:
# Column dtypes and memory usage after the second downcast
df_train.info()

In [None]:
# Y: move the target column out of the feature frame
df_target = df_train.pop('winPlacePerc')

# Remove the identifier columns because they are strings
df_train.drop(columns=['Id', 'groupId', 'matchId'], inplace=True)

In [None]:
# Turn the target Series into a one-column DataFrame and display it
# NOTE(review): not re-runnable as written -- a DataFrame has no to_frame()
df_target = df_target.to_frame()
df_target

In [None]:
# Shapes of the feature matrix (X) and the target (Y)
print("X 데이터 형태 :", df_train.shape)
print("Y 데이터 형태 :", df_target.shape)

In [None]:
from sklearn.model_selection import train_test_split

# First hold out 20% of the rows as the test split ...
X_trainval, X_test, y_trainval, y_test = train_test_split(
    df_train,
    df_target,
    random_state=42,
    test_size=0.2,
)
# ... then hold out 20% of the remainder as the validation split
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval,
    y_trainval,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Shapes of the training and test splits
print("X_train 형태 :", X_train.shape)
print("X_test 형태 :", X_test.shape)
print("Y_train 형태 :", y_train.shape)
print("Y_test 형태 :", y_test.shape)

In [None]:
from sklearn.linear_model import RidgeCV

# Cross-validated search over candidate regularisation strengths
ridge_cv = RidgeCV(
    alphas=[
        0.01, 0.03, 0.06, 0.1, 0.3, 0.6,
        1, 3, 6, 10, 15, 25, 30, 35, 40, 45, 50,
        55, 60, 75, 100, 200, 500, 1000,
    ]
).fit(X_train, y_train)
alpha = ridge_cv.alpha_
print("Best alpha :", alpha)

In [None]:
from sklearn.linear_model import Ridge

# Use the regularisation strength selected by RidgeCV in the previous cell
# instead of a hand-copied constant, so the model follows the search result.
ridge = Ridge(alpha=alpha)
ridge.fit(X_train, y_train)

# R^2 on the training split and on the held-out test split
print("학습용 데이터 세트 점수: {:.2f}".format(ridge.score(X_train, y_train)))
print("평가용 데이터 세트 점수: {:.2f}".format(ridge.score(X_test, y_test)))

In [None]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the ridge model on the training split
predicted = ridge.predict(X_train)
mean_squared_error(y_true=y_train, y_pred=predicted)

In [None]:
from sklearn.linear_model import Lasso

# Lasso with its default settings
lasso = Lasso().fit(X_train, y_train)

# R^2 on the training and test splits, then the number of non-zero coefficients
print("학습용 데이터 세트 점수: {:.2f}".format(lasso.score(X_train, y_train)))
print("평가용 데이터 세트 점수: {:.2f}".format(lasso.score(X_test, y_test)))
print("사용한 피처의 개수: ", np.count_nonzero(lasso.coef_))

In [None]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the lasso model on the training split
predicted = lasso.predict(X_train)
mean_squared_error(y_true=y_train, y_pred=predicted)

# Integer-cast copies of the features and the target
df_int_train, df_int_target = (
    frame.astype('int') for frame in (df_train, df_target)
)

# Narrow the integer feature copy and list the resulting dtypes
df_int_train = reduce_mem_usage(df_int_train)

print(df_int_train.dtypes)

# winPlacePerc is a continuous target, so this must be a regressor: fitting a
# classifier on y_train fails with "Unknown label type".
from sklearn.ensemble import GradientBoostingRegressor
gbrt = GradientBoostingRegressor(random_state = 0)
# y_train is a one-column frame; flatten it to the 1-D shape the estimator expects
gbrt.fit(X_train, np.ravel(y_train))

# R^2 on the training split and on the held-out test split
print("학습용 세트 점수: {:.3f}".format(gbrt.score(X_train, y_train)))
print("평가용 세트 점수: {:.3f}".format(gbrt.score(X_test, y_test)))

In [None]:
# Keep the player ids for the submission, then remove all identifier columns
asa = df_test.pop("Id")
df_test.drop(columns=["groupId", "matchId"], inplace=True)

def mapper(x):
    """Map a matchType string to 1 (solo), 2 (duo or crash) or 4 (anything else)."""
    if 'solo' in x:
        return 1
    return 2 if ('duo' in x or 'crash' in x) else 4

# A single pass yields the same codes as labelling first and encoding second
df_test["matchType"] = df_test['matchType'].apply(mapper)

In [None]:
df_test['totalDistance'] = df_test['rideDistance'] + df_test["walkDistance"] + df_test["swimDistance"]
df_test['healitem'] = df_test['heals'] + df_test['boosts']
df_test['killPlace_over_maxPlace'] = df_test['killPlace'] / df_test['maxPlace']
df_test["onekill"] = df_test["headshotKills"] + df_test["roadKills"]
df_test['kills_assists'] = df_test['kills'] + df_test['assists']

df_test["ELO"] = df_test["killPoints"] + df_test["rankPoints"] + df_test["winPoints"]
df_test["item"] = df_test["boosts"] + df_test["weaponsAcquired"]

df_test.loc[df_test.maxPlace == 0, "winPlacePerc"] = 0
df_test.loc[df_test.maxPlace == 1, "winPlacePerc"] = 1

df_test.loc[(df_test.maxPlace > 1) & (df_test.numGroups == 1), "winPlacePerc"] = 0

df_test.loc[df_test.winPlacePerc < 0, "winPlacePerc"] = 0
df_test.loc[df_test.winPlacePerc > 1, "winPlacePerc"] = 1

In [None]:
df_test = reduce_mem_usage(df_test)
df_test

In [None]:
del df_test["winPlacePerc"]

In [None]:
sub = ridge.predict(df_test)

In [None]:
# Wrap the prediction array in a DataFrame
subm = pd.DataFrame(sub)

In [None]:
# One-column frame holding the saved test ids
asas = asa.to_frame()

In [None]:
# Place ids and predictions side by side (aligned on the row index)
final = pd.concat(objs=[asas, subm], axis='columns')
final

In [None]:
# Give the prediction column (currently labelled 0) its submission name
pred = final.rename({0: 'winPlacePerc'}, axis='columns')
pred

In [None]:
# Write the submission file without the row index
pred.to_csv('submission.csv',index=False)