In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import lightgbm
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Seed the stdlib and NumPy global random generators with one shared value.
SEED = 64
random.seed(SEED)
np.random.seed(SEED)

In [None]:

def reduce_memory_usage(df):
    """Downcast the columns of ``df`` in place to smaller dtypes and return it.

    * Object / string columns are converted to ``category``.
    * Signed and unsigned NumPy integer columns are cast to the narrowest
      integer type of the same signedness whose range contains the column's
      min and max (bounds inclusive).
    * NumPy float columns are cast to ``float16`` or ``float32`` when the
      column's min and max fit that type's finite range.
    * Every other dtype (bool, datetime, category, nullable extension
      dtypes, ...) is left untouched.

    A column is never widened, and a column whose min/max is NaN (empty or
    all-missing) is left unchanged because no range comparison succeeds.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.
    """
    # Candidate target types, narrowest first, keyed by NumPy dtype kind.
    candidates_by_kind = {
        'i': (np.int8, np.int16, np.int32),
        'u': (np.uint8, np.uint16, np.uint32),
        'f': (np.float16, np.float32),
    }

    for col in df.columns:
        col_type = df[col].dtype

        if pd.api.types.is_object_dtype(col_type) or isinstance(col_type, pd.StringDtype):
            df[col] = df[col].astype('category')
            continue

        # Only plain NumPy int/uint/float columns are downcast; anything else
        # has no meaningful numeric range check and must not be force-cast.
        if not isinstance(col_type, np.dtype) or col_type.kind not in candidates_by_kind:
            continue

        c_min = df[col].min()
        c_max = df[col].max()
        limits_of = np.finfo if col_type.kind == 'f' else np.iinfo

        for candidate in candidates_by_kind[col_type.kind]:
            # Candidates are ordered by size: once one is not smaller than the
            # current dtype there is nothing left to gain.
            if np.dtype(candidate).itemsize >= col_type.itemsize:
                break
            limits = limits_of(candidate)
            # Inclusive bounds: a value equal to the type's limit still fits.
            # NOTE: for floats this checks range only; float16/float32 store
            # fewer significant digits, so values may be rounded.
            if c_min >= limits.min and c_max <= limits.max:
                df[col] = df[col].astype(candidate)
                break

    return df

In [None]:
# Read each competition CSV and downcast its columns straight away.
# reduce_memory_usage mutates the frame and returns that same object.
train = reduce_memory_usage(
    pd.read_csv('../input/tabular-playground-series-dec-2021/train.csv')
)
test = reduce_memory_usage(
    pd.read_csv('../input/tabular-playground-series-dec-2021/test.csv')
)

In [None]:
# Report the (rows, columns) shape of each frame.
print(f"dimensions of train: {train.shape}")
print(f"dimensions of test: {test.shape}")

In [None]:
# Display summary statistics for the training columns.
train.describe()

In [None]:
# Print per-column dtypes and non-null counts, plus the frame's memory
# footprint (useful to confirm the effect of reduce_memory_usage).
train.info()

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Count missing values in every training column.
print(train.isna().sum())

In [None]:
# Bar chart of how many training rows fall into each Cover_Type class.
fig, ax = plt.subplots(figsize=(7, 5))
sns.countplot(data=train, x='Cover_Type', ax=ax)

In [None]:
# Class frequencies, most common first (value_counts sorts descending by default).
train['Cover_Type'].value_counts()

In [None]:
# Target is Cover_Type; features are every remaining column except Id.
y = train['Cover_Type']
x = train.drop(['Id', 'Cover_Type'], axis=1)

In [None]:
# Hold out 20% of the rows for validation. `train_test_split` is already
# imported in the notebook's import cell, so it is not re-imported here.
X_train, X_val, y_train, y_val = train_test_split(
    x, y, test_size=0.2, random_state=123, shuffle=True
)

In [None]:
# Test features: drop Id so the columns match those in `x`.
test_df = test.drop('Id', axis=1)

In [None]:
from xgboost import XGBClassifier

# Hyper-parameters for a GPU-trained multi-class gradient-boosted tree model.
# NOTE(review): this API usage is version-sensitive — newer XGBoost releases
# may deprecate 'gpu_hist'/'predictor', expect `early_stopping_rounds` on the
# constructor, and require class labels encoded as 0..n_classes-1. Confirm
# against the installed version.
params = dict(
    objective='multi:softmax',
    tree_method='gpu_hist',
    eval_metric='mlogloss',
    booster='gbtree',
    gamma=0.75,
    max_depth=7,
    alpha=10,
    learning_rate=0.007,
    n_estimators=2000,
    predictor='gpu_predictor',
)

xgb = XGBClassifier(**params)

# Evaluate on the validation split every round; stop after 200 rounds
# without improvement.
xgb.fit(
    X_train,
    y_train,
    eval_set=[(X_val, y_val)],
    early_stopping_rounds=200,
    verbose=True,
)

In [None]:
# Predict the validation split with the fitted XGBoost model.
# Note: `y_pred` is reassigned in a later cell to hold the test-set predictions.
y_pred=xgb.predict(X_val)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of validation rows the XGBoost model classified correctly.
xgb_val_accuracy = accuracy_score(y_val, y_pred)
print('Accuracy Score : ', xgb_val_accuracy)

In [None]:
# Predict the competition test set with XGBoost. This overwrites the
# validation predictions previously stored in `y_pred`.
y_pred = xgb.predict(test_df)

In [None]:
# Load the sample submission, set Cover_Type to the XGBoost test predictions,
# then write the file and preview it.
submission = pd.read_csv(
    '../input/tabular-playground-series-dec-2021/sample_submission.csv'
).assign(Cover_Type=y_pred)
submission.to_csv("submission2.csv", index=False)
submission.head()

In [None]:
from catboost import CatBoostClassifier

# CatBoost classifier on GPU device 0, all other settings at library defaults.
model = CatBoostClassifier(devices='0', task_type='GPU')
model.fit(X_train, y=y_train)

In [None]:
# Predict the validation split with the fitted CatBoost model.
# Note: `y_pred1` is reassigned in a later cell to hold the test-set predictions.
y_pred1=model.predict(X_val)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of validation rows the CatBoost model classified correctly.
catboost_val_accuracy = accuracy_score(y_val, y_pred1)
print('Accuracy Score : ', catboost_val_accuracy)

In [None]:
# Predict the competition test set with CatBoost. This overwrites the
# validation predictions previously stored in `y_pred1`.
y_pred1 = model.predict(test_df)

In [None]:
# Build the CatBoost submission file.
submission = pd.read_csv('../input/tabular-playground-series-dec-2021/sample_submission.csv')
# Use the CatBoost test predictions (`y_pred1`); `y_pred` holds the XGBoost
# predictions and would make this file a copy of submission2.csv.
# np.ravel flattens the result in case predict returned a column-shaped
# (n, 1) array — NOTE(review): confirm the shape CatBoost returns here.
submission['Cover_Type'] = np.ravel(y_pred1)
submission.to_csv("submission1.csv", index=False)
submission.head()