# **MOBILE PRICE PREDICTION USING XGB**

# Importing packages and the dataset

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing the training data

In [None]:
# Read the training CSV and report how many rows it contains.
train_data = pd.read_csv('/kaggle/input/mobile-price-classification/train.csv')
print('Number of Training Samples = {}'.format(len(train_data)))

In [None]:
# Preview the first five training rows.
train_data.head(n=5)

# Importing the test data

In [None]:
# Read the test CSV and report how many rows it contains.
test_data = pd.read_csv('/kaggle/input/mobile-price-classification/test.csv')
print('Number of Testing Samples = {}'.format(len(test_data)))

In [None]:
# Preview the first five test rows.
test_data.head(n=5)

# Checking for missing or null values

In [None]:
# DataFrame.isnull() is an alias of DataFrame.isna(), so one call already covers
# both "missing" and "null" values. Report the per-column counts for the train
# set and for the test set, since both frames are imputed further down.
print('\nMissing values of Train set:\n', train_data.isnull().sum())
print('\nMissing values of Test set:\n', test_data.isnull().sum())

# Listing all the available features

In [None]:
# Keep the training frame's column index and print it so the available
# feature names can be read off when building the feature subsets.
columns = train_data.columns
print(columns)

# Dealing with missing values

In [None]:
# Replace every missing entry with 0 in both frames.
train_data = train_data.fillna(0)
test_data = test_data.fillna(0)

# Trying different combinations of features

In [None]:
# Candidate feature subsets. The percentage noted above each list is the
# validation accuracy the author recorded for that subset.

# Validation accuracy 91.5%
selected_feature_set_1 = [
    'battery_power', 'blue', 'clock_speed', 'dual_sim',
    'fc', 'four_g', 'int_memory', 'm_dep',
    'mobile_wt', 'n_cores', 'pc', 'px_height',
    'px_width', 'ram', 'sc_h', 'sc_w',
    'talk_time', 'three_g', 'touch_screen', 'wifi',
]

# Validation accuracy 79.25%
selected_feature_set_2 = [
    'battery_power', 'clock_speed', 'dual_sim', 'fc',
    'four_g', 'int_memory', 'n_cores', 'ram',
    'talk_time', 'three_g', 'touch_screen', 'wifi',
]

# Validation accuracy 79.25%
selected_feature_set_3 = [
    'battery_power', 'blue', 'int_memory', 'ram',
    'sc_h', 'sc_w', 'touch_screen', 'wifi',
]

# Validation accuracy 80.5%
selected_feature_set_4 = [
    'battery_power', 'blue', 'dual_sim', 'four_g',
    'int_memory', 'm_dep', 'n_cores', 'pc',
    'ram', 'sc_h', 'sc_w', 'three_g',
    'touch_screen', 'wifi',
]

# Validation accuracy 77.75%
selected_feature_set_5 = [
    'int_memory', 'm_dep', 'mobile_wt', 'n_cores',
    'pc', 'px_height', 'px_width', 'ram',
    'sc_h', 'sc_w', 'talk_time', 'three_g',
    'touch_screen', 'wifi',
]

# Selecting features for the training data

In [None]:
# Build the training feature matrix from the first feature subset and preview it.
X = train_data.loc[:, selected_feature_set_1]
X.head(n=5)

# Creating the output label and checking number of classes

In [None]:
# The 'price_range' column is the prediction target; preview its first values.
y = train_data.loc[:, 'price_range']
y.head(n=5)

In [None]:
# Collect the distinct target labels to see how many classes there are.
classes = set(y.tolist())
print(classes)

# Importing required packages

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Feature scaling

In [None]:
# Fit a RobustScaler on the selected training features, then replace X with its
# scaled version. The fitted scaler is kept so it can be applied to other data.
# NOTE(review): the scaler is fitted on every row of X before the
# train/validation split, so validation rows contribute to the scaling
# statistics -- consider fitting on the training portion only.
scaler = RobustScaler()
scaler.fit(X)
X = scaler.transform(X)

# Training XGB Classifier

In [None]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
print(y_train.shape, X_train.shape)

# Train an XGBoost classifier with 500 boosting rounds and score it on the
# held-out validation rows.
xgb_model = xgb.XGBClassifier(n_estimators=500).fit(X_train, y_train)
y_pred = xgb_model.predict(X_val)
print('Validation accuracy: ', accuracy_score(y_true=y_val, y_pred=y_pred))

# Preparing test data

In [None]:
# Select the same feature subset from the test frame and preview it.
X_test = test_data.loc[:, selected_feature_set_1]
X_test.head(n=5)

# Obtaining predictions for test data

In [None]:
# Scale the test features with the scaler that was already fitted on the
# training features. Calling fit_transform here would re-fit the scaler on the
# test set, so the model would receive features scaled with different
# statistics than the ones it was trained on.
# The input is taken straight from test_data (rather than re-scaling X_test) so
# that re-running this cell does not apply the scaling twice.
X_test = scaler.transform(test_data[selected_feature_set_1])

# Predict the price class of every test row with the trained model.
future_y_pred = xgb_model.predict(X_test)
print(future_y_pred)