In [None]:
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
# Global plotting defaults: seaborn theme with a 10x6 default figure and a
# 1.3x font scale, then the matplotlib 'fivethirtyeight' style sheet on top.
sns.set(font_scale=1.3, rc={'figure.figsize': (10, 6)})
plt.style.use('fivethirtyeight')

from sklearn.model_selection import train_test_split, GridSearchCV

from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from xgboost import XGBClassifier, XGBRegressor, XGBRFRegressor
from sklearn.metrics import accuracy_score, confusion_matrix, r2_score

import warnings
# Silence every warning raised for the rest of the session.
# NOTE(review): this also hides deprecation warnings emitted by the plotting
# and pandas calls further down — consider narrowing the filter.
warnings.filterwarnings('ignore')

# <p style="background-color:#80ccff; font-family:newtimeroman; font-size:150%; text-align:center; border-radius:  80px 5px; padding-top:8px; padding-bottom:8px;">Input</p>

In [None]:
# Read the training CSV into a DataFrame; the path is relative to the notebook.
train_csv_path = '../input/mobile-price-classification/train.csv'
train = pd.read_csv(train_csv_path)

In [None]:
# Preview the first five rows of the loaded frame.
train.head(n=5)

# <p style="background-color:#80ccff; font-family:newtimeroman; font-size:150%; text-align:center; border-radius:  80px 5px; padding-top:8px; padding-bottom:8px;">Exploratory Analysis</p>

In [None]:
# Summarise column dtypes and non-null counts. Per the original author's note,
# the dataset doesn't have missing values (confirm against the counts printed below).
train.info()

In [None]:
# Annotated heatmap of the pairwise correlations between all columns.
# Author's observation: the ram variable has a strong positive correlation.
corr_matrix = train.corr()

fig, ax = plt.subplots(figsize=(18,15))
sns.heatmap(
    corr_matrix,
    ax=ax,
    cmap=plt.cm.Reds,
    annot=True,
    fmt='.2f',
    square=True,
    linewidths=.8,
    cbar_kws={'shrink': .6},
)
plt.show()

In [None]:
# Distribution of every column, one panel per column on a 3-wide grid.
# `columns` and `length` are reused by the plotting cells that follow.
columns = train.columns
length = len(columns)

# plt.subplot() needs integer grid dimensions: the previous `length/2` is a
# float, which recent matplotlib releases reject with a ValueError. Compute
# just enough rows to hold every column instead.
n_cols = 3
n_rows = (length + n_cols - 1) // n_cols  # ceiling division without importing math

# plt.figure (rather than plt.subplots) so no full-size axes is created
# underneath the panels added below.
plt.figure(figsize=(20,50))

for j, i in enumerate(columns):
    plt.subplot(n_rows, n_cols, j+1)
    # NOTE(review): distplot is deprecated in newer seaborn releases; kept here
    # so the cell still runs on the seaborn version this notebook was written for.
    sns.distplot(train[i])
    plt.title(i)

# Spacing does not depend on the loop variable, so apply it once for all panels.
plt.subplots_adjust(wspace=.2, hspace=.5)

In [None]:
# Box plot of every column, one panel per column on a 3-wide grid.
# Relies on `columns` and `length` defined in the distribution cell above.

# plt.subplot() needs integer grid dimensions: the previous `length/2` is a
# float, which recent matplotlib releases reject with a ValueError. Compute
# just enough rows to hold every column instead.
n_cols = 3
n_rows = (length + n_cols - 1) // n_cols  # ceiling division without importing math

# plt.figure (rather than plt.subplots) so no full-size axes is created
# underneath the panels added below.
plt.figure(figsize=(20,50))

for j, i in enumerate(columns):
    plt.subplot(n_rows, n_cols, j+1)
    sns.boxplot(y=train[i])
    plt.title(i)

# Spacing does not depend on the loop variable, so apply it once for all panels.
plt.subplots_adjust(wspace=.2, hspace=.4)

In [None]:
# Scatter of every column against price_range, one panel per column on a
# 3-wide grid. Relies on `columns` and `length` defined in the distribution cell.

# plt.subplot() needs integer grid dimensions: the previous `length/2` is a
# float, which recent matplotlib releases reject with a ValueError. Compute
# just enough rows to hold every column instead.
n_cols = 3
n_rows = (length + n_cols - 1) // n_cols  # ceiling division without importing math

# plt.figure (rather than plt.subplots) so no full-size axes is created
# underneath the panels added below.
plt.figure(figsize=(20,50))

for j, i in enumerate(columns):
    plt.subplot(n_rows, n_cols, j+1)
    sns.scatterplot(x=train['price_range'],y=train[i])
    plt.title(i)

# Spacing does not depend on the loop variable, so apply it once for all panels.
plt.subplots_adjust(wspace=.2, hspace=.6)

# <p style="background-color:#80ccff; font-family:newtimeroman; font-size:150%; text-align:center; border-radius:  80px 5px; padding-top:8px; padding-bottom:8px;">Model</p>

In [None]:
# Separate the target column from the feature columns.
y = train['price_range']
x = train.drop(columns=['price_range'])

# Rescale: every feature is divided by the same constant.
# NOTE(review): 255 looks like a divisor carried over from image preprocessing;
# it does not map these columns onto a common range — confirm this is intended.
x = x.div(255)

In [None]:
# Hold out 20% of the rows for evaluation.
# A fixed seed makes the split (and every score computed from it) reproducible
# across kernel restarts; stratifying on y keeps the proportions of each
# price_range value the same in the train and test partitions.
SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=.2, random_state=SEED, stratify=y
)

In [None]:
# Fit several tree-based classifiers with default hyper-parameters and rank
# them by accuracy on the held-out split.

# Column labels of the result table. The spelling of 'Acuracy' is kept as-is
# because it is a runtime key that other cells may look up.
colunas = ['Modelo','Acuracy']

# Seed every estimator so the ranking is reproducible across kernel restarts.
SEED = 42

models = [
    ('GradientBoostingClassifier', GradientBoostingClassifier(random_state=SEED)),
    ('AdaBoostClassifier', AdaBoostClassifier(random_state=SEED)),
    ('ExtraTreesClassifier', ExtraTreesClassifier(random_state=SEED)),
    ('BaggingClassifier', BaggingClassifier(random_state=SEED)),
    ('RandomForestClassifier', RandomForestClassifier(random_state=SEED)),
    ('DecisionTreeClassifier', DecisionTreeClassifier(random_state=SEED)),
    ('ExtraTreeClassifier', ExtraTreeClassifier(random_state=SEED)),
    ("XGBClassifier", XGBClassifier(random_state=SEED)),
]

# Collect one (name, score) row per model and build the frame once afterwards:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
linhas = []
for name, model in models:
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    acc = accuracy_score(y_test, y_pred)
    linhas.append((name, acc))

# Best model first; reset the index so it reflects the rank instead of
# repeating 0 for every row.
resultado = (
    pd.DataFrame(linhas, columns=colunas)
    .sort_values(by=['Acuracy'], ascending=False)
    .reset_index(drop=True)
)
resultado

In [None]:
# Fit several tree-based regressors with default hyper-parameters on the same
# split and rank them by R2 on the held-out rows.
# NOTE(review): the target here is the same price_range column the classifiers
# above predict as a class label — confirm that treating it as a continuous
# value is intended.

# Column labels of the result table.
colunas = ['Modelo','R2']

# Seed every estimator so the ranking is reproducible across kernel restarts.
SEED = 42

models = [
    ('GradientBoostingRegressor', GradientBoostingRegressor(random_state=SEED)),
    ('AdaBoostRegressor', AdaBoostRegressor(random_state=SEED)),
    ('ExtraTreesRegressor', ExtraTreesRegressor(random_state=SEED)),
    ('BaggingRegressor', BaggingRegressor(random_state=SEED)),
    ('RandomForestRegressor', RandomForestRegressor(random_state=SEED)),
    ("XGBRegressor", XGBRegressor(random_state=SEED)),
    ("XGBRFRegressor", XGBRFRegressor(random_state=SEED)),
]

# Collect one (name, score) row per model and build the frame once afterwards:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
linhas = []
for name, model in models:
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    r2 = r2_score(y_test, y_pred)
    linhas.append((name, r2))

# Best model first; reset the index so it reflects the rank instead of
# repeating 0 for every row.
resultado = (
    pd.DataFrame(linhas, columns=colunas)
    .sort_values(by=['R2'], ascending=False)
    .reset_index(drop=True)
)
resultado