# Classificação de celulares

### Importação de bibliotecas

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Read the dataset from disk and preview its first rows.
df1 = pd.read_csv(filepath_or_buffer='mobile.csv')
df1.head(n=5)

In [None]:
# Class balance: number of rows for each price_range value.
df1['price_range'].value_counts()

In [None]:
# Dataset dimensions as (rows, columns).
(len(df1.index), len(df1.columns))

### Indicar a semente inicial e dividir a base em treino e teste

In [None]:
# Record the number of rows and fix the seed of NumPy's global random
# generator so that runs are repeatable.
nlinhas = len(df1.index)
np.random.seed(0)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. Stratifying on price_range keeps the
# class proportions equal in both partitions. The explicit random_state makes
# the split repeatable even when this cell is re-executed on its own, instead
# of depending on how much of the global NumPy random stream was consumed.
x_train, x_test, y_train, y_test = train_test_split(
    df1.drop(columns='price_range'),  # every attribute except the target
    df1['price_range'],
    test_size=0.3,
    stratify=df1['price_range'],
    random_state=0,
)

In [None]:
# Expected size of the test partition: 30% of the rows actually loaded,
# computed from the data instead of a hard-coded row count.
0.3 * len(df1)

In [None]:
# Expected number of test rows per class: 30% of 500 rows.
500 * 0.3

In [None]:
# Per-class counts in the test labels, to check that the class proportions
# were kept by the split.
test_class_counts = y_test.value_counts()
print(test_class_counts)

In [None]:
# Shape of the test label vector, as a one-element tuple.
(len(y_test),)

In [None]:
# Summary statistics for each training attribute (quartiles spelled out).
x_train.describe(percentiles=[0.25, 0.5, 0.75])

## **Normalizando os Dados**

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply that
# same fitted transformation to both the training and the test attributes.
preprocessParams = StandardScaler()
preprocessParams.fit(x_train)
X_train_normalized, X_test_normalized = (
    preprocessParams.transform(split) for split in (x_train, x_test)
)

# Rede Neural

In [None]:
from keras import Sequential
from keras.layers import Dense
from tensorflow.keras.optimizers import SGD

In [None]:
# Number of distinct labels present in the training target.
NumberOfClasses = y_train.nunique(dropna=False)
NumberOfClasses

In [None]:
RN = Sequential()
RN.add(Dense(20, input_shape=X_train_normalized.shape[1:], activation='sigmoid'))
RN.add(Dense(NumberOfClasses, activation='sigmoid'))
RN.summary()

In [None]:
# treinamento
from tensorflow.keras.utils import to_categorical
sgd = SGD(learning_rate=0.1, decay=1e-6, momentum=0.9)
RN.compile(optimizer=sgd, loss='mean_squared_error', metrics=['accuracy'])
history = RN.fit(X_train_normalized,to_categorical(y_train), epochs = 150, validation_split=0.2)

In [None]:
# Evaluate on the held-out test split. The result lists the loss first,
# followed by the accuracy metric requested at compile time.
y_test_onehot = to_categorical(y_test)
score = RN.evaluate(X_test_normalized, y_test_onehot, verbose=0)
for caption, value in zip(('Test score:', 'Test accuracy:'), score):
    print(caption, value)

In [None]:
# Training curves: loss on the training and validation data per epoch.
import matplotlib.pyplot as plt

for history_key, curve_label in (('loss', 'train'), ('val_loss', 'validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('época')
plt.ylabel('loss')
plt.title('Loss de treino e validação')
plt.legend();

In [None]:
# Training curves: accuracy on the training and validation data per epoch.
for history_key, curve_label in (('accuracy', 'train'), ('val_accuracy', 'validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('época')
plt.ylabel('acurácia')
plt.title('Acurácia de treino e validação')
plt.legend();

In [None]:
# Prediction: run the network on the normalized test attributes and preview
# the raw outputs for the first five samples.
from sklearn.metrics import confusion_matrix

y_test_predicted = RN.predict(X_test_normalized)
y_test_predicted[:5]

In [None]:
# True labels as a plain array, and the predicted class for each sample taken
# as the index of the largest network output in its row.
y_test_indexes = y_test.to_numpy()
y_test_predicted_indexes = y_test_predicted.argmax(axis=1)
print(y_test_predicted_indexes[:5])

In [None]:
# Confusion matrix. The predictions are passed as the first argument, so the
# rows correspond to predicted classes and the columns to the real classes,
# which is what the labels below state.
row_labels = ['Pred 0', 'Pred 1', 'Pred 2', 'Pred 3']
col_labels = ['Real 0', 'Real 1', 'Real 2', 'Real 3']
confMatrix = confusion_matrix(y_test_predicted_indexes, y_test_indexes)
pd.DataFrame(data=confMatrix, index=row_labels, columns=col_labels)

In [None]:
import seaborn as sns

# Draw the confusion matrix as an annotated heatmap on a single axes.
ax = plt.subplot()
sns.heatmap(confMatrix, annot=True, fmt=".0f", ax=ax)
ax.set_title('Matriz de Confusão')
ax.set_xlabel('Real')
ax.set_ylabel('Previsto')

# Label the ticks with the class names.
class_names = ['0', '1', '2', '3']
ax.set_xticklabels(class_names)
ax.set_yticklabels(class_names)
plt.show()