<a href="https://colab.research.google.com/github/prasanna-venkatesh-m/mobile-price-range-classification/blob/NoPackages/Mobile_Price_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [99]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [100]:
df = pd.read_csv('/content/train.csv')

In [101]:
columns = df.columns
filtered_columns = columns.drop(['blue', 'dual_sim', 'four_g', 'talk_time', 'three_g', 'touch_screen', 'wifi', 'price_range' ])

In [102]:
scaler = StandardScaler()
scaled_data = df.copy()
scaled_data[filtered_columns] = scaler.fit_transform(scaled_data[filtered_columns])
print(scaled_data.head())

   battery_power  blue  clock_speed  ...  touch_screen  wifi  price_range
0      -0.902597     0     0.830779  ...             0     1            1
1      -0.495139     1    -1.253064  ...             1     0            2
2      -1.537686     1    -1.253064  ...             1     0            2
3      -1.419319     1     1.198517  ...             0     0            2
4       1.325906     1    -0.395011  ...             1     0            1

[5 rows x 21 columns]


In [103]:
X = scaled_data.drop(columns=['price_range'])
y = scaled_data['price_range']

In [104]:
def softmax(z):
  ez = np.exp(z)
  sm = ez / np.sum(ez, axis=1, keepdims=True)
  return sm

In [145]:
def cross_entropy(y, y_pred):
    epsilon = 1e-15  # small value to prevent log(0)
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
    loss = -np.mean(np.sum(y * np.log(y_pred), axis=1))
    return loss

In [106]:
def one_hot_encoding(y):
  oe = OneHotEncoder()
  y_encoded = oe.fit_transform(y.values.reshape(-1,1))
  return y_encoded

In [147]:
def multi_classification(X, y, tmp_w, tmp_b, max_iter=1000, alpha_rate=0.01):
    w = tmp_w
    b = tmp_b
    y_true = one_hot_encoding(y)
    y_true = y_true.toarray()
    cost_history = []

    for i in range(max_iter):
        z = np.dot(X, w) + b
        y_pred = softmax(z)
        cost = cross_entropy(y_true, y_pred)
        cost_history.append(cost)

        if i % 100 == 0:
            print(f'For Iteration {i} cost is {cost}')

        loss = y_pred - y_true
        dj_dw = np.dot(X.T, loss) / len(X)
        dj_db = np.sum(loss, axis=0) / len(X)

        w = w - (alpha_rate * dj_dw)
        b = b - (alpha_rate * dj_db)

    return w, b, cost_history

In [152]:
def predict(X, w, b):
  z = np.dot(X, w) + b
  y_pred = softmax(z)
  return np.argmax(y_pred, axis=1)

In [108]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size = 0.3)

In [109]:
print(X_train.shape[1])
print(len(y_train.unique()))

20
4


In [148]:
w,b, cost = multi_classification(X_train, y_train, np.random.rand(20,4), np.random.rand(1,4), 10000, 0.01 )

For Iteration 0 cost is 3.6279497095994975
For Iteration 100 cost is 1.5005867200363212
For Iteration 200 cost is 1.2819998499263818
For Iteration 300 cost is 1.1382128986523692
For Iteration 400 cost is 1.0397111096380147
For Iteration 500 cost is 0.9690094977165762
For Iteration 600 cost is 0.9159259174910541
For Iteration 700 cost is 0.8744227180242781
For Iteration 800 cost is 0.8408226798357925
For Iteration 900 cost is 0.8128178598264115
For Iteration 1000 cost is 0.7889133845558965
For Iteration 1100 cost is 0.7681099807229258
For Iteration 1200 cost is 0.7497188890293331
For Iteration 1300 cost is 0.7332513552346251
For Iteration 1400 cost is 0.7183509835393984
For Iteration 1500 cost is 0.7047512829117453
For Iteration 1600 cost is 0.6922483685884202
For Iteration 1700 cost is 0.6806829924101826
For Iteration 1800 cost is 0.6699284448545932
For Iteration 1900 cost is 0.6598822326195386
For Iteration 2000 cost is 0.6504602341447269
For Iteration 2100 cost is 0.6415925137281202


In [153]:
y_train_pred = predict(X_train, w, b)
print(y_train_pred)

[0 3 1 ... 2 0 2]


In [154]:
print(accuracy_score(y_train_pred, y_train))

0.9214285714285714
