# Preprocessing data

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.colors 

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import accuracy_score, log_loss
from sklearn.model_selection import train_test_split

from tqdm import tqdm_notebook

In [2]:
my_cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["red", "yellow", "green"])
import seaborn as sns
sns.set()

In [3]:
df = pd.read_csv("mobile_cleaned.csv")

In [4]:
PhoneId = df["PhoneId"]
Rating = df["Rating"]
df = df.drop("PhoneId", axis=1).drop("Rating", axis=1)

In [5]:
Rating = Rating.values

In [6]:
min_max_scaler = MinMaxScaler()
Rating_s = min_max_scaler.fit_transform(Rating.reshape(-1,1))
threshold = 4.1
threshold_s = min_max_scaler.transform(np.array(threshold).reshape(-1,1))
Rating_b = list(map(lambda x:0 if x<threshold_s else 1, Rating_s))

In [7]:
dfo = df.copy()

In [8]:
dfo["Pixel Density"] = pd.qcut(np.array(dfo["Pixel Density"]), q=3, labels=[-1,0,1])
dfo["Screen Size"] = pd.qcut(np.array(dfo["Screen Size"]), q=3, labels=[-1,0,1])
dfo["Weight"] = pd.qcut(np.array(dfo["Weight"]), q=3, labels=[-1,0,1])
dfo["RAM"] = pd.qcut(np.array(dfo["RAM"]), q=3, labels=[-1,0,1])
dfo["Processor_frequency"] = pd.qcut(np.array(dfo["Processor_frequency"]), q=3, labels=[-1,0,1])
dfo["Screen to Body Ratio (calculated)"] = pd.qcut(np.array(dfo["Screen to Body Ratio (calculated)"]), q=3, labels=[-1,0,1])
dfo["Height"] = pd.qcut(np.array(dfo["Height"]), q=3, labels=[-1,0,1])
dfo["Internal Memory"] = pd.qcut(np.array(dfo["Internal Memory"]), q=3, labels=[-1,0,1])
dfo["Capacity"] = pd.qcut(np.array(dfo["Capacity"]), q=3, labels=[-1,0,1])
dfo["Resolution"] = pd.qcut(np.array(dfo["Resolution"]), q=3, labels=[-1,0,1])

In [9]:
dfo["Pixel Density"] = np.array(list(map(lambda x:1 if x>0 else 0, np.array(dfo["Pixel Density"]))))
dfo["Screen Size"] = np.array(list(map(lambda x:1 if x>0 else 0, np.array(dfo["Screen Size"]))))
dfo["Weight"] = np.array(list(map(lambda x:1 if x>0 else 0, np.array(dfo["Weight"]))))
dfo["RAM"] = np.array(list(map(lambda x:1 if x>0 else 0, np.array(dfo["RAM"]))))
dfo["Processor_frequency"] = np.array(list(map(lambda x:1 if x>0 else 0, np.array(dfo["Processor_frequency"]))))
dfo["Screen to Body Ratio (calculated)"] = np.array(list(map(lambda x:1 if x>0 else 0, np.array(dfo["Screen to Body Ratio (calculated)"]))))
dfo["Height"] = np.array(list(map(lambda x:1 if x>0 else 0, np.array(dfo["Height"]))))
dfo["Internal Memory"] = np.array(list(map(lambda x:1 if x>0 else 0, np.array(dfo["Internal Memory"]))))
dfo["Capacity"] = np.array(list(map(lambda x:1 if x>0 else 0, np.array(dfo["Capacity"]))))
dfo["Resolution"] = np.array(list(map(lambda x:1 if x>0 else 0, np.array(dfo["Resolution"]))))

# Train-test split

In [10]:
mpX_train, mpX_val, mpY_train, mpY_val = train_test_split(df, Rating_b, stratify=Rating_b, test_size=0.25)

In [11]:
mpX_train = mpX_train.values

# MPNeuron class

In [12]:
class MPNeuron:
  def __init__(self):
    self.b = 0

  def model(self, x):
    return int(np.sum(x)>self.b)

  def predict(self, X):
    Y_pred = []
    for x in X:
      Y_pred.append(self.model(x))
    return Y_pred

  def fit(self, X, Y, display_accuracy=True):
    accuracy = []
    for b in range(X.shape[1]):
      self.b = b
      Y_pred = self.predict(X)
      accuracy.append(accuracy_score(Y_pred, Y))
    k = np.argmax(accuracy)
    self.b = k
    if display_accuracy:
      plt.plot(accuracy)   

In [13]:
mpn = MPNeuron()
mpn.fit(mpX_train, mpY_train, display_accuracy=False)

In [14]:
mpX_val = mpX_val.values
print(accuracy_score(mpn.predict(mpX_val), mpY_val))

0.5813953488372093
