In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical

In [2]:
# Load the dataset
df = pd.read_csv("train.csv")

In [3]:
# Correct column name
df.columns = df.columns.str.strip()  # Remove any leading/trailing spaces in column names
df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [4]:
df.isnull().sum()

battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64

In [5]:
print(df.isna().sum())

battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64


In [6]:
df.shape

(2000, 21)

In [7]:
df['price_range'].value_counts()

1    500
2    500
3    500
0    500
Name: price_range, dtype: int64

In [8]:
df = pd.get_dummies(df, columns=['price_range','blue', 'dual_sim', 'four_g', 'three_g', 'touch_screen', 'wifi'])
df.head()

Unnamed: 0,battery_power,clock_speed,fc,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,...,dual_sim_0,dual_sim_1,four_g_0,four_g_1,three_g_0,three_g_1,touch_screen_0,touch_screen_1,wifi_0,wifi_1
0,842,2.2,1,7,0.6,188,2,2,20,756,...,1,0,1,0,1,0,1,0,0,1
1,1021,0.5,0,53,0.7,136,3,6,905,1988,...,0,1,0,1,0,1,0,1,1,0
2,563,0.5,2,41,0.9,145,5,6,1263,1716,...,0,1,0,1,0,1,0,1,1,0
3,615,2.5,0,10,0.8,131,6,9,1216,1786,...,1,0,1,0,0,1,1,0,1,0
4,1821,1.2,13,44,0.6,141,2,14,1208,1212,...,1,0,0,1,0,1,0,1,1,0


In [9]:
df.columns

Index(['battery_power', 'clock_speed', 'fc', 'int_memory', 'm_dep',
       'mobile_wt', 'n_cores', 'pc', 'px_height', 'px_width', 'ram', 'sc_h',
       'sc_w', 'talk_time', 'price_range_0', 'price_range_1', 'price_range_2',
       'price_range_3', 'blue_0', 'blue_1', 'dual_sim_0', 'dual_sim_1',
       'four_g_0', 'four_g_1', 'three_g_0', 'three_g_1', 'touch_screen_0',
       'touch_screen_1', 'wifi_0', 'wifi_1'],
      dtype='object')

In [10]:
# Define features and target variable
X = df.drop(columns = ['price_range_0','price_range_1', 'price_range_2', 'price_range_3'], axis=1)
y = df.loc[:, ['price_range_0','price_range_1', 'price_range_2', 'price_range_3']]

In [39]:
y

Unnamed: 0,price_range_0,price_range_1,price_range_2,price_range_3
0,0,1,0,0
1,0,0,1,0
2,0,0,1,0
3,0,0,1,0
4,0,1,0,0
...,...,...,...,...
1995,1,0,0,0
1996,0,0,1,0
1997,0,0,0,1
1998,1,0,0,0


In [11]:
from sklearn.model_selection import train_test_split
# Split the data into training and testing sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print('Shape of X_train and X_test:', X_train.shape, X_test.shape)
print('Shape of y_train and y_test:', y_train.shape, y_test.shape)

Shape of X_train and X_test: (1600, 26) (400, 26)
Shape of y_train and y_test: (1600, 4) (400, 4)


In [12]:
# Identify numerical and binary features
numerical_features = ['battery_power', 'clock_speed', 'int_memory', 'm_dep', 'fc', 'mobile_wt', 
                      'n_cores', 'pc', 'px_height', 'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time']

# Initialize the scaler
scaler = StandardScaler()

# Scale the numerical features in X_train
X_train_num_scaled = scaler.fit_transform(X_train[numerical_features])
X_test_num_scaled = scaler.transform(X_test[numerical_features])

# Convert the scaled numerical features back to DataFrame to keep column names
X_train_num_scaled = pd.DataFrame(X_train_num_scaled, columns=numerical_features, index=X_train.index)
X_test_num_scaled = pd.DataFrame(X_test_num_scaled, columns=numerical_features, index=X_test.index)

# Drop original numerical columns from X_train and X_test
X_train = X_train.drop(columns=numerical_features)
X_test = X_test.drop(columns=numerical_features)

# Concatenate the scaled numerical features back with the rest of X_train and X_test
X_train = pd.concat([X_train, X_train_num_scaled], axis=1)
X_test = pd.concat([X_test, X_test_num_scaled], axis=1)

# Now X_train and X_test have the scaled numerical features and the one-hot encoded binary features
print('Shape of X_train and X_test:', X_train.shape, X_test.shape)
print('Shape of y_train and y_test:', y_train.shape, y_test.shape)

Shape of X_train and X_test: (1600, 26) (400, 26)
Shape of y_train and y_test: (1600, 4) (400, 4)


In [13]:
# Store mean and std for each feature
means = scaler.mean_
stds = scaler.scale_
means

array([1.24080875e+03, 1.51362500e+00, 3.22700000e+01, 5.02562500e-01,
       4.31000000e+00, 1.40633750e+02, 4.54250000e+00, 9.87812500e+00,
       6.44226250e+02, 1.24909188e+03, 2.11613125e+03, 1.22200000e+01,
       5.70562500e+00, 1.09568750e+01])

In [14]:
stds

array([4.40589647e+02, 8.19932533e-01, 1.81894777e+01, 2.86792056e-01,
       4.33793153e+00, 3.53271264e+01, 2.28925616e+00, 6.01296695e+00,
       4.45297697e+02, 4.31395297e+02, 1.08037355e+03, 4.20405756e+00,
       4.33750716e+00, 5.50602082e+00])

In [15]:
DENSE1_SIZE = 32
DENSE2_SIZE = 16
NUM_OF_EPOCHS = 50 
BATCH_SIZE = 8

model = tf.keras.Sequential()

input_shape = X.shape[1]
print(input_shape)
model.add(tf.keras.layers.Flatten(input_shape =(X.shape[1],)))

model.add(tf.keras.layers.Dense(DENSE1_SIZE, activation='relu'))


output_shape = y.shape[1]
print(output_shape)
model.add(tf.keras.layers.Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()

26
4
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 26)                0         
                                                                 
 dense (Dense)               (None, 32)                864       
                                                                 
 dense_1 (Dense)             (None, 4)                 132       
                                                                 
Total params: 996
Trainable params: 996
Non-trainable params: 0
_________________________________________________________________


In [16]:
history = model.fit(X_train, y_train, batch_size=BATCH_SIZE, 
                    epochs=NUM_OF_EPOCHS,
                    verbose=1, validation_split=0.2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [25]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)
print("Test Loss:", loss)


Test Accuracy: 0.9574999809265137
Test Loss: 0.09543399512767792


In [26]:
# Save the model
model.save('MobilePriceClassifyModel.h5')

In [27]:
def representative_dataset():
    for _ in range(100):
      data =  X_test
      yield [data.astype(np.float32)]
        
print(representative_dataset())

<generator object representative_dataset at 0x00000163565CAD68>


In [28]:
tf.saved_model.save(model, "saved_mobile_seq_model_keras_dir")
converter = tf.lite.TFLiteConverter.from_saved_model("saved_mobile_seq_model_keras_dir")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset

tflite_model = converter.convert()

INFO:tensorflow:Assets written to: saved_mobile_seq_model_keras_dir\assets


INFO:tensorflow:Assets written to: saved_mobile_seq_model_keras_dir\assets


In [29]:
with open('MobilePriceClassifyModel.tflite', 'wb') as f:
  f.write(tflite_model)

In [30]:
interpreter = tf.lite.Interpreter(model_path="MobilePriceClassifyModel.tflite")
interpreter.allocate_tensors()

In [31]:
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print('input_details:\n', input_details)
print('output_details:\n', output_details)

input_details:
 [{'name': 'serving_default_flatten_input:0', 'index': 0, 'shape': array([ 1, 26]), 'shape_signature': array([-1, 26]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
output_details:
 [{'name': 'StatefulPartitionedCall:0', 'index': 11, 'shape': array([1, 4]), 'shape_signature': array([-1,  4]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


In [42]:
X_train.iloc[:4, ].to_csv("sample.csv", index=False)

In [38]:
# Test the model on random input data
input_shape = input_details[0]['shape']

# Random Input Test
input0_data = np.random.random_sample(input_shape).astype(np.float32)
print("Random input0_data:", input0_data)
interpreter.set_tensor(input_details[0]['index'], input0_data)
interpreter.invoke()
output0_data = interpreter.get_tensor(output_details[0]['index'])
print("TFLite Model output for random input0_data:", output0_data)

# Verify with original Keras model
keras_output0_data = model.predict(input0_data)
print("Keras Model output for random input0_data:", keras_output0_data)

# Custom Test Input 1
input1_data = np.array([[1,0,0,1,1,0,0,1,0,1,0,1,1.5483596911931745,-1.2362297611990014,0.7548320081354454,-0.008935045246663081,0.620111216247627,1.425710356522435,-1.547445876673227,0.02026869615452839,0.2757116213968109,1.181997413205301,-0.5804763077229776,-0.5280612756351722,0.759508832792765,-1.4451225785068857]], dtype=np.float32)
print("\nCustom input1_data:", input1_data)
interpreter.set_tensor(input_details[0]['index'], input1_data)
interpreter.invoke()
output1_data = interpreter.get_tensor(output_details[0]['index'])
print("TFLite Model output for custom input1_data:", output1_data)

# Verify with original Keras model
keras_output1_data = model.predict(input1_data)
print("Keras Model output for custom input1_data:", keras_output1_data)

# Custom Test Input 2
input2_data = np.array([[0,1,1,0,0,1,0,1,0,1,0,1,-1.3795347982761568,0.8371115573737471,0.9197625270288421,-1.4036738154575337,-0.9935610936904357,-0.04624633152097916,1.510315912095323,-1.4764965583337306,-0.2587622860340611,-0.5565472705139002,1.3364532557364845,-0.2901958361598695,-1.0848685262942002,0.9159291560735261]], dtype=np.float32)
print("\nCustom input2_data:", input2_data)
interpreter.set_tensor(input_details[0]['index'], input2_data)
interpreter.invoke()
output2_data = interpreter.get_tensor(output_details[0]['index'])
print("TFLite Model output for custom input2_data:", output2_data)

# Verify with original Keras model
keras_output2_data = model.predict(input2_data)
print("Keras Model output for custom input2_data:", keras_output2_data)

Random input0_data: [[0.99472827 0.12711795 0.30644953 0.9852901  0.03399667 0.4070768
  0.87210685 0.8088839  0.9857608  0.793143   0.26953802 0.11912376
  0.9702564  0.7488302  0.24202262 0.09072242 0.69194186 0.13692406
  0.26782095 0.9166444  0.3228377  0.08546323 0.3780372  0.42000934
  0.39746937 0.9088032 ]]
TFLite Model output for random input0_data: [[0.         0.         0.98046875 0.01953125]]
Keras Model output for random input0_data: [[9.3765097e-16 4.8534167e-07 9.8460478e-01 1.5394819e-02]]

Custom input1_data: [[ 1.          0.          0.          1.          1.          0.
   0.          1.          0.          1.          0.          1.
   1.5483596  -1.2362298   0.754832   -0.00893505  0.6201112   1.4257103
  -1.5474459   0.0202687   0.27571163  1.1819974  -0.5804763  -0.5280613
   0.75950885 -1.4451226 ]]
TFLite Model output for custom input1_data: [[0.    0.875 0.125 0.   ]]
Keras Model output for custom input1_data: [[7.4067628e-07 8.8681602e-01 1.1318328e-01 4.

In [44]:
import os

# Define the file paths
keras_model_path = "MobilePriceClassifyModel.h5"
tflite_model_path = "MobilePriceClassifyModel.tflite"

# Get file sizes in KB
keras_model_size = os.path.getsize(keras_model_path) / 1024  # Size in KB
tflite_model_size = os.path.getsize(tflite_model_path) / 1024  # Size in KB

print(f"Keras model size: {keras_model_size:.2f} KB")
print(f"TFLite model size: {tflite_model_size:.2f} KB")

Keras model size: 36.19 KB
TFLite model size: 3.63 KB


In [40]:
# Function to convert some hex values into an array for C programming
import time, sys

# Function to convert some hex values into an array for C programming
def hex_to_c_array(hex_data, var_name):
    c_str = ""

    # Create header guard
    c_str += '#ifndef ' + var_name.upper() + '_H\n'
    c_str += "#define " + var_name.upper() + '_H\n\n'

    c_str += "/*\n Author: Pranav P Kulkarni \n"
    c_str += " CAUTION: This is an auto generated file.\n DO NOT EDIT OR MAKE ANY CHANGES TO IT.\n"

# Time stamping of this model data in the generated file
    localtime = time.asctime( time.localtime(time.time()) )
    c_str += " This model data was generated on " + localtime+ '\n\n'
    print("This model data was generated on:", localtime)

# Add information about the verisons of tools and packages used in generating this header file
    c_str += " Tools used:\n Python:" + str(sys.version) + "\n Numpy:" + str(np.version.version) + \
          "\n TensorFlow:" + str(sys.version) + "\n Keras: "+ str(tf.keras.__version__) + "\n\n"
    print("Tools used: Python:", sys.version, "\n Numpy:", np.version.version, \
          "\n TensorFlow:", sys.version, "\n Keras: ", tf.keras.__version__, "\n\n")

# Training details of the model
    c_str += ' Model details are:\n'
    c_str += ' NUM_OF_EPOCHS = ' + str(NUM_OF_EPOCHS) + '\n'
    c_str += ' BATCH_SIZE    = ' + str(BATCH_SIZE) + '\n*/\n'
    
# Generate 'C' constants for the no. of nodes in each layer
    c_str += '\nconst int ' + 'DENSE1_SIZE' + ' = ' + str(DENSE1_SIZE) + ';\n'
    c_str +=   'const int ' + 'DENSE2_SIZE' + ' = ' + str(DENSE2_SIZE) + ';\n'      
    
    # Add array length at the top of the file
    c_str += '\nconst unsigned int ' + var_name + '_len = ' + str(len(hex_data)) + ';\n'

    # Declare C variable
    c_str += 'alignas(8) const unsigned char ' + var_name + '[] = {'
    hex_array = []
    for i, val in enumerate(hex_data):
        # Construct string from hex
        hex_str = format(val, '#04x')

        # Add formating so each line stays within 80 characters
        if (i + 1) < len(hex_data):
          hex_str += ','
        if (i + 1) % 12 == 0:
          hex_str += '\n'
        hex_array.append(hex_str)

    # Add closing brace
    c_str += '\n' + format(''.join(hex_array)) + '\n};\n\n'

    # Close out header guard
    c_str += '#endif //' + var_name.upper() + '_H'

    return c_str

In [41]:
# Write TFLite model to a C source (or header) file
with open("MobilePriceClassify_model_esp32" + '.h', 'w') as file:
  file.write(hex_to_c_array(tflite_model, "MobilePriceClassify_model_esp32"))

This model data was generated on: Sat Nov  9 21:47:08 2024
Tools used: Python: 3.7.1 (default, Oct 28 2018, 08:39:03) [MSC v.1912 64 bit (AMD64)] 
 Numpy: 1.21.5 
 TensorFlow: 3.7.1 (default, Oct 28 2018, 08:39:03) [MSC v.1912 64 bit (AMD64)] 
 Keras:  2.10.0 


