# Artificial Neural Network on Auto Import Data

In [2]:
# Import packages
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

In [4]:
# upload data file
df = pd.read_csv(r"C:\Users\Jwpel\Downloads\auto_imports_1985.csv")
df.head()

Unnamed: 0,symboling,normalized_losses,make,fuel_type,aspiration,num_doors,body_style,drive_wheels,engine_location,wheel_base,...,engine_size,fuel_system,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price
0,3,115,0,1,0,1,0,2,0,88.6,...,130,5,3.47,2.68,9.0,111,5000,21,27,13495
1,3,115,0,1,0,1,0,2,0,88.6,...,130,5,3.47,2.68,9.0,111,5000,21,27,16500
2,1,115,0,1,0,1,2,2,0,94.5,...,152,5,2.68,3.47,9.0,154,5000,19,26,16500
3,2,164,1,1,0,0,3,1,0,99.8,...,109,5,3.19,3.4,10.0,102,5500,24,30,13950
4,2,164,1,1,0,0,3,0,0,99.4,...,136,5,3.19,3.4,8.0,115,5500,18,22,17450


In [5]:
# shape of data
df.shape

(205, 26)

In [6]:
# info on columns
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
symboling            205 non-null int64
normalized_losses    205 non-null int64
make                 205 non-null int64
fuel_type            205 non-null int64
aspiration           205 non-null int64
num_doors            205 non-null int64
body_style           205 non-null int64
drive_wheels         205 non-null int64
engine_location      205 non-null int64
wheel_base           205 non-null float64
length               205 non-null float64
width                205 non-null float64
height               205 non-null float64
curb_weight          205 non-null int64
engine_type          205 non-null int64
num_cylinders        205 non-null int64
engine_size          205 non-null int64
fuel_system          205 non-null int64
bore                 205 non-null float64
stroke               205 non-null float64
compression_ratio    205 non-null float64
horsepower           205 non-null int64
p

In [8]:
# frequency table on target variable
df["symboling"].value_counts()

 0    67
 1    54
 2    32
 3    27
-1    22
-2     3
Name: symboling, dtype: int64

In [10]:
# predictor and target variables assigned
X = df.drop(["symboling"], axis=1)
y = df["symboling"]

In [20]:
# data split to training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .20, random_state = 9)

In [21]:
# training and test data scaled
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

  return self.partial_fit(X, y)
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


In [41]:
# neural network created
mlp = MLPClassifier(solver = "sgd", hidden_layer_sizes = (40,40,40), max_iter=4000, learning_rate="adaptive")
mlp.fit(X_train, y_train)
mlp_predict = mlp.predict(X_test)

In [42]:
# accuracy score 
accuracy = accuracy_score(y_test, mlp_predict)
accuracy

0.7560975609756098

In [43]:
# predictions on test set
y_test.value_counts()

 0    16
 1    10
 2     7
 3     4
-1     3
-2     1
Name: symboling, dtype: int64

In [44]:
# confusion matrix
matrix = confusion_matrix(y_test, mlp_predict)
conf_mat = pd.DataFrame(data = matrix, columns = [["Pred:-2", "Pred:-1", "Pred:0", "Pred:1", "Pred:2", "Pred:3"]],
                       index = [["Act:-2", "Act:-1", "Act:0", "Act:1", "Act:2", "Act:3"]])
conf_mat

Unnamed: 0,Pred:-2,Pred:-1,Pred:0,Pred:1,Pred:2,Pred:3
Act:-2,0,1,0,0,0,0
Act:-1,0,3,0,0,0,0
Act:0,0,2,11,2,0,1
Act:1,0,0,0,9,1,0
Act:2,0,0,2,1,4,0
Act:3,0,0,0,0,0,4
