# Importing Required Libraries

In [1]:
import pandas as pd
from numpy import mean
from numpy import std
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.linear_model import LogisticRegression


## Importing dataset

In [2]:
# Load the raw file. Column names are supplied explicitly through `names`;
# index_col=False keeps pandas from using the first column as the row index.
data = pd.read_csv(
    "car.data",
    names=["buying", "maint", "doors", "persons", "lug_boot", "safety", "class"],
    index_col=False,
)

In [3]:
# Display the loaded frame (the notebook renders a truncated preview of it).
data

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc
...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good
1724,low,low,5more,more,med,high,vgood
1725,low,low,5more,more,big,low,unacc
1726,low,low,5more,more,big,med,good


In [4]:
# Summary statistics; the output below reports count/unique/top/freq per column.
data.describe()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
count,1728,1728,1728,1728,1728,1728,1728
unique,4,4,4,3,3,3,4
top,med,med,3,2,med,med,unacc
freq,432,432,432,576,576,576,1210


In [5]:
# Inspect the column dtypes; every column is reported as object below.
data.dtypes

buying      object
maint       object
doors       object
persons     object
lug_boot    object
safety      object
class       object
dtype: object

## Independent Variables 

In [6]:
# Predictor columns for the model.
# NOTE(review): "persons" is not selected while "class" is — confirm this is intentional.
X = data.loc[
    :,
    ["maint", "doors", "lug_boot", "safety", "class"],
]

### Converting categories to dummies

In [7]:
# One-hot encode the categorical predictors. drop_first=True omits the first
# level of each column, so that level becomes the implicit baseline.
X = pd.get_dummies(X, drop_first=True)
X.head()

Unnamed: 0,maint_low,maint_med,maint_vhigh,doors_3,doors_4,doors_5more,lug_boot_med,lug_boot_small,safety_low,safety_med,class_good,class_unacc,class_vgood
0,0,0,1,0,0,0,0,1,1,0,0,1,0
1,0,0,1,0,0,0,0,1,0,1,0,1,0
2,0,0,1,0,0,0,0,1,0,0,0,1,0
3,0,0,1,0,0,0,1,0,1,0,0,1,0
4,0,0,1,0,0,0,1,0,0,1,0,1,0


## Target Variable

In [8]:
# The label to predict is the "buying" column of the loaded frame.
Y = data.loc[:, "buying"]

# Multinomial Logistic Regression Model

In [9]:
# Multinomial logistic regression classifier.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument — confirm against the installed version.
model = LogisticRegression(multi_class='multinomial')

# Stratified 10-fold cross-validation, repeated 3 times with a fixed seed.
cv = RepeatedStratifiedKFold(n_repeats=3, n_splits=10, random_state=1)

# Accuracy for every fold of every repeat; n_jobs=-1 runs the fits in parallel.
n_scores = cross_val_score(
    model,
    X,
    Y,
    cv=cv,
    scoring='accuracy',
    n_jobs=-1,
)

# Report the mean and standard deviation of the fold accuracies.
print('Mean Accuracy: %.3f (%.3f)' % (n_scores.mean(), n_scores.std()))

Mean Accuracy: 0.317 (0.033)


In [12]:
# Make a prediction with the multinomial logistic regression model.
# (LogisticRegression is already imported in the imports cell at the top, so
# nothing is re-imported here.)

# Fit the model on the whole dataset.
model.fit(X, Y)

# A single observation: one value per dummy column of X, in column order.
# The two 1s sit at positions 4 and 10, i.e. doors_4 and class_good in the
# X.head() output; every other dummy is 0, which selects the dropped baseline
# level for the remaining features.
row = [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0]

# Wrap the row in a one-row DataFrame labelled with X's columns so the input
# carries the same feature names the model was fitted on. pandas also raises
# here if the number of values does not match the number of columns.
row_frame = pd.DataFrame([row], columns=X.columns)

# Predict the class label.
yhat = model.predict(row_frame)

# Summarize the predicted class.
print('Predicted buying prices is: {}'.format(yhat[0]))

Predicted buying prices is: low
