In [1]:
!pip install ucimlrepo
!pip install altair



In [2]:
from ucimlrepo import fetch_ucirepo

# Fetch the UCI "Car Evaluation" dataset (repository id 19).
car_evaluation = fetch_ucirepo(id=19)

# Features and targets as pandas DataFrames.  The feature frame is copied
# because later cells rename its columns and add columns in place; working on
# a copy keeps the frame held by `car_evaluation` unchanged.
data = car_evaluation.data.features.copy()
y = car_evaluation.data.targets

# Dataset-level metadata.
print(car_evaluation.metadata)

# Per-variable information.
print(car_evaluation.variables)

{'uci_id': 19, 'name': 'Car Evaluation', 'repository_url': 'https://archive.ics.uci.edu/dataset/19/car+evaluation', 'data_url': 'https://archive.ics.uci.edu/static/public/19/data.csv', 'abstract': 'Derived from simple hierarchical decision model, this database may be useful for testing constructive induction and structure discovery methods.', 'area': 'Other', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 1728, 'num_features': 6, 'feature_types': ['Categorical'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1988, 'last_updated': 'Thu Aug 10 2023', 'dataset_doi': '10.24432/C5JP48', 'creators': ['Marko Bohanec'], 'intro_paper': {'title': 'Knowledge acquisition and explanation for multi-attribute decision making', 'authors': 'M. Bohanec, V. Rajkovič', 'published_in': '8th Intl Workshop on Expert Systems and their Applications, Avignon, France', 'yea

CAR                      car acceptability
. PRICE                  overall price
. . buying               buying price
. . maint                price of the maintenance
. TECH                   technical characteristics
. . COMFORT              comfort
. . . doors              number of doors
. . . persons            capacity in terms of persons to carry
. . . lug_boot           the size of luggage boot
. . safety               estimated safety of the cara

In [3]:
import altair

In [4]:
data

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety
0,vhigh,vhigh,2,2,small,low
1,vhigh,vhigh,2,2,small,med
2,vhigh,vhigh,2,2,small,high
3,vhigh,vhigh,2,2,med,low
4,vhigh,vhigh,2,2,med,med
...,...,...,...,...,...,...
1723,low,low,5more,more,med,med
1724,low,low,5more,more,med,high
1725,low,low,5more,more,big,low
1726,low,low,5more,more,big,med


In [5]:
# Human-readable labels for the six feature columns, listed in the same order
# as the columns of the feature frame.
new_columns = [
    "Buying Price",
    "Maintenance",
    "Number of Doors",
    "Passenger Capacity",
    "Luggage Capacity Size",
    "Safety",
]

In [6]:
# Positional rename: new_columns[k] replaces the k-th existing column label,
# so the list must stay in the same order as the frame's columns.
data.columns = new_columns

In [7]:
y

Unnamed: 0,class
0,unacc
1,unacc
2,unacc
3,unacc
4,unacc
...,...
1723,good
1724,vgood
1725,unacc
1726,good


In [8]:
# `y` is a one-column DataFrame (column "class"); select that column
# explicitly rather than relying on pandas accepting a one-column DataFrame
# as a column value.
data["target"] = y["class"]

In [9]:
data

Unnamed: 0,Buying Price,Maintenance,Number of Doors,Passenger Capacity,Luggage Capacity Size,Safety,target
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc
...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good
1724,low,low,5more,more,med,high,vgood
1725,low,low,5more,more,big,low,unacc
1726,low,low,5more,more,big,med,good


In [10]:
import altair as alt

# The feature columns were renamed earlier in the notebook, so the encodings
# must use the new labels: the original 'maint' / 'buying' fields no longer
# exist in `data`.  The ":O" suffix marks each field as ordinal.
alt.Chart(data).mark_point().encode(
    x='Maintenance:O',
    y='Buying Price:O',
)

In [11]:
import pandas as pd
import numpy as np


In [12]:
data.shape[1]

7

In [13]:
# Give every row a value of 1 so that summing "count" within a group yields
# the number of rows in that group.
data["count"] = np.ones(len(data), dtype=int)


In [14]:
data

Unnamed: 0,Buying Price,Maintenance,Number of Doors,Passenger Capacity,Luggage Capacity Size,Safety,target,count
0,vhigh,vhigh,2,2,small,low,unacc,1
1,vhigh,vhigh,2,2,small,med,unacc,1
2,vhigh,vhigh,2,2,small,high,unacc,1
3,vhigh,vhigh,2,2,med,low,unacc,1
4,vhigh,vhigh,2,2,med,med,unacc,1
...,...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good,1
1724,low,low,5more,more,med,high,vgood,1
1725,low,low,5more,more,big,low,unacc,1
1726,low,low,5more,more,big,med,good,1


In [15]:
# Build one aggregated frame per ordered pair of distinct columns (the helper
# "count" column is never used as a grouping key).  Each frame holds the summed
# "count" for every combination of the two columns' values and is stored under
# the key "<first>_<second>".
pair_columns = [str(name) for name in data.columns if name != "count"]

melty_dict = {
    first + "_" + second: (
        data[[first, second, "count"]]
        .groupby(by=[first, second])
        .sum()
        .reset_index()
    )
    for first in pair_columns
    for second in pair_columns
    if first != second
}

In [16]:
#from sklearn.preprocessing import OrdinalEncoder

In [17]:
data.shape

(1728, 8)

In [18]:
# Scratch check: 108 * 16 evaluates to 1728, the row count reported by data.shape.
108 * 16

1728

In [19]:
# First-pass charts: one bar chart per column pair, with the first column on
# the x axis, the summed count on the y axis, and the second column as colour.
# Frames in which every combination has the same count are skipped.
#
# The encoding strings get their own names (x_field, y_field, ...) instead of
# `x` / `y`: this notebook already uses `y` for the dataset's target frame, and
# rebinding it here would break a later re-run of the cell that copies `y`
# into `data`.
viz_list = []
for pair_counts in melty_dict.values():
    if pair_counts["count"].nunique() == 1:
        continue
    first_col, second_col, count_col = pair_counts.columns[:3]
    x_field = first_col + ":N"
    y_field = count_col + ":Q"
    color_field = second_col + ":N"
    viz_list.append(
        alt.Chart(pair_counts).mark_bar().encode(
            x=x_field,
            y=y_field,
            color=color_field,
        )
    )

In [20]:
# Bar chart of the number of rows in each target class.
target_totals = data[["target", "count"]].groupby(by="target").sum().reset_index()
target_chart = alt.Chart(target_totals).mark_bar().encode(x="target", y="count")

### First Chart
This is the first set of charts I created in Altair.

In [21]:
 # Lay the first six charts out side by side in a single row.
 alt.hconcat(viz_list[0], viz_list[1], viz_list[2], viz_list[3], viz_list[4], viz_list[5])

In [22]:
target_chart

In [None]:
# ### First Round
# This is after the first round of feedback from the think-aloud session.
# NOTE(review): this cell is a code cell; the text is kept as comments so that
# running it does not raise a SyntaxError.  Consider converting it to markdown.

In [23]:
# Round-one charts: circle marks with a centred title, a labelled count axis
# and circle size tied to the count.  Frames in which every combination has
# the same count are skipped.
#
# The encoding objects get their own names (x_axis, y_axis, ...) instead of
# `x` / `y`: this notebook already uses `y` for the dataset's target frame, and
# rebinding it here would break a later re-run of the cell that copies `y`
# into `data`.  The title uses the frame's first column name directly, which
# equals the text before "_" in the dict key but does not depend on column
# names being free of underscores.
viz_list = []
for pair_counts in melty_dict.values():
    if pair_counts["count"].nunique() == 1:
        continue
    first_col, second_col, count_col = pair_counts.columns[:3]
    chart_title = alt.TitleParams(f'{str.upper(first_col)} to Vehicle Rating', anchor='middle')
    x_axis = alt.X(first_col + ":N", title="")
    y_axis = alt.Y(count_col + ":Q", title="Car Counts")
    color_scale = alt.Color(second_col + ":N", title="")
    size_field = count_col + ":Q"
    viz_list.append(
        alt.Chart(pair_counts, title=chart_title).mark_circle().encode(
            x=x_axis,
            y=y_axis,
            color=color_scale,
            size=size_field,
        )
    )

In [24]:
# Six charts in a single row, with the charts at positions 3-5 placed first.
alt.hconcat(*[viz_list[pos] for pos in (3, 4, 5, 0, 1, 2)])

In [25]:
data

Unnamed: 0,Buying Price,Maintenance,Number of Doors,Passenger Capacity,Luggage Capacity Size,Safety,target,count
0,vhigh,vhigh,2,2,small,low,unacc,1
1,vhigh,vhigh,2,2,small,med,unacc,1
2,vhigh,vhigh,2,2,small,high,unacc,1
3,vhigh,vhigh,2,2,med,low,unacc,1
4,vhigh,vhigh,2,2,med,med,unacc,1
...,...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good,1
1724,low,low,5more,more,med,high,vgood,1
1725,low,low,5more,more,big,low,unacc,1
1726,low,low,5more,more,big,med,good,1


### Second Round
This is after the second round of feedback from the think-aloud session.

In [26]:
# Relabel the category codes for display.  The "N. " prefixes are presumably
# there so that labels sort in low-to-high order when sorted as text.
# Both price-like columns share the same codes, so they share one mapping.
price_labels = {'low': '1. low',
                'med': '2. med',
                'high': '3. high',
                'vhigh': '4. very high'}

data['target'] = data['target'].replace({'acc':'acceptable', 'unacc':'unacceptable', 'vgood':'very good'})
data["Buying Price"] = data["Buying Price"].replace(price_labels)
# The original cell followed this with a second replace of 'vhigh' on
# "Maintenance"; it could never match because 'vhigh' is already mapped here,
# so it has been dropped.
data["Maintenance"] = data["Maintenance"].replace(price_labels)
data["Number of Doors"] = data["Number of Doors"].replace({'5more':'5 or more'})
data["Passenger Capacity"] = data["Passenger Capacity"].replace({'more':'5 or more'})
data["Safety"] = data["Safety"].replace({"low":"1. low","med":"2. med","high":"3. high"})
data["Luggage Capacity Size"] = data["Luggage Capacity Size"].replace({"small":"1. small", "med":"2. med", "big":"3. big"})

In [27]:
# Rebuild the per-pair aggregates from the current contents of `data`: one
# frame per ordered pair of distinct columns (excluding the helper "count"
# column), keyed "<first>_<second>", holding the summed "count" for every
# combination of the two columns' values.
melty_dict = {}

group_columns = [str(col) for col in data.columns if col != "count"]
for first in group_columns:
    for second in group_columns:
        if first == second:
            continue
        pair_frame = data[[first, second, "count"]]
        pair_totals = pair_frame.groupby(by=[first, second]).sum()
        melty_dict[first + "_" + second] = pair_totals.reset_index()

In [28]:
# Round-two charts: same circle-mark layout as before, rebuilt from the
# current `melty_dict`.  Frames in which every combination has the same count
# are skipped.
#
# The encoding objects get their own names (x_axis, y_axis, ...) instead of
# `x` / `y`: this notebook already uses `y` for the dataset's target frame, and
# rebinding it here would break a later re-run of the cell that copies `y`
# into `data`.  The title uses the frame's first column name directly, which
# equals the text before "_" in the dict key but does not depend on column
# names being free of underscores.
viz_list = []
for pair_counts in melty_dict.values():
    if pair_counts["count"].nunique() == 1:
        continue
    first_col, second_col, count_col = pair_counts.columns[:3]
    chart_title = alt.TitleParams(f'{str.upper(first_col)} to Vehicle Rating', anchor='middle')
    x_axis = alt.X(first_col + ":N", title="")
    y_axis = alt.Y(count_col + ":Q", title="Car Counts")
    color_scale = alt.Color(second_col + ":N", title="")
    size_field = count_col + ":Q"
    viz_list.append(
        alt.Chart(pair_counts, title=chart_title).mark_circle().encode(
            x=x_axis,
            y=y_axis,
            color=color_scale,
            size=size_field,
        )
    )

In [30]:
# Six charts in a single row, in the display order 3, 2, 4, 5, 0, 1.
alt.hconcat(*[viz_list[pos] for pos in (3, 2, 4, 5, 0, 1)])

### Third Round
This is after the third round of feedback from the journaling session.

In [50]:
# Round-three charts: as in the previous round, but the x axis now carries a
# "<column> Categories" title instead of an empty one.  Frames in which every
# combination has the same count are skipped.
#
# The encoding objects get their own names (x_axis, y_axis, ...) instead of
# `x` / `y`: this notebook already uses `y` for the dataset's target frame, and
# rebinding it here would break a later re-run of the cell that copies `y`
# into `data`.  The title uses the frame's first column name directly, which
# equals the text before "_" in the dict key but does not depend on column
# names being free of underscores.
viz_list = []
for pair_counts in melty_dict.values():
    if pair_counts["count"].nunique() == 1:
        continue
    first_col, second_col, count_col = pair_counts.columns[:3]
    chart_title = alt.TitleParams(f'{str.upper(first_col)} to Vehicle Rating', anchor='middle')
    x_axis = alt.X(first_col + ":N", title=f"{first_col} Categories")
    y_axis = alt.Y(count_col + ":Q", title="Car Counts")
    color_scale = alt.Color(second_col + ":N", title="")
    size_field = count_col + ":Q"
    viz_list.append(
        alt.Chart(pair_counts, title=chart_title).mark_circle().encode(
            x=x_axis,
            y=y_axis,
            color=color_scale,
            size=size_field,
        )
    )

In [51]:
# Six charts in a single row, in the display order 3, 2, 4, 5, 0, 1.
alt.hconcat(*[viz_list[pos] for pos in (3, 2, 4, 5, 0, 1)])