In [1]:
'''Load Packages'''
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as sm
import warnings
from sklearn.cluster import KMeans 
import random
from sklearn import metrics 
from scipy.spatial.distance import cdist

In [86]:
def highlight_max(s):
    '''
    highlight the maximum in a Series yellow.
    '''
    is_max = s == s.max()
    return ['background-color: green' if v else '' for v in is_max]

In [4]:
sedan0 = pd.read_csv('clustersGrouped/sedanCluster0.csv')
sedan1 = pd.read_csv('clustersGrouped/sedanCluster1.csv')
sedan2 = pd.read_csv('clustersGrouped/sedanCluster2.csv')
sedan3 = pd.read_csv('clustersGrouped/sedanCluster3.csv')
sedan4 = pd.read_csv('clustersGrouped/sedanCluster4.csv')

Our objective in this notebook is to determine and create a profile for each of the clusters within the sedan vehicle class. There is potential for a lot of overlap with slight differentiators, however this nuances can make for a better customer marketing experience if executed right.

## Sedan0

We see that this first cluster group for sedans does not have anybody opting to finance the vehicle (below)

In [11]:
sedan0.vehicle_financing.unique()

array([0], dtype=int64)

In [18]:
sedan0.groupby('purchase_price_LB').count().sort_values(by='purchase_price_LB', ascending = False)

Unnamed: 0_level_0,purchase_make,purchase_model,purchase_vehicle_year,trade_in,vehicle_financing,customer_income,customer_gender,AgeBin,made_in,job_assign,Labels
purchase_price_LB,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
65001.0,1,1,1,1,1,1,1,1,1,1,1
60001.0,13,13,13,13,13,13,13,13,13,13,13
55001.0,23,23,23,23,23,23,23,23,23,23,23
50001.0,34,34,34,34,34,34,34,34,34,34,34
45001.0,111,111,111,111,111,111,111,111,111,111,111
40001.0,147,147,147,147,147,147,147,147,147,147,147
35001.0,291,291,291,291,291,291,291,291,291,291,291
30001.0,727,727,727,727,727,727,727,727,727,727,727
25001.0,2035,2035,2035,2035,2035,2035,2035,2035,2035,2035,2035
20001.0,4308,4308,4308,4308,4308,4308,4308,4308,4308,4308,4308


Want to calculate the weighted average for purchase price

In [62]:
sedan0avgPurch = np.dot(sedan0.groupby('purchase_price_LB').count().sort_values(by='purchase_price_LB', 
                                                               ascending = False).purchase_make.values,
       sedan0.groupby('purchase_price_LB').count().sort_values(by='purchase_price_LB', 
                                                               ascending = False).index) / len(sedan0)

print(f'The weighted purchase_price_LB for sedan0 is ${sedan0avgPurch}')

The weighted purchase_price_LB for sedan0 is $15623.907929667963


Next, we will investigate the counts of purchase_make and purchase_model to see if their is a skewness towards one or both of them.

In [43]:
sedan0.groupby('purchase_model').count().sort_values(by='purchase_price_LB', ascending = False)

Unnamed: 0_level_0,purchase_make,purchase_vehicle_year,purchase_price_LB,trade_in,vehicle_financing,customer_income,customer_gender,AgeBin,made_in,job_assign,Labels
purchase_model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
ALTIMA,1915,1915,1915,1915,1915,1915,1915,1915,1915,1915,1915
CAMRY,1904,1904,1904,1904,1904,1904,1904,1904,1904,1904,1904
COROLLA,1252,1252,1252,1252,1252,1252,1252,1252,1252,1252,1252
MALIBU,995,995,995,995,995,995,995,995,995,995,995
FUSION,977,977,977,977,977,977,977,977,977,977,977
...,...,...,...,...,...,...,...,...,...,...,...
335 HYBRID,1,1,1,1,1,1,1,1,1,1,1
S600,1,1,1,1,1,1,1,1,1,1,1
S65 AMG,1,1,1,1,1,1,1,1,1,1,1
ALERO,1,1,1,1,1,1,1,1,1,1,1


In [63]:
sedan0.groupby('purchase_make').count().sort_values(by='purchase_price_LB', ascending = False)

Unnamed: 0_level_0,purchase_model,purchase_vehicle_year,purchase_price_LB,trade_in,vehicle_financing,customer_income,customer_gender,AgeBin,made_in,job_assign,Labels
purchase_make,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
NISSAN,4029,4029,4029,4029,4029,4029,4029,4029,4029,4029,4029
TOYOTA,3536,3536,3536,3536,3536,3536,3536,3536,3536,3536,3536
FORD,2668,2668,2668,2668,2668,2668,2668,2668,2668,2668,2668
CHEVROLET,2592,2592,2592,2592,2592,2592,2592,2592,2592,2592,2592
BMW,1631,1631,1631,1631,1631,1631,1631,1631,1631,1631,1631
VOLKSWAGEN,1371,1371,1371,1371,1371,1371,1371,1371,1371,1371,1371
LEXUS,1325,1325,1325,1325,1325,1325,1325,1325,1325,1325,1325
INFINITI,1166,1166,1166,1166,1166,1166,1166,1166,1166,1166,1166
MAZDA,1121,1121,1121,1121,1121,1121,1121,1121,1121,1121,1121
DODGE,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000


Approximately half of the observations were manufactured in Japan (might not be special) (below)

In [21]:
sedan0.groupby('made_in').count().sort_values(by='purchase_price_LB', ascending = False)

Unnamed: 0_level_0,purchase_make,purchase_model,purchase_vehicle_year,purchase_price_LB,trade_in,vehicle_financing,customer_income,customer_gender,AgeBin,job_assign,Labels
made_in,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
JAPAN,12900,12900,12900,12900,12900,12900,12900,12900,12900,12900,12900
UNITED STATES,9203,9203,9203,9203,9203,9203,9203,9203,9203,9203,9203
GERMANY,3590,3590,3590,3590,3590,3590,3590,3590,3590,3590,3590
SWEDEN,201,201,201,201,201,201,201,201,201,201,201
UNITED KINGDOM,97,97,97,97,97,97,97,97,97,97,97


In [25]:
## Average customer income?
sedan0.groupby('customer_income').count().sort_values(by='purchase_price_LB', ascending = False)
## Eish not good -> more than half income unaccounted for

Unnamed: 0_level_0,purchase_make,purchase_model,purchase_vehicle_year,purchase_price_LB,trade_in,vehicle_financing,customer_gender,AgeBin,made_in,job_assign,Labels
customer_income,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
?,13710,13710,13710,13710,13710,13710,13710,13710,13710,13710,13710
20001 - 40000,3239,3239,3239,3239,3239,3239,3239,3239,3239,3239,3239
0 - 20000,2889,2889,2889,2889,2889,2889,2889,2889,2889,2889,2889
40001 - 60000,2851,2851,2851,2851,2851,2851,2851,2851,2851,2851,2851
60001 - 80000,1322,1322,1322,1322,1322,1322,1322,1322,1322,1322,1322
80001 - 100000,847,847,847,847,847,847,847,847,847,847,847
200001+,333,333,333,333,333,333,333,333,333,333,333
100001 - 120000,233,233,233,233,233,233,233,233,233,233,233
120001 - 140000,232,232,232,232,232,232,232,232,232,232,232
140001 - 160000,165,165,165,165,165,165,165,165,165,165,165


## Sedan1

In [22]:
sedan0.vehicle_financing.unique()

array([0], dtype=int64)

Also nonody financing

In [27]:
sedan1.groupby('made_in').count().sort_values(by='purchase_price_LB', ascending = False)

Unnamed: 0_level_0,purchase_make,purchase_model,purchase_vehicle_year,purchase_price_LB,trade_in,vehicle_financing,customer_income,customer_gender,AgeBin,job_assign,Labels
made_in,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
JAPAN,20926,20926,20926,20926,20926,20926,20926,20926,20926,20926,20926
UNITED STATES,9806,9806,9806,9806,9806,9806,9806,9806,9806,9806,9806
SWEDEN,324,324,324,324,324,324,324,324,324,324,324
GERMANY,66,66,66,66,66,66,66,66,66,66,66


MADE IN JAPAN

## Sedan2

In [28]:
sedan2.groupby('made_in').count().sort_values(by='purchase_price_LB', ascending = False)

Unnamed: 0_level_0,purchase_make,purchase_model,purchase_vehicle_year,purchase_price_LB,trade_in,vehicle_financing,customer_income,customer_gender,AgeBin,job_assign,Labels
made_in,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
JAPAN,12719,12719,12719,12719,12719,12719,12719,12719,12719,12719,12719
UNITED STATES,8478,8478,8478,8478,8478,8478,8478,8478,8478,8478,8478
GERMANY,7603,7603,7603,7603,7603,7603,7603,7603,7603,7603,7603
UNITED KINGDOM,260,260,260,260,260,260,260,260,260,260,260
SWEDEN,172,172,172,172,172,172,172,172,172,172,172
SOUTH KOREA,12,12,12,12,12,12,12,12,12,12,12


In [31]:
sedan3.groupby('made_in').count().sort_values(by='purchase_price_LB', ascending = False)

Unnamed: 0_level_0,purchase_make,purchase_model,purchase_vehicle_year,purchase_price_LB,trade_in,vehicle_financing,customer_income,customer_gender,AgeBin,job_assign,Labels
made_in,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
UNITED STATES,23482,23482,23482,23482,23482,23482,23482,23482,23482,23482,23482
JAPAN,20256,20256,20256,20256,20256,20256,20256,20256,20256,20256,20256
GERMANY,6253,6253,6253,6252,6253,6253,6253,6253,6253,6253,6253
SWEDEN,5,5,5,5,5,5,5,5,5,5,5
UNITED KINGDOM,3,3,3,3,3,3,3,3,3,3,3


## Sedan3

In [32]:
sedan4.groupby('made_in').count().sort_values(by='purchase_price_LB', ascending = False)

Unnamed: 0_level_0,purchase_make,purchase_model,purchase_vehicle_year,purchase_price_LB,trade_in,vehicle_financing,customer_income,customer_gender,AgeBin,job_assign,Labels
made_in,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
SOUTH KOREA,14606,14606,14606,14606,14606,14606,14606,14606,14606,14606,14606
UNITED KINGDOM,126,126,126,126,126,126,126,126,126,126,126
ITALY,2,2,2,2,2,2,2,2,2,2,2


## Sedan4

In [116]:
avg_PP_0 = np.dot(sedan0.groupby('purchase_price_LB').count().sort_values(by='purchase_price_LB', 
                                                               ascending = False).purchase_make.values,
       sedan0.groupby('purchase_price_LB').count().sort_values(by='purchase_price_LB', 
                                                               ascending = False).index) / len(sedan0)

avg_PP_1 = np.dot(sedan1.groupby('purchase_price_LB').count().sort_values(by='purchase_price_LB', 
                                                               ascending = False).purchase_make.values,
       sedan1.groupby('purchase_price_LB').count().sort_values(by='purchase_price_LB', 
                                                               ascending = False).index) / len(sedan1)

avg_PP_2 = np.dot(sedan2.groupby('purchase_price_LB').count().sort_values(by='purchase_price_LB', 
                                                               ascending = False).purchase_make.values,
       sedan2.groupby('purchase_price_LB').count().sort_values(by='purchase_price_LB', 
                                                               ascending = False).index) / len(sedan2)

avg_PP_3 = np.dot(sedan3.groupby('purchase_price_LB').count().sort_values(by='purchase_price_LB', 
                                                               ascending = False).purchase_make.values,
       sedan3.groupby('purchase_price_LB').count().sort_values(by='purchase_price_LB', 
                                                               ascending = False).index) / len(sedan3)

avg_PP_4 = np.dot(sedan4.groupby('purchase_price_LB').count().sort_values(by='purchase_price_LB', 
                                                               ascending = False).purchase_make.values,
       sedan4.groupby('purchase_price_LB').count().sort_values(by='purchase_price_LB', 
                                                               ascending = False).index) / len(sedan4)

#print(f'The weighted purchase_price_LB for sedan0 is ${sedan0avgPurch}')

weightedPP = pd.DataFrame({
                'sedan0':[avg_PP_0],
                'sedan1':[avg_PP_1],
                'sedan2':[avg_PP_2],
                'sedan3':[avg_PP_3],
                'sedan4':[avg_PP_4],
             })
weightedPP.rename(index={0: "Weighted Purchase_Price_LB"}, inplace = True)
weightedPP.index.name = ''
weightedPP

Unnamed: 0,sedan0,sedan1,sedan2,sedan3,sedan4
,,,,,
Weighted Purchase_Price_LB,15623.90793,13244.846796,19598.182328,12991.159783,13682.620741


### Purchase_make

In [93]:
pd.concat([sedan0.groupby('purchase_make').count().rename(columns={'trade_in':"sedan0"})[['sedan0']],
           sedan1.groupby('purchase_make').count().rename(columns={'trade_in':"sedan1"})[['sedan1']],
          sedan2.groupby('purchase_make').count().rename(columns={'trade_in':"sedan2"})[['sedan2']],
          sedan3.groupby('purchase_make').count().rename(columns={'trade_in':"sedan3"})[['sedan3']],
          sedan4.groupby('purchase_make').count().rename(columns={'trade_in':"sedan4"})[['sedan4']]],axis=1).fillna(0).style.apply(highlight_max).format(int)

Unnamed: 0,sedan0,sedan1,sedan2,sedan3,sedan4
ACURA,608,1660,587,0,0
AUDI,370,0,747,506,0
BMW,1631,2,4247,2267,0
BUICK,480,1408,221,0,0
CADILLAC,614,18,951,987,0
CHEVROLET,2592,1,2205,13026,0
CHRYSLER,874,1,1133,2745,0
DODGE,1000,0,1145,4495,0
FORD,2668,7484,2112,496,0
HONDA,503,3,856,1309,0


### Purchase_Model

In [94]:
pd.concat([sedan0.groupby('purchase_model').count().rename(columns={'trade_in':"sedan0"})[['sedan0']],
           sedan1.groupby('purchase_model').count().rename(columns={'trade_in':"sedan1"})[['sedan1']],
          sedan2.groupby('purchase_model').count().rename(columns={'trade_in':"sedan2"})[['sedan2']],
          sedan3.groupby('purchase_model').count().rename(columns={'trade_in':"sedan3"})[['sedan3']],
          sedan4.groupby('purchase_model').count().rename(columns={'trade_in':"sedan4"})[['sedan4']]],axis=1).fillna(0).style.apply(highlight_max).format(int)

Unnamed: 0,sedan0,sedan1,sedan2,sedan3,sedan4
200,293,0,358,907,0
3-Sep,3,0,0,0,0
300,433,0,680,1351,0
320,1,0,1,0,0
325,116,1,166,293,0
328,697,0,1778,1185,0
330,71,0,121,138,0
335,224,0,581,285,0
335 HYBRID,1,0,1,0,0
5-Sep,2,0,0,0,0


### Customer_Income

In [97]:
pd.concat([sedan0.groupby('customer_income').count().rename(columns={'trade_in':"sedan0"})[['sedan0']],
           sedan1.groupby('customer_income').count().rename(columns={'trade_in':"sedan1"})[['sedan1']],
          sedan2.groupby('customer_income').count().rename(columns={'trade_in':"sedan2"})[['sedan2']],
          sedan3.groupby('customer_income').count().rename(columns={'trade_in':"sedan3"})[['sedan3']],
          sedan4.groupby('customer_income').count().rename(columns={'trade_in':"sedan4"})[['sedan4']]],axis=1).fillna(0).style.apply(highlight_max).format(int)

Unnamed: 0,sedan0,sedan1,sedan2,sedan3,sedan4
0 - 20000,2889,3686,507,5648,1571
100001 - 120000,233,71,4708,0,453
120001 - 140000,232,683,787,606,240
140001 - 160000,165,89,1994,0,210
160001 - 180000,133,257,551,208,106
180001 - 200000,37,14,467,0,41
200001+,333,260,2519,44,243
20001 - 40000,3239,11736,67,23514,4355
40001 - 60000,2851,9863,99,18080,3422
60001 - 80000,1322,1858,11908,125,1506


### Customer_Gender

In [100]:
pd.concat([sedan0.groupby('customer_gender').count().rename(columns={'trade_in':"sedan0"})[['sedan0']],
           sedan1.groupby('customer_gender').count().rename(columns={'trade_in':"sedan1"})[['sedan1']],
          sedan2.groupby('customer_gender').count().rename(columns={'trade_in':"sedan2"})[['sedan2']],
          sedan3.groupby('customer_gender').count().rename(columns={'trade_in':"sedan3"})[['sedan3']],
          sedan4.groupby('customer_gender').count().rename(columns={'trade_in':"sedan4"})[['sedan4']]],axis=1).fillna(0).style.apply(highlight_max).format(int)

Unnamed: 0_level_0,sedan0,sedan1,sedan2,sedan3,sedan4
customer_gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
F,9463,13211,8746,22634,6890
M,13821,14234,17663,20893,6266
U,2707,3677,2835,6472,1578


### Made_In

In [123]:
pd.concat([sedan0.groupby('made_in').count().rename(columns={'trade_in':"sedan0"})[['sedan0']],
           sedan1.groupby('made_in').count().rename(columns={'trade_in':"sedan1"})[['sedan1']],
          sedan2.groupby('made_in').count().rename(columns={'trade_in':"sedan2"})[['sedan2']],
          sedan3.groupby('made_in').count().rename(columns={'trade_in':"sedan3"})[['sedan3']],
          sedan4.groupby('made_in').count().rename(columns={'trade_in':"sedan4"})[['sedan4']]],axis=1).fillna(0).style.apply(highlight_max).format(int)

Unnamed: 0,sedan0,sedan1,sedan2,sedan3,sedan4
GERMANY,3590,66,7603,6253,0
JAPAN,12900,20926,12719,20256,0
SWEDEN,201,324,172,5,0
UNITED KINGDOM,97,0,260,3,126
UNITED STATES,9203,9806,8478,23482,0
SOUTH KOREA,0,0,12,0,14606
ITALY,0,0,0,0,2


### Trade_In

In [118]:
pd.concat([sedan0.groupby('trade_in').count().rename(columns={'made_in':"sedan0"})[['sedan0']],
           sedan1.groupby('trade_in').count().rename(columns={'made_in':"sedan1"})[['sedan1']],
          sedan2.groupby('trade_in').count().rename(columns={'made_in':"sedan2"})[['sedan2']],
          sedan3.groupby('trade_in').count().rename(columns={'made_in':"sedan3"})[['sedan3']],
          sedan4.groupby('trade_in').count().rename(columns={'made_in':"sedan4"})[['sedan4']]],axis=1).fillna(0).style.apply(highlight_max).format(int)

Unnamed: 0_level_0,sedan0,sedan1,sedan2,sedan3,sedan4
trade_in,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,15775,18973,12879,31529,8188
1,10216,12149,16365,18470,6546


### Finance

In [119]:
pd.concat([sedan0.groupby('vehicle_financing').count().rename(columns={'made_in':"sedan0"})[['sedan0']],
           sedan1.groupby('vehicle_financing').count().rename(columns={'made_in':"sedan1"})[['sedan1']],
          sedan2.groupby('vehicle_financing').count().rename(columns={'made_in':"sedan2"})[['sedan2']],
          sedan3.groupby('vehicle_financing').count().rename(columns={'made_in':"sedan3"})[['sedan3']],
          sedan4.groupby('vehicle_financing').count().rename(columns={'made_in':"sedan4"})[['sedan4']]],axis=1).fillna(0).style.apply(highlight_max).format(int)

Unnamed: 0_level_0,sedan0,sedan1,sedan2,sedan3,sedan4
vehicle_financing,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,25991,27,495,0,2927
1,0,31095,28749,49999,11807


### Age

In [122]:
pd.concat([sedan0.groupby('AgeBin').count().rename(columns={'made_in':"sedan0"})[['sedan0']],
           sedan1.groupby('AgeBin').count().rename(columns={'made_in':"sedan1"})[['sedan1']],
          sedan2.groupby('AgeBin').count().rename(columns={'made_in':"sedan2"})[['sedan2']],
          sedan3.groupby('AgeBin').count().rename(columns={'made_in':"sedan3"})[['sedan3']],
          sedan4.groupby('AgeBin').count().rename(columns={'made_in':"sedan4"})[['sedan4']]],axis=1).fillna(0).style.apply(highlight_max).format(int)

Unnamed: 0_level_0,sedan0,sedan1,sedan2,sedan3,sedan4
AgeBin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Eighties,583,225,33,214,123
Fifties,4246,4273,4809,5200,2176
Forties,5390,6023,9022,9020,3133
HundredPlus,137,60,117,95,41
Nineties,46,14,4,11,9
Seventies,1245,884,237,891,365
Sixties,2395,2175,1412,2678,1073
Thirties,5065,7418,8371,11821,3111
Twenties,6023,9341,4894,18476,4267
UnderTwenty,733,654,242,1529,405


<em>k</em>-means will partition your customers into three groups since we specified the algorithm to generate 3 clusters. The customers in each cluster are similar to each other in terms of the features included in the dataset.

Now we can create a profile for each group, considering the common characteristics of each cluster. 
For example, the 3 clusters can be:

    0. - Twenties to Thirties or Forties and Fifties
       - Majority Customer Income Unkown "?"
       - No Financing
       - Majority Male
       - Made in Japan and U.S.
       - More likely to not trade in a vehicle 

    1. - Twenties to Thirties or Forties to Fifties
       - Majority Income between $20k - $60k
       - Finance 
       - Unisex
       - 2/3 in made in Japan and 1/3 in U.S.
       - More likely to not trade in a vehicle
        
    2. - Thirties to Forties 
       - Income between $60k - $80k
       - Finance
       - Male
       - Germany + U.S and Japan
       - More likely to trade in a vehicle
   
    3. - Thirties and Twenties
       - Income $0 - $60k
       - Finance 
       - Female
       - U.S. Japan and some Germany
       - More likely to not trade in
       
    4. - Downwards trend from twenties to sixties 
       - $20k - $60k
       - Finance
       - Unisex
       - Made in South Korea
       - More likely to trade in