In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
%matplotlib inline
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)


cars = pd.read_csv('../dataset/cars-dataset.csv')
countries = pd.read_csv('../dataset/brands_countries.csv')



Columns (32) have mixed types. Specify dtype option on import or set low_memory=False.



In [40]:
cars['Company'].unique()

array(['AC', 'ACURA', 'ALFA ROMEO', 'ALPINE', 'ARIEL', 'ARO', 'ARTEGA',
       'ASTON MARTIN', 'AUDI', 'AURUS', 'BENTLEY', 'BMW', 'BRISTOL',
       'BUFORI', 'BUGATTI', 'BUICK', 'CADILLAC', 'CATERHAM', 'CHEVROLET',
       'CHRYSLER', 'CITROEN', 'CUPRA', 'DACIA', 'DAEWOO', 'DAIHATSU',
       'DATSUN', 'DeLorean', 'DODGE', 'DONKERVOORT', 'DR MOTOR',
       'DS AUTOMOBILES', 'EAGLE', 'FERRARI', 'FIAT', 'FISKER', 'FORD',
       'FSO', 'GEELY', 'GENESIS', 'GMC', 'GORDON MURRAY Automotive',
       'GTA Motor', 'HINDUSTAN', 'HOLDEN', 'HONDA', 'HUMMER', 'HYUNDAI',
       'INEOS', 'INFINITI', 'ISUZU', 'JAGUAR', 'JEEP', 'Karma', 'KIA',
       'KOENIGSEGG', 'KTM', 'LADA', 'LAMBORGHINI', 'LANCIA', 'LAND ROVER',
       'LEXUS', 'LIGHTYEAR', 'LINCOLN', 'LOTUS', 'Lucid Motors',
       'Mahindra', 'MARUSSIA', 'MARUTI SUZUKI', 'MASERATI', 'MAYBACH',
       'MAZDA', 'MCLAREN', 'MERCEDES BENZ', 'Mercedes-AMG', 'MERCURY',
       'MG', 'MINI', 'MITSUBISHI', 'MORGAN', 'NIO', 'NISSAN',
       'OLDSMOBILE', '

In [41]:
#count unique companies
cars['Company'].nunique()

124

In [42]:
#Count lines of data
cars['Company'].count()

29792

In [43]:
# Join the two dataframes along rows
complete = cars.merge(countries, on='Company', how='left')

#Save the complete dataset to a new csv file
complete.to_csv('complete_cars.csv', index=False)

In [44]:
#Count the number of car brands per country
number_of_brands = countries['Country'].value_counts()
number_of_brands

Country
United States     26
United Kingdom    18
Japan             13
Germany           12
Italy              9
France             6
South Korea        6
Sweden             4
India              4
China              4
Spain              4
Netherlands        3
Malaysia           3
Russia             3
Romania            2
Poland             1
Australia          1
Austria            1
Croatia            1
Czech Republic     1
Vietnam            1
Denmark            1
Name: count, dtype: int64

In [45]:
custom_colors = ['#1865A5', '#76C1EF','#DE3F47','#950E3F']

# Brands per Country

In [46]:
country_counts = countries['Country'].value_counts()
# Create a DataFrame from the counts
df_counts = pd.DataFrame({'Country': country_counts.index, 'Count': country_counts.values})

fig = px.choropleth(df_counts, 
                    locations='Country', 
                    locationmode='country names', 
                    color='Count', 
                    hover_name='Country', 
                    title='Number of car brands per country',
                    color_continuous_scale=custom_colors,
                    labels={'Count':'Number of car brands'},
                    projection='natural earth',
                    scope='world',
                    width=1200,
                    height=800)

fig.show()

In [47]:
#Bar plot with the manufacturers and brands count

fig = px.bar(countries, 
             y='Country', 
             orientation='h',  
             color_continuous_scale=custom_colors,
             color=countries['Company'].value_counts().values,
             text='Company',
             title='Number of car brands per country')


fig.show()


In [48]:
#Show the brands from Spain
spain_brands = countries[countries['Country'] == 'Spain']
spain_brands

Unnamed: 0,Company,Country,Continent
21,CUPRA,Spain,Europe
41,GTA Motor,Spain,Europe
102,SANTANA,Spain,Europe
105,SEAT,Spain,Europe


In [49]:
#Count the number of countries with brands
countries['Country'].nunique()

22

In [50]:
cars

Unnamed: 0.1,Unnamed: 0,Model,Serie,Company,Body style,Segment,Production years,Cylinders,Displacement,Power(HP),Power(BHP),Power(KW),Torque(lb-ft),Torque(Nm),Electrical motor power,Electrical motor torque,Fuel System,Fuel,Fuel capacity,Top Speed,Acceleration 0-62 Mph (0-100 kph),Drive Type,Gearbox,Front brake,Rear brake,Tire Size,Length,Width,Height,Front/rear Track,Wheelbase,Ground Clearance,Aerodynamics (Cd),Aerodynamics (frontal area),Turning circle,Cargo Volume,Unladen Weight,Gross Weight Limit,Combined mpg,City mpg,Highway mpg,CO2 Emissions,CO2 Emissions (Combined),Turning circle (curb to curb),Total maximum power,Power pack,Nominal Capacity,Top speed (electrical),EV Range,High mpg,Extra high mpg,Medium mpg,Low mpg,Total maximum torque,Maximum Capacity,Specification summary
0,0,AC Ace (1993-1996),Ace,AC,"Convertible (spider/spyder, cabrio/cabriolet, ...",Coupe Cabrio,"1993, 1994, 1995, 1996",V8,4942 cm3,260 HP @ 5250 RPM,256 BHP @ 5250 RPM,191.2 KW @ 5250 RPM,320 lb-ft @ 3250 RPM,434 Nm @ 3250 RPM,,,Multipoint Injection Naturally aspirated,Gasoline,21.1 gallons (79.9 L),140 mph (225 km/h),,Rear Wheel Drive,4-speed automatic,Ventilated Discs,Ventilated discs,225/50 ZR16,174 in (4420 mm),73.6 in (1869 mm),51.2 in (1300 mm),"62.6/61.8 in (1,590/1,570 mm)",97.3 in (2471 mm),,,,,6.5 cuFT (184 L),3175 lbs (1440 kg),,27.4 mpg US (8.6 L/100Km),,,,,,,,,,,,,,,,,4.9L V8 4AT (260 HP)
1,1,AC Aceca (1998-2000),Aceca,AC,Coupé (two-door),Coupe,"1998, 1999, 2000",V8,3506 cm3,350 HP @ 6500 RPM,345 BHP @ 6500 RPM,257.4 KW @ 6500 RPM,300 lb-ft @ 4000 RPM,407 Nm @ 4000 RPM,,,Multipoint Injection Naturally aspirated,Gasoline,23.8 gallons (90.1 L),155 mph (249 km/h),5.6 s,Rear Wheel Drive,6-manual speed,Ventilated Discs,Ventilated discs,255/55 R18,183.5 in (4661 mm),74.4 in (1890 mm),52.8 in (1341 mm),"62.6/61.8 in (1,590/1,570 mm)",107.1 in (2720 mm),,0.35,2.1 m2,,17.7 cuFT (501 L),3560 lbs (1615 kg),,,11.8 mpg US (19.9 L/100Km),26.1 mpg US (9 L/100Km),,,,,,,,,,,,,,,3.5L V8 32V Turbo 6MT (354 HP)
2,2,AC Aceca (1998-2000),Aceca,AC,Coupé (two-door),Coupe,"1998, 1999, 2000",V8,3506 cm3,350 HP @ 6500 RPM,345 BHP @ 6500 RPM,257.4 KW @ 6500 RPM,300 lb-ft @ 4000 RPM,407 Nm @ 4000 RPM,,,Multipoint Injection Naturally aspirated,Gasoline,23.8 gallons (90.1 L),155 mph (249 km/h),5.6 s,Rear Wheel Drive,6-manual speed,Ventilated Discs,Ventilated discs,255/55 R18,183.5 in (4661 mm),74.4 in (1890 mm),52.8 in (1341 mm),"62.6/61.8 in (1,590/1,570 mm)",107.1 in (2720 mm),,0.35,2.1 m2,,17.7 cuFT (501 L),3560 lbs (1615 kg),,,11.8 mpg US (19.9 L/100Km),26.1 mpg US (9 L/100Km),,,,,,,,,,,,,,,4.6L V8 5MT (326 HP)
3,3,AC Cobra MkIII (1965-1967),Cobra,AC,Coupé (two-door),Roadster & Convertible,"1965, 1966, 1967",V8,4727 cm3,280 HP @ 5800 RPM,276 BHP @ 5800 RPM,205.9 KW @ 5800 RPM,314 lb-ft @ 3400 RPM,426 Nm @ 3400 RPM,,,,Gasoline,18 gallons (68.1 L),140 mph (225 km/h),4.7 s,Rear Wheel Drive,4-speed manual,,,,151.5 in (3848 mm),61 in (1549 mm),49 in (1245 mm),"51.5/52.5 in (1,308/1,334 mm)",90 in (2286 mm),,,1.6 m2,,,2019 lbs (916 kg),,,,,,,,,,,,,,,,,,,4.7L V8 4MT (280 HP)
4,4,AC Cobra 378 MkIV (1965-1967),Cobra,AC,Coupé (two-door),Roadster & Convertible,"1965, 1966, 1967",V8,,446.1 HP @ - RPM,440 BHP @ - RPM,328.1 KW @ - RPM,,,,,,Gasoline,18 gallons (68.1 L),,,Rear Wheel Drive,6-speed manual,,,,153.5 in (3899 mm),70.1 in (1781 mm),46.9 in (1191 mm),"54.9/56.7 in (1,394/1,440 mm)",90 in (2286 mm),5.1 in (130 mm),,,,,,,,,,,,,,,,,,,,,,,,6.2L V8 6MT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29787,29787,2026 Zenvo Aurora Agil,Aurora,Zenvo,Coupé (two-door),Exotic,2023,V12,,1250 HP @ 9800 RPM,1233 BHP @ 9800 RPM,919.4 KW @ 9800 RPM,,,147.1 kw (200 hp),,Turbocharged Direct Injection,Hybrid Gasoline,,227 mph (365 km/h),2.5 s,Rear Wheel Drive,Hybridised seven-speed paddle shift gearbox wi...,Ventilated discs,Ventilated discs,265/35 R20 // 325/30 R21,190.4 in (4836 mm),79.5 in (2019 mm),43.2 in (1097 mm),,110.2 in (2799 mm),,,,,,2866 lbs (1300 kg),,,,,,,,1081.9 kw (1471 hp),,,,,,,,,1032.6 lb-ft (1400 Nm),,6.6L V12 7AT RWD (1471 HP)
29788,29788,2026 Zenvo Aurora Tur,Aurora,Zenvo,Coupé (two-door),Exotic,2023,V12,,1250 HP @ 9800 RPM,1233 BHP @ 9800 RPM,919.4 KW @ 9800 RPM,,,147.1 kw (200 hp),,Turbocharged Direct Injection,Mild Hybrid,,280 mph (451 km/h),2.3 s,All Wheel Drive,7-speed automatic,Ventilated Discs,Ventilated Discs,265/35 R20 // 325/30 R21,189.7 in (4818 mm),79.5 in (2019 mm),44 in (1118 mm),,110.2 in (2799 mm),,,,,,3197 lbs (1450 kg),,,,,,,,1379.1 kw (1875 hp),,,,,,,,,1253.9 lb-ft (1700 Nm),,6.6L V12 7AT AWD (1876 HP)
29789,29789,Zenvo TS1 (2017-Present),TS1,Zenvo,Coupé (two-door),Exotic,"2017, 2018, 2019, 2020, 2021, 2022, 2023",V8,5800 cm3,1118 HP @ 7100 RPM,1103 BHP @ 7100 RPM,822.3 KW @ 7100 RPM,840 lb-ft @ - RPM,1139 Nm @ - RPM,,,Supercharged Multipoint Injection,,,233 mph (375 km/h),2.8 s,Rear Wheel Drive,7-speed automatic,Ventilated Discs,Ventilated Discs,265/35 ZR19 || 345/30 ZR20,184.3 in (4681 mm),84.8 in (2154 mm),47.2 in (1199 mm),,114.3 in (2903 mm),6.3 in (160 mm),,,,4.8 cuFT (136 L),3483 lbs (1580 kg),,,,,,,,,,,,,,,,,,,5.8 V8 7AT (1118 HP)
29790,29790,Zenvo TSR-S (2018-Present),TSR-S,Zenvo,,Exotic,"2018, 2019, 2020, 2021, 2022, 2023",V8,5800 cm3,1193.3 HP @ 7100 RPM,1177 BHP @ 7100 RPM,877.7 KW @ 7100 RPM,840 lb-ft @ - RPM,1139 Nm @ - RPM,,,Sequential Fuel Injection,Gasoline,,202 mph (325 km/h),2.8 s,All Wheel Drive,7-speed automatic,Ventilated Discs,Discs,245/35-20/325/30-21,189.6 in (4816 mm),80.2 in (2037 mm),47.2 in (1199 mm),,114.4 in (2906 mm),3.7 in (94 mm),,,,,3296 lbs (1495 kg),,,,,,,,,,,,,,,,,,,5.8L V8 7AT (1177 HP)


In [51]:
#Create a new column that only has the power(hp) value only up to the first space
cars['HP'] = cars['Power(HP)'].str.split(' ').str[0]
cars['HP']

cars['Torque(Nm)'] = cars['Torque(Nm)'].str.split(' ').str[0]
cars['Torque(Nm)'] = cars['Torque(Nm)'].astype(float)

In [87]:
cars['Body style'].unique()

#Count the number of body styles

#cars[cars['Body style'].isna()]

cars['Body style'].fillna('Sedan', inplace=True)

cars['Body style']


0        Convertible (spider/spyder, cabrio/cabriolet, ...
1                                         Coupé (two-door)
2                                         Coupé (two-door)
3                                         Coupé (two-door)
4                                         Coupé (two-door)
5                                         Coupé (two-door)
6        Convertible (spider/spyder, cabrio/cabriolet, ...
7        Convertible (spider/spyder, cabrio/cabriolet, ...
8                                         Coupé (two-door)
9                                         Coupé (two-door)
10                            SUV (Sports Utility Vehicle)
11                            SUV (Sports Utility Vehicle)
12                            SUV (Sports Utility Vehicle)
13                            SUV (Sports Utility Vehicle)
14                            SUV (Sports Utility Vehicle)
15                            SUV (Sports Utility Vehicle)
16                            SUV (Sports Utility Vehicl

In [52]:
#pd.set_option('display.max_rows', None)
#cars['HP'].value_counts()

HP
150       1572
90         909
140        744
75         650
170        643
110        629
116        613
120        563
136        550
105        505
163        445
190        405
125        396
200        375
101        369
184        363
160        334
204        332
95         327
131        282
177        274
60         267
115        267
122        263
109        243
143        235
245        218
218        218
130        207
180        206
306        204
68         193
231        189
69         187
80         184
102        183
340        181
250        181
100        179
156        174
179        165
86         165
175        158
165        158
211        157
192        150
300        148
82         148
272        146
114        144
258        143
220        143
193        139
147        133
111        132
145        130
185        130
106        128
65         124
155        123
129        121
286        119
141        116
98         114
265        113
54         110
280    

In [53]:
#Cars columns
cars.columns

Index(['Unnamed: 0', 'Model', 'Serie', 'Company', 'Body style', 'Segment',
       'Production years', 'Cylinders', 'Displacement', 'Power(HP)',
       'Power(BHP)', 'Power(KW)', 'Torque(lb-ft)', 'Torque(Nm)',
       'Electrical motor power', 'Electrical motor torque', 'Fuel System',
       'Fuel', 'Fuel capacity', 'Top Speed',
       'Acceleration 0-62 Mph (0-100 kph)', 'Drive Type', 'Gearbox',
       'Front brake', 'Rear brake', 'Tire Size', 'Length', 'Width', 'Height',
       'Front/rear Track', 'Wheelbase', 'Ground Clearance',
       'Aerodynamics (Cd)', 'Aerodynamics (frontal area)', 'Turning circle',
       'Cargo Volume', 'Unladen Weight', 'Gross Weight Limit', 'Combined mpg',
       'City mpg', 'Highway mpg', 'CO2 Emissions', 'CO2 Emissions (Combined)',
       'Turning circle (curb to curb)', 'Total maximum power', 'Power pack',
       'Nominal Capacity', 'Top speed (electrical)', 'EV Range', 'High mpg',
       'Extra high mpg', 'Medium mpg', 'Low mpg', 'Total maximum torque',
 

In [54]:
#Show max speed column
cars['Top Speed'].value_counts()

Top Speed
155 mph (249 km/h)             1744
130 mph (209 km/h)              818
112 mph (180 km/h)              748
118 mph (190 km/h)              671
124 mph (200 km/h)              669
155.3 mph (250 km/h)            549
115 mph (185 km/h)              535
127 mph (204 km/h)              501
121 mph (195 km/h)              477
109 mph (175 km/h)              471
140 mph (225 km/h)              465
137 mph (220 km/h)              455
143 mph (230 km/h)              451
106 mph (171 km/h)              409
134 mph (216 km/h)              350
146 mph (235 km/h)              332
149 mph (240 km/h)              328
119 mph (192 km/h)              301
117 mph (188 km/h)              298
126 mph (203 km/h)              297
122 mph (196 km/h)              270
114 mph (183 km/h)              266
129 mph (208 km/h)              261
103 mph (166 km/h)              255
99 mph (159 km/h)               252
116 mph (187 km/h)              249
132 mph (212 km/h)              247
105.6 mph (170 km/

In [55]:
#Considerate only the km/h values
# Replace NaN values with an empty string
cars['Top Speed'].fillna('', inplace=True)

# Extract km/h values
cars['Top Speed km/h'] = cars['Top Speed'].str.extract(r'(\d+)\s+km/h').astype(float)

#Drop Top Speed
cars.drop(columns='Top Speed', inplace=True)

#Show the new column
cars['Top Speed km/h']

0         225.0
1         249.0
2         249.0
3         225.0
4           NaN
5         227.0
6           NaN
7           NaN
8           NaN
9           NaN
10          NaN
11          NaN
12          NaN
13          NaN
14          NaN
15          NaN
16          NaN
17          NaN
18          NaN
19          NaN
20        220.0
21        209.0
22        190.0
23          NaN
24          NaN
25          NaN
26          NaN
27          NaN
28          NaN
29          NaN
30          NaN
31          NaN
32        195.0
33        209.0
34          NaN
35          NaN
36          NaN
37          NaN
38          NaN
39        209.0
40        201.0
41        201.0
42        222.0
43        222.0
44          NaN
45          NaN
46          NaN
47          NaN
48          NaN
49          NaN
50          NaN
51        209.0
52        203.0
53          NaN
54          NaN
55        209.0
56        307.0
57        260.0
58        270.0
59          NaN
60          NaN
61        270.0
62      

In [56]:
#Show Bugatti top speed
cars[cars['Company'] == 'BUGATTI']['Top Speed km/h']

5036    420.0
5037    349.0
5038    420.0
5039    420.0
5040    420.0
5041      NaN
5042    380.0
5043    380.0
5044    420.0
5045    349.0
5046    343.0
5047      NaN
5048      NaN
5049    169.0
5050      NaN
5051     60.0
5052      NaN
5053      NaN
5054    121.0
5055    121.0
5056    121.0
5057    201.0
5058    161.0
5059    137.0
5060    145.0
5061    137.0
5062      NaN
5063      NaN
5064    192.0
5065    180.0
5066      NaN
5067    220.0
5068      NaN
5069    193.0
5070    410.0
5071    415.0
5072    407.0
5073    407.0
Name: Top Speed km/h, dtype: float64

In [57]:
#Average Top speed by brand by year
cars.groupby('Company')['Top Speed km/h'].mean()


Company
AC                          235.000000
ACURA                       220.540000
ALFA ROMEO                  203.716180
ALPINE                      208.894737
ARIEL                       247.333333
ARO                         122.142857
ARTEGA                      270.000000
ASTON MARTIN                290.840000
AUDI                        225.274520
AURUS                       204.000000
BENTLEY                     288.309524
BMW                         229.098230
BRISTOL                     233.615385
BUFORI                      196.600000
BUGATTI                     274.928571
BUICK                       198.148936
CADILLAC                    230.300971
CATERHAM                    208.769231
CHEVROLET                   205.779141
CHRYSLER                    203.300000
CITROEN                     183.268600
CUPRA                       216.055556
DACIA                       161.313609
DAEWOO                      178.875000
DAIHATSU                    156.653846
DATSUN           

In [58]:
#save the mean top speed by brand to a new csv file
cars.groupby('Company')['Top Speed km/h'].mean().to_csv('mean_top_speed_by_brand.csv')


In [59]:
# Calculate the mean top speed by brand
mean_top_speed = cars.groupby('Company')['Top Speed km/h'].mean().reset_index()

# Remove rows with NaN values in 'Company' column
mean_top_speed = mean_top_speed.dropna(subset=['Top Speed km/h'])

# Plot the mean top speed by brand, excluding rows with NaN values
fig = px.bar(mean_top_speed[mean_top_speed['Company'].notna()], 
             x='Company', 
             y='Top Speed km/h', 
             title='Mean top speed by brand',
             color='Top Speed km/h',
             color_continuous_scale=custom_colors,
             width=1200,
             height=600)

# Update layout to adjust the order of x-axis categories
fig.update_layout(xaxis={'categoryorder':'total descending'})

fig.show()

In [60]:
#Filter the cars that were produced in the selected year and calculate the mean top speed per company and add the year selected

evolution_top_speed = pd.DataFrame()

for i in range(1899, 2025):
    year_selected = i
    #print(i)
    aux_evolution_top_speed = cars[cars['Production years'].str.contains(str(year_selected))].groupby('Company')['Top Speed km/h'].mean()
    # change name of column
    aux_evolution_top_speed = aux_evolution_top_speed.reset_index()
    aux_evolution_top_speed.columns = ['Company', 'Mean Top Speed km/h']
    # add year column

    aux_evolution_top_speed['Year'] = year_selected

    evolution_top_speed = pd.concat([evolution_top_speed, aux_evolution_top_speed])


#evolution_top_speed

# store evolution_top_speed to a new csv file
evolution_top_speed.to_csv('evolution_top_speed.csv', index=False)

In [61]:
line_colors = ['#1865A5', '#76C1EF', '#f9d29f', '#EBA74C', '#DE3F47','#950E3F']

In [62]:
#Make the same plot but with line plot
brands_filter = ['BMW', 'AUDI', 'MERCEDES BENZ', 'TOYOTA']

filtered_evolution_top_speed = evolution_top_speed[evolution_top_speed['Company'].isin(brands_filter)]

fig = px.line(filtered_evolution_top_speed,
                x='Year',
                y='Mean Top Speed km/h',
                title='Mean top speed by brand',
                color='Company',
                color_discrete_sequence=line_colors,
                width=1200,
                height=900,
                )

#Change line width
fig.update_traces(line=dict(width=4))

#Add a slider to select the year
fig.update_layout(xaxis_title='Year',
                  yaxis_title='Mean Top Speed km/h',
                  title='Mean top speed by brand',
                  xaxis=dict(range=[1920, 2024]),
                  xaxis_rangeslider_visible=True)

fig.show()

In [63]:
# Average power by brand by year

cars['HP'] = cars['HP'].astype(float)

evolution_power_per_brand = pd.DataFrame()

for i in range(1899, 2025):
    year_selected = i
    #print(i)
    aux_evolution_power_per_brand = pd.DataFrame()
    aux_evolution_power_per_brand = cars[cars['Production years'].str.contains(str(year_selected))].groupby('Company')['HP'].mean()
    # change name of column
    aux_evolution_power_per_brand = aux_evolution_power_per_brand.reset_index()
    aux_evolution_power_per_brand.columns = ['Company', 'Mean HP']
    # add year column

    aux_evolution_power_per_brand['Year'] = year_selected

    evolution_power_per_brand = pd.concat([evolution_power_per_brand, aux_evolution_power_per_brand])


#evolution_power_per_brand
#cars.groupby('Company')['Power(HP)'].mean()


In [64]:
# line plot by year of the mean power by brand by year
brands = ['TOYOTA', 'BUGATTI', 'FERRARI', 'BMW', 'AUDI', 'PORSCHE', 'FORD', 'DODGE']

filtered_evolution_power = evolution_power_per_brand[evolution_power_per_brand['Company'].isin(brands)]

fig = px.line(filtered_evolution_power,
                x='Year',
                y='Mean HP',
                title='Mean HP by brand',
                color='Company',
                width=1200,
                height=900,
                color_discrete_sequence=line_colors
                )

#Change line width
fig.update_traces(line=dict(width=4))

#Add a slider to select the year
fig.update_layout(xaxis_title='Year',
                  yaxis_title='Mean HP',
                  title='Mean HP by brand',
                  xaxis=dict(range=[1920, 2024]),
                  xaxis_rangeslider_visible=True)

fig.show()

## Plot power vs displacement after choosing the fuel type

In [65]:
# Cast from string to float
cars['Displacement'] = cars['Displacement'].str.extract(r'(\d+)').astype(float)

In [66]:
#Plot power vs displacement after choosing the fuel type

print(cars['Fuel'].value_counts())

#Filter the cars that have the selected fuel type

fuel_type = 'Diesel'

brands_filter = ['BMW', 'AUDI', 'MERCEDES BENZ', 'TOYOTA', 'BUGATTI', 'FERRARI']

cars_filtered = cars[cars['Fuel'] == fuel_type]

cars_filtered = cars_filtered[cars_filtered['Company'].isin(brands_filter)]

fig = px.scatter(cars_filtered,
                x='Displacement',
                y='HP',
                title='Power vs Displacement',
                color='Company',
                width=1200,
                height=900,
                )

fig.show()

Fuel
Gasoline                         19819
Diesel                            8313
Electric                           392
Hybrid                             343
Hybrid Gasoline                    235
Mild Hybrid                        209
Natural Gas                        136
Mild Hybrid Diesel                 109
Ethanol                             79
Plug-in Hybrid                      72
Hybrid Diesel                       10
Liquefied Petroleum Gas (LPG)       10
Name: count, dtype: int64


## Comparison of Specific Cars

### Dropdowns for 5 cars

In [67]:
from ipywidgets import Dropdown, HBox, VBox

def update_specifications(change):
    brand1 = brand_dropdown1.value
    serie1_dropdown.options = list(cars['Serie'][cars['Company'] == brand1].unique())
    specification1_dropdown.options = list(cars['Specification summary'][(cars['Company'] == brand1) & (cars['Serie'] == serie1_dropdown.value)].unique())
    
    brand2 = brand_dropdown2.value
    serie2_dropdown.options = list(cars['Serie'][cars['Company'] == brand2].unique())
    specification2_dropdown.options = list(cars['Specification summary'][(cars['Company'] == brand2) & (cars['Serie'] == serie2_dropdown.value)].unique())
    
    brand3 = brand_dropdown3.value
    serie3_dropdown.options = list(cars['Serie'][cars['Company'] == brand3].unique())
    specification3_dropdown.options = list(cars['Specification summary'][(cars['Company'] == brand3) & (cars['Serie'] == serie3_dropdown.value)].unique())
    
    brand4 = brand_dropdown4.value
    serie4_dropdown.options = list(cars['Serie'][cars['Company'] == brand4].unique())
    specification4_dropdown.options = list(cars['Specification summary'][(cars['Company'] == brand4) & (cars['Serie'] == serie4_dropdown.value)].unique())
    
    brand5 = brand_dropdown5.value
    serie5_dropdown.options = list(cars['Serie'][cars['Company'] == brand5].unique())
    specification5_dropdown.options = list(cars['Specification summary'][(cars['Company'] == brand5) & (cars['Serie'] == serie5_dropdown.value)].unique())

# Dropdown widgets for 5 cars
brand_dropdown1 = Dropdown(options=list(cars['Company'].unique()), description='Car 1 Brand:')
serie1_dropdown = Dropdown(description='Car 1 Serie:')
specification1_dropdown = Dropdown(description='Car 1 Specification:')

brand_dropdown2 = Dropdown(options=list(cars['Company'].unique()), description='Car 2 Brand:')
serie2_dropdown = Dropdown(description='Car 2 Serie:')
specification2_dropdown = Dropdown(description='Car 2 Specification:')

brand_dropdown3 = Dropdown(options=list(cars['Company'].unique()), description='Car 3 Brand:')
serie3_dropdown = Dropdown(description='Car 3 Serie:')
specification3_dropdown = Dropdown(description='Car 3 Specification:')

brand_dropdown4 = Dropdown(options=list(cars['Company'].unique()), description='Car 4 Brand:')
serie4_dropdown = Dropdown(description='Car 4 Serie:')
specification4_dropdown = Dropdown(description='Car 4 Specification:')

brand_dropdown5 = Dropdown(options=list(cars['Company'].unique()), description='Car 5 Brand:')
serie5_dropdown = Dropdown(description='Car 5 Serie:')
specification5_dropdown = Dropdown(description='Car 5 Specification:')

# Observe changes in dropdowns to update options dynamically
brand_dropdown1.observe(update_specifications, names='value')
serie1_dropdown.observe(update_specifications, names='value')

brand_dropdown2.observe(update_specifications, names='value')
serie2_dropdown.observe(update_specifications, names='value')

brand_dropdown3.observe(update_specifications, names='value')
serie3_dropdown.observe(update_specifications, names='value')

brand_dropdown4.observe(update_specifications, names='value')
serie4_dropdown.observe(update_specifications, names='value')

brand_dropdown5.observe(update_specifications, names='value')
serie5_dropdown.observe(update_specifications, names='value')

# Display the dropdowns for 5 cars in one line
HBox([VBox([brand_dropdown1, serie1_dropdown, specification1_dropdown]),
      VBox([brand_dropdown2, serie2_dropdown, specification2_dropdown]),
      VBox([brand_dropdown3, serie3_dropdown, specification3_dropdown]),
      VBox([brand_dropdown4, serie4_dropdown, specification4_dropdown]),
      VBox([brand_dropdown5, serie5_dropdown, specification5_dropdown])])


HBox(children=(VBox(children=(Dropdown(description='Car 1 Brand:', options=('AC', 'ACURA', 'ALFA ROMEO', 'ALPI…

In [89]:
print("Car 1 :", brand_dropdown1.value, serie1_dropdown.value, specification1_dropdown.value)
print("Car 2 :", brand_dropdown2.value, serie2_dropdown.value, specification2_dropdown.value)
print("Car 3 :", brand_dropdown3.value, serie3_dropdown.value, specification3_dropdown.value)
print("Car 4 :", brand_dropdown4.value, serie4_dropdown.value, specification4_dropdown.value)
print("Car 5 :", brand_dropdown5.value, serie5_dropdown.value, specification5_dropdown.value)

car1 = {'Company': brand_dropdown1.value, 'Serie': serie1_dropdown.value, 'Specification': specification1_dropdown.value}
car2 = {'Company': brand_dropdown2.value, 'Serie': serie2_dropdown.value, 'Specification': specification2_dropdown.value}
car3 = {'Company': brand_dropdown3.value, 'Serie': serie3_dropdown.value, 'Specification': specification3_dropdown.value}
car4 = {'Company': brand_dropdown4.value, 'Serie': serie4_dropdown.value, 'Specification': specification4_dropdown.value}
car5 = {'Company': brand_dropdown5.value, 'Serie': serie5_dropdown.value, 'Specification': specification5_dropdown.value}


Car 1 : ARTEGA Scalo 37 KWh (265 HP)
Car 2 : ARIEL Atom 2.0L i-VTEC 6MT (245 HP)
Car 3 : ARO IMS Series 3.2
Car 4 : AURUS Senat L700 4.4L V8 9AT AWD (598 HP)
Car 5 : BMW 7 Series 730d 8AT (265 HP)


In [90]:
# add column with car brand + specification summary

cars['Name'] = cars['Model'] + ' ' + cars['Specification summary']

#cars['Torque(Nm)'] = cars['Torque(Nm)'].str.extract(r'(\d+)').astype(float)

#cars['Acceleration 0-62 Mph (0-100 kph)'] = cars['Acceleration 0-62 Mph (0-100 kph)'].str.extract(r'(\d+\.\d+)').astype(float)

In [91]:
def plot_car_comparison(car1brand, car1serie, car1specification, car2brand, car2serie, car2specification, car3brand, car3serie, car3specification, car4brand, car4serie, car4specification, car5brand, car5serie, car5specification):
    
    cars_filtered = cars[(cars['Company'] == car1brand) & (cars['Serie'] == car1serie) & (cars['Specification summary'] == car1specification)]
    cars_filtered = pd.concat([cars_filtered, cars[(cars['Company'] == car2brand) & (cars['Serie'] == car2serie) & (cars['Specification summary'] == car2specification)]])
    cars_filtered = pd.concat([cars_filtered, cars[(cars['Company'] == car3brand) & (cars['Serie'] == car3serie) & (cars['Specification summary'] == car3specification)]])
    cars_filtered = pd.concat([cars_filtered, cars[(cars['Company'] == car4brand) & (cars['Serie'] == car4serie) & (cars['Specification summary'] == car4specification)]])
    cars_filtered = pd.concat([cars_filtered, cars[(cars['Company'] == car5brand) & (cars['Serie'] == car5serie) & (cars['Specification summary'] == car5specification)]])
    
    figs = []

    fig = px.bar(cars_filtered,
                 x='HP',
                 y='Name',
                 title='Power comparison',
                 color='Company',
                 width=1200,
                 height=300,
                 )
    fig.show()
    fig = go.FigureWidget(fig)
    figs.append(fig)

    fig = px.bar(cars_filtered,
                 x='Torque(Nm)',
                 y='Name',
                 title='Torque comparison',
                 color='Company',
                 width=1200,
                 height=300,
                 )
    fig.show()

    fig = go.FigureWidget(fig)
    figs.append(fig)

    fig = px.bar(cars_filtered,
                 x='Acceleration 0-62 Mph (0-100 kph)',
                 y='Name',
                 title='Accelaration comparison',
                 color='Company',
                 width=1200,
                 height=300,
                 )
    fig.show()

    fig = go.FigureWidget(fig)
    figs.append(fig)


    HBox(figs)


In [92]:
plot_car_comparison(car1['Company'], car1['Serie'], car1['Specification'], car2['Company'], car2['Serie'], car2['Specification'], car3['Company'], car3['Serie'], car3['Specification'], car4['Company'], car4['Serie'], car4['Specification'], car5['Company'], car5['Serie'], car5['Specification'])


Message serialization failed with:
Out of range float values are not JSON compliant
Supporting this message is deprecated in jupyter-client 7, please make sure your message is JSON-compliant



In [93]:
#Make a radar chart in plotly to compare the cars

def plot_radar_chart(car1brand, car1serie, car1specification, car2brand, car2serie, car2specification, car3brand, car3serie, car3specification, car4brand, car4serie, car4specification, car5brand, car5serie, car5specification):
        
        cars_filtered = cars[(cars['Company'] == car1brand) & (cars['Serie'] == car1serie) & (cars['Specification summary'] == car1specification)]
        cars_filtered = pd.concat([cars_filtered, cars[(cars['Company'] == car2brand) & (cars['Serie'] == car2serie) & (cars['Specification summary'] == car2specification)]])
        cars_filtered = pd.concat([cars_filtered, cars[(cars['Company'] == car3brand) & (cars['Serie'] == car3serie) & (cars['Specification summary'] == car3specification)]])
        cars_filtered = pd.concat([cars_filtered, cars[(cars['Company'] == car4brand) & (cars['Serie'] == car4serie) & (cars['Specification summary'] == car4specification)]])
        cars_filtered = pd.concat([cars_filtered, cars[(cars['Company'] == car5brand) & (cars['Serie'] == car5serie) & (cars['Specification summary'] == car5specification)]])
        
        fig = go.Figure()
    
        for i in range(len(cars_filtered)):
            fig.add_trace(go.Scatterpolar(
                r=[cars_filtered['HP'].iloc[i], cars_filtered['Torque(Nm)'].iloc[i], cars_filtered['Acceleration 0-62 Mph (0-100 kph)'].iloc[i]],
                theta=['HP', 'Torque(Nm)', 'Acceleration 0-62 Mph (0-100 kph)'],
                fill='toself',
                name=cars_filtered['Name'].iloc[i]
            ))
    
        fig.update_layout(
        polar=dict(
            radialaxis=dict(
            visible=True,
            range=[0, 1000]
            )),
        showlegend=True
        )
    
        fig.show()

In [94]:
#Run the radar chart

plot_radar_chart(car1['Company'], car1['Serie'], car1['Specification'], car2['Company'], car2['Serie'], car2['Specification'], car3['Company'], car3['Serie'], car3['Specification'], car4['Company'], car4['Serie'], car4['Specification'], car5['Company'], car5['Serie'], car5['Specification'])




Fazer Parallel Coordinates em vez do Radar
Meter o Radar Chart a mostrar dados do Parallel
Uma página só para multiplos
Mostrar top 10 para cada atributo de cada grupo apresentado no Parallel
Criar diagramas de fluxo para cada interação
Criar diagramas para utilizadores básicos e avançados

In [95]:
#Parallel coordinates plot

def plot_parallel_coordinates(car1brand, car1serie, car1specification, car2brand, car2serie, car2specification, car3brand, car3serie, car3specification, car4brand, car4serie, car4specification, car5brand, car5serie, car5specification):
            
            cars_filtered = cars[(cars['Company'] == car1brand) & (cars['Serie'] == car1serie) & (cars['Specification summary'] == car1specification)]
            cars_filtered = pd.concat([cars_filtered, cars[(cars['Company'] == car2brand) & (cars['Serie'] == car2serie) & (cars['Specification summary'] == car2specification)]])
            cars_filtered = pd.concat([cars_filtered, cars[(cars['Company'] == car3brand) & (cars['Serie'] == car3serie) & (cars['Specification summary'] == car3specification)]])
            cars_filtered = pd.concat([cars_filtered, cars[(cars['Company'] == car4brand) & (cars['Serie'] == car4serie) & (cars['Specification summary'] == car4specification)]])
            cars_filtered = pd.concat([cars_filtered, cars[(cars['Company'] == car5brand) & (cars['Serie'] == car5serie) & (cars['Specification summary'] == car5specification)]])
            
            fig = px.parallel_coordinates(cars_filtered)
            
            fig.show()

#Run the parallel coordinates plot

plot_parallel_coordinates(car1['Company'], car1['Serie'], car1['Specification'], car2['Company'], car2['Serie'], car2['Specification'], car3['Company'], car3['Serie'], car3['Specification'], car4['Company'], car4['Serie'], car4['Specification'], car5['Company'], car5['Serie'], car5['Specification'])

In [96]:
cars.to_csv('cars_named.csv', index=False)