In [210]:
# importing libraries
import requests
from bs4 import BeautifulSoup
from pprint import pprint
import pandas as pd

#Pulling the html code from ev-database.org electric cars webpage

url='https://ev-database.org/#sort:path~type~order=.rank~number~desc|range-slider-range:prev~next=0~1200|range-slider-acceleration:prev~next=2~23|range-slider-topspeed:prev~next=110~450|range-slider-battery:prev~next=10~200|range-slider-towweight:prev~next=0~2500|range-slider-fastcharge:prev~next=0~1500|paging:currentPage=0|paging:number=all'

data=requests.get(url)

# Creating an empty list to hold the data

ev_data=[]

# Creating a variable to hold the scraped data

html = BeautifulSoup(data.text, 'html.parser')

#creating a variable to allow structuring of data using html containers and class names

evcars = html.select('div.list-item')

# iterating through evcars to scrape the desired data using class names to identify relevant data

for evcar in evcars:
    
    make = evcar.select('.title')[0].get_text()
    model = evcar.select('.model')[0].get_text()
    battery_Kwh = evcar.select('.battery')[0].get_text()
    acceleration = evcar.select('.acceleration')[0].get_text()
    top_speed = evcar.select('.topspeed')[0].get_text()
    erange_real = evcar.select('.erange_real')[0].get_text()
    efficiency = evcar.select('.efficiency')[0].get_text()
    price_in_de = evcar.select('.country_de')[0].get_text()
    
    #adding the data to ev_data
    
    ev_data.append({"Make": make, "Model": model, "Battery_(Kwh)": battery_Kwh, "Acceleration_in_Secs_(1-100)": acceleration,
                    "Top_Speed_(km/h)": top_speed, "Range_(km)": erange_real, "Efficiency_(Wh/km)": efficiency, "Price_in_Germany (€)": price_in_de})
    
# Converting the list of dictionaries in to a Pandas DataFrame    

ev_df = pd.DataFrame(ev_data)


# Print the head of the dataframe to check structure and check dataframe shape

print(ev_df.head())

print(ev_df.shape)

# Check the DataFrame info

print(ev_df.info())

    


                                  Make                          Model  \
0  Tesla Model 3 Long Range Dual Motor  Model 3 Long Range Dual Motor   
1                      BMW i4 eDrive40                    i4 eDrive40   
2                       Tesla Model 3                        Model 3    
3                Dacia Spring Electric                Spring Electric   
4                           Kia EV6 GT                         EV6 GT   

  Battery_(Kwh) Acceleration_in_Secs_(1-100) Top_Speed_(km/h) Range_(km)  \
0            75                      4.4 sec         233 km/h     485 km   
1          80.7                      5.7 sec         190 km/h     470 km   
2          57.5                      6.1 sec         225 km/h     380 km   
3          26.8                     15.0 sec         125 km/h     170 km   
4          72.5                      3.5 sec         260 km/h     360 km   

  Efficiency_(Wh/km) Price_in_Germany (€)  
0          155 Wh/km              €60,260  
1          172 W

In [211]:
# Stripping out extraneous text from dataframe columns to facilitate analysis later

ev_df['Acceleration_in_Secs_(1-100)'] = ev_df['Acceleration_in_Secs_(1-100)'].str.replace("sec","")

ev_df['Top_Speed_(km/h)'] = ev_df['Top_Speed_(km/h)'].str.replace(" km/h","")

ev_df['Range_(km)'] = ev_df['Range_(km)'].str.replace(" km","")

ev_df['Efficiency_(Wh/km)'] = ev_df['Efficiency_(Wh/km)'].str.replace(" Wh/km", "")

ev_df['Price_in_Germany (€)'] = ev_df['Price_in_Germany (€)'].str.replace("€", "")

ev_df['Price_in_Germany (€)'] = ev_df['Price_in_Germany (€)'].str.replace(",", "")

ev_df['Price_in_Germany (€)'] = ev_df['Price_in_Germany (€)'].str.replace("*", "")

ev_df['Price_in_Germany (€)'] = ev_df['Price_in_Germany (€)'].str.replace("N/A", "")

print(ev_df.head())  

print(ev_df.info())  

                                  Make                          Model  \
0  Tesla Model 3 Long Range Dual Motor  Model 3 Long Range Dual Motor   
1                      BMW i4 eDrive40                    i4 eDrive40   
2                       Tesla Model 3                        Model 3    
3                Dacia Spring Electric                Spring Electric   
4                           Kia EV6 GT                         EV6 GT   

  Battery_(Kwh) Acceleration_in_Secs_(1-100) Top_Speed_(km/h) Range_(km)  \
0            75                         4.4               233        485   
1          80.7                         5.7               190        470   
2          57.5                         6.1               225        380   
3          26.8                        15.0               125        170   
4          72.5                         3.5               260        360   

  Efficiency_(Wh/km) Price_in_Germany (€)  
0                155                60260  
1               

  ev_df['Price_in_Germany (€)'] = ev_df['Price_in_Germany (€)'].str.replace("*", "")


In [212]:
# converting objects to numeric values where possible to facilitate analysis later

cols = ev_df.columns.drop(['Make' , 'Model'])

ev_df[cols]=ev_df[cols].apply(pd.to_numeric, errors='ignore')

print(ev_df.info())
print(ev_df.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 228 entries, 0 to 227
Data columns (total 8 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Make                          228 non-null    object 
 1   Model                         228 non-null    object 
 2   Battery_(Kwh)                 228 non-null    float64
 3   Acceleration_in_Secs_(1-100)  228 non-null    float64
 4   Top_Speed_(km/h)              228 non-null    int64  
 5   Range_(km)                    228 non-null    int64  
 6   Efficiency_(Wh/km)            228 non-null    int64  
 7   Price_in_Germany (€)          214 non-null    float64
dtypes: float64(3), int64(3), object(2)
memory usage: 14.4+ KB
None
                                  Make                          Model  \
0  Tesla Model 3 Long Range Dual Motor  Model 3 Long Range Dual Motor   
1                      BMW i4 eDrive40                    i4 eDrive40   
2                  

In [213]:
#checking for non numeric values in Price column

price_list=ev_df['Price_in_Germany (€)'].tolist()
            
print(price_list)

[60260.0, 58300.0, 48260.0, 20490.0, 65990.0, 48900.0, 106374.0, 50970.0, 35350.0, 59965.0, 47541.0, 44750.0, 48990.0, 98000.0, 69900.0, 55000.0, 41850.0, 39000.0, 41700.0, 33850.0, 39090.0, 135529.0, 66965.0, 37990.0, 35450.0, 150000.0, 31990.0, 47500.0, 52890.0, 115000.0, 50000.0, 55000.0, 150000.0, 31950.0, 27560.0, 34490.0, 45100.0, 29990.0, 70626.0, 138200.0, 37550.0, 48500.0, 77300.0, 57989.0, 44450.0, 57650.0, 47490.0, 35993.0, 66069.0, 67300.0, 35460.0, 45080.0, 29900.0, 39650.0, 150000.0, 54475.0, 44300.0, 77300.0, nan, 37220.0, 57700.0, 50415.0, 63700.0, 47550.0, 41900.0, 41900.0, 24790.0, 34640.0, 59990.0, 85000.0, 26895.0, 33990.0, 34110.0, 46495.0, 186336.0, 56150.0, 47000.0, 47000.0, 132200.0, 50990.0, 49500.0, 35650.0, 62560.0, 33990.0, 125000.0, 34600.0, 29990.0, 36960.0, 47500.0, 32500.0, 42620.0, 38490.0, 44990.0, 160000.0, 53615.0, 53600.0, nan, 42700.0, 35200.0, 71388.0, 152546.0, 19120.0, 99800.0, 81500.0, 37790.0, 55311.0, nan, 23560.0, 30560.0, 48850.0, 42600.0, 

In [221]:
# Converting Price from a string to a float

ev_df['Price_in_Germany (€)'] = ev_df['Price_in_Germany (€)'].apply(pd.to_numeric, errors = 'coerce')

print(price_list)

[60260.0, 58300.0, 48260.0, 20490.0, 65990.0, 48900.0, 106374.0, 50970.0, 35350.0, 59965.0, 47541.0, 44750.0, 48990.0, 98000.0, 69900.0, 55000.0, 41850.0, 39000.0, 41700.0, 33850.0, 39090.0, 135529.0, 66965.0, 37990.0, 35450.0, 150000.0, 31990.0, 47500.0, 52890.0, 115000.0, 50000.0, 55000.0, 150000.0, 31950.0, 27560.0, 34490.0, 45100.0, 29990.0, 70626.0, 138200.0, 37550.0, 48500.0, 77300.0, 57989.0, 44450.0, 57650.0, 47490.0, 35993.0, 66069.0, 67300.0, 35460.0, 45080.0, 29900.0, 39650.0, 150000.0, 54475.0, 44300.0, 77300.0, nan, 37220.0, 57700.0, 50415.0, 63700.0, 47550.0, 41900.0, 41900.0, 24790.0, 34640.0, 59990.0, 85000.0, 26895.0, 33990.0, 34110.0, 46495.0, 186336.0, 56150.0, 47000.0, 47000.0, 132200.0, 50990.0, 49500.0, 35650.0, 62560.0, 33990.0, 125000.0, 34600.0, 29990.0, 36960.0, 47500.0, 32500.0, 42620.0, 38490.0, 44990.0, 160000.0, 53615.0, 53600.0, nan, 42700.0, 35200.0, 71388.0, 152546.0, 19120.0, 99800.0, 81500.0, 37790.0, 55311.0, nan, 23560.0, 30560.0, 48850.0, 42600.0, 

In [222]:
print(ev_df.info())
print(ev_df.head(20))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 228 entries, 0 to 227
Data columns (total 8 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Make                          228 non-null    object 
 1   Model                         228 non-null    object 
 2   Battery_(Kwh)                 228 non-null    float64
 3   Acceleration_in_Secs_(1-100)  228 non-null    float64
 4   Top_Speed_(km/h)              228 non-null    int64  
 5   Range_(km)                    228 non-null    int64  
 6   Efficiency_(Wh/km)            228 non-null    int64  
 7   Price_in_Germany (€)          214 non-null    float64
dtypes: float64(3), int64(3), object(2)
memory usage: 14.4+ KB
None
                                   Make                          Model  \
0   Tesla Model 3 Long Range Dual Motor  Model 3 Long Range Dual Motor   
1                       BMW i4 eDrive40                    i4 eDrive40   
2               

In [224]:
# Preliminary analysis

print("The median battery capacity is "+  str(ev_df['Battery_(Kwh)'].median()) + "Kwh."'\n')

print("The car with the highest battery capacity is the "+ ev_df.loc[ev_df['Battery_(Kwh)'].idxmax(),'Make'] + " at " + str(ev_df['Battery_(Kwh)'].max()) + "Kwh."'\n')

print("The car with the highest battery capacity is the "+ ev_df.loc[ev_df['Battery_(Kwh)'].idxmin(),'Make'] + " at " + str(ev_df['Battery_(Kwh)'].min()) + "Kwh."'\n')

print("The median range is " + str(ev_df['Range_(km)'].median()) +"km"'\n')

print("The car with the longest range is the "+ ev_df.loc[ev_df['Range_(km)'].idxmax(),'Make'] + " at " + str(ev_df['Range_(km)'].max()) + "km."'\n')

print("The car with the shortest range is the "+ ev_df.loc[ev_df['Range_(km)'].idxmin(),'Make'] + " at " + str(ev_df['Range_(km)'].min()) + "km."'\n')

print("The median energy efficiency is " + str(ev_df['Efficiency_(Wh/km)'].median()) +" Wh/km"'\n')

print("The most energy efficient car is the "+ ev_df.loc[ev_df['Efficiency_(Wh/km)'].idxmin(),'Make'] + " at " + str(ev_df['Efficiency_(Wh/km)'].min()) + "Wh/km."'\n')

print("The least energy efficient car is the "+ ev_df.loc[ev_df['Efficiency_(Wh/km)'].idxmax(),'Make'] + " at " + str(ev_df['Efficiency_(Wh/km)'].max()) + "Wh/km."'\n')

print("The median acceleration rate from 0 to 100 km/h is " + str(ev_df['Acceleration_in_Secs_(1-100)'].median()) + "sec"'\n')

print("The car with the fastest acceleration is the "+ ev_df.loc[ev_df['Acceleration_in_Secs_(1-100)'].idxmin(),'Make'] + " at " + str(ev_df['Acceleration_in_Secs_(1-100)'].min()) + "secs."'\n')

print("The car with the slowest acceleration is the "+ ev_df.loc[ev_df['Acceleration_in_Secs_(1-100)'].idxmax(),'Make'] + " at " + str(ev_df['Acceleration_in_Secs_(1-100)'].max()) + "secs."'\n')

print("The median top speed is " + str(ev_df['Top_Speed_(km/h)'].median()) + "km/h"'\n')

print("The car capable of the highest speed is the "+ ev_df.loc[ev_df['Top_Speed_(km/h)'].idxmax(),'Make'] + " at " + str(ev_df['Top_Speed_(km/h)'].max()) + "km/h."'\n')

print("The car with the lowest maximum speed is the "+ ev_df.loc[ev_df['Top_Speed_(km/h)'].idxmin(),'Make'] + " at " + str(ev_df['Top_Speed_(km/h)'].min()) + "km/h."'\n')

print("The median price for an electric car in Germany is €" + str(int(ev_df['Price_in_Germany (€)'].median())) + '\n')

print("The most expensive car is the "+ ev_df.loc[ev_df['Price_in_Germany (€)'].idxmax(),'Make'] + " at €" + str(int(ev_df['Price_in_Germany (€)'].max()))+'\n')

print("The least expensive car is the "+ ev_df.loc[ev_df['Price_in_Germany (€)'].idxmin(),'Make'] + " at €" + str(int(ev_df['Price_in_Germany (€)'].min()))+'\n')

The median battery capacity is 66.5Kwh.

The car with the highest battery capacity is the Mercedes EQS 450+ at 107.8Kwh.

The car with the highest battery capacity is the Smart EQ forfour  at 16.7Kwh.

The median range is 337.5km

The car with the longest range is the Mercedes EQS 450+ at 640km.

The car with the shortest range is the Smart EQ forfour  at 95km.

The median energy efficiency is 190.0 Wh/km

The most energy efficient car is the Lightyear One  at 104Wh/km.

The least energy efficient car is the Mercedes EQV 300 Long at 295Wh/km.

The median acceleration rate from 0 to 100 km/h is 7.95sec

The car with the fastest acceleration is the Tesla Model S Plaid at 2.1secs.

The car with the slowest acceleration is the Dacia Spring Electric at 15.0secs.

The median top speed is 160.0km/h

The car capable of the highest speed is the Tesla Model S Plaid at 322km/h.

The car with the lowest maximum speed is the Dacia Spring Electric at 125km/h.

The median price for an electric car in

In [189]:
#exporting dataframe to csv
ev_df.to_csv('ev_df.csv', index=True)

In [225]:
import matplotlib.pyplot as plt
import seaborn as sns