# Filtering and Ordering

In [86]:
import pandas as pd
# Cap DataFrame previews at 10 rows so displayed frames stay short.
# Use the full documented option key: 'display.max.rows' only worked because pandas
# treats the key as a regex (where '.' happens to match '_'), and such shorthand can
# break if another matching option is ever added.
pd.set_option('display.max_rows', 10)

In [87]:
# Read the county-level EV population history CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not run
# on another machine until it is pointed at a local copy of the file.
csv_path = r'/Users/timothypark/Documents/portfolios/timpark99.github.io/Filtering and Ordering in Pandas/Electric_Vehicle_Population_Size_History_By_County_.csv'
df = pd.read_csv(csv_path)

In [88]:
# Preview the loaded frame; the display option set earlier caps the output at 10 rows.
df

Unnamed: 0,Date,County,State,Vehicle Primary Use,Battery Electric Vehicles (BEVs),Plug-In Hybrid Electric Vehicles (PHEVs),Electric Vehicle (EV) Total,Non-Electric Vehicle Total,Total Vehicles,Percent Electric Vehicles
0,September 30 2022,Riverside,CA,Passenger,7,0,7,460,467,1.50
1,December 31 2022,Prince William,VA,Passenger,1,2,3,188,191,1.57
2,January 31 2020,Dakota,MN,Passenger,0,1,1,32,33,3.03
3,June 30 2022,Ferry,WA,Truck,0,0,0,3575,3575,0.00
4,July 31 2021,Douglas,CO,Passenger,0,1,1,83,84,1.19
...,...,...,...,...,...,...,...,...,...,...
20814,January 31 2023,Rockingham,NH,Passenger,1,0,1,14,15,6.67
20815,July 31 2020,Carson City,NV,Passenger,1,0,1,10,11,9.09
20816,February 28 2022,Island,WA,Passenger,744,350,1094,62257,63351,1.73
20817,December 31 2020,San Diego,CA,Passenger,14,2,16,2724,2740,0.58


In [89]:
# Inspect the column dtypes to see which columns were parsed as text (object) rather than as numbers.
df.dtypes

Date                                         object
County                                       object
State                                        object
Vehicle Primary Use                          object
Battery Electric Vehicles (BEVs)             object
Plug-In Hybrid Electric Vehicles (PHEVs)     object
Electric Vehicle (EV) Total                  object
Non-Electric Vehicle Total                   object
Total Vehicles                               object
Percent Electric Vehicles                   float64
dtype: object

In [90]:
# The vehicle-count columns load as strings (dtype object) because their values
# contain commas, so they cannot be converted to int directly.
count_columns = [
    'Battery Electric Vehicles (BEVs)',
    'Plug-In Hybrid Electric Vehicles (PHEVs)',
    'Electric Vehicle (EV) Total',
    'Non-Electric Vehicle Total',
    'Total Vehicles',
]

# Strip the commas from the count columns only. A frame-wide
# df.replace(',', '', regex=True) would also rewrite any comma that appears in the
# text columns (Date, County, State, Vehicle Primary Use).
# regex=True is required: without it, replace() only matches whole cell values.
df = df.copy()
df[count_columns] = df[count_columns].replace(',', '', regex=True)

# With the commas gone, to_numeric can parse 'Total Vehicles' into a numeric dtype.
# The other count columns stay as comma-free strings, exactly as before.
df['Total Vehicles'] = pd.to_numeric(df['Total Vehicles'])

In [91]:
# Re-check the dtypes after cleaning to confirm that 'Total Vehicles' is now numeric.
df.dtypes

Date                                         object
County                                       object
State                                        object
Vehicle Primary Use                          object
Battery Electric Vehicles (BEVs)             object
Plug-In Hybrid Electric Vehicles (PHEVs)     object
Electric Vehicle (EV) Total                  object
Non-Electric Vehicle Total                   object
Total Vehicles                                int64
Percent Electric Vehicles                   float64
dtype: object

In [92]:
# Boolean-mask filter: keep only the rows whose 'Total Vehicles' value is below 10.

df.loc[df['Total Vehicles'].lt(10)]

Unnamed: 0,Date,County,State,Vehicle Primary Use,Battery Electric Vehicles (BEVs),Plug-In Hybrid Electric Vehicles (PHEVs),Electric Vehicle (EV) Total,Non-Electric Vehicle Total,Total Vehicles,Percent Electric Vehicles
8,March 31 2020,DeKalb,IN,Passenger,1,0,1,1,2,50.00
13,November 30 2020,Manassas,VA,Passenger,0,1,1,5,6,16.67
16,May 31 2020,Monroe,IL,Passenger,1,0,1,3,4,25.00
22,February 29 2020,Owyhee,ID,Passenger,1,0,1,3,4,25.00
23,June 30 2022,Clinton,PA,Passenger,1,0,1,1,2,50.00
...,...,...,...,...,...,...,...,...,...,...
20757,January 31 2024,Ulster,NY,Passenger,1,0,1,5,6,16.67
20778,February 28 2019,Franklin,MA,Passenger,0,1,1,1,2,50.00
20789,September 30 2023,Sheridan,WY,Passenger,1,0,1,3,4,25.00
20790,May 31 2022,Eagle,CO,Passenger,1,0,1,7,8,12.50


In [108]:
# Demo only: reduce the data to one row per county name, keeping the first row seen
# for each name so that every 'County' value is unique.

df = df[~df.duplicated(subset='County', keep='first')]

In [109]:
# Show the de-duplicated frame: one row per distinct 'County' value.
df

Unnamed: 0,Date,County,State,Vehicle Primary Use,Battery Electric Vehicles (BEVs),Plug-In Hybrid Electric Vehicles (PHEVs),Electric Vehicle (EV) Total,Non-Electric Vehicle Total,Total Vehicles,Percent Electric Vehicles
0,September 30 2022,Riverside,CA,Passenger,7,0,7,460,467,1.50
1,December 31 2022,Prince William,VA,Passenger,1,2,3,188,191,1.57
2,January 31 2020,Dakota,MN,Passenger,0,1,1,32,33,3.03
3,June 30 2022,Ferry,WA,Truck,0,0,0,3575,3575,0.00
4,July 31 2021,Douglas,CO,Passenger,0,1,1,83,84,1.19
...,...,...,...,...,...,...,...,...,...,...
9152,December 31 2023,Spotsylvania,VA,Passenger,0,1,1,21,22,4.55
9504,December 31 2023,Worcester,MA,Passenger,1,0,1,9,10,10.00
12905,January 31 2017,Union,OH,Passenger,1,0,1,4,5,20.00
14186,November 30 2021,Bernalillo,NM,Passenger,1,0,1,110,111,0.90


In [111]:
# Series.isin builds a boolean mask that is True wherever the County value is one of
# the listed names; the mask then selects the matching rows.

specific_counties = ['DeKalb', 'Manassas']
df.loc[df['County'].isin(specific_counties)]

Unnamed: 0,Date,County,State,Vehicle Primary Use,Battery Electric Vehicles (BEVs),Plug-In Hybrid Electric Vehicles (PHEVs),Electric Vehicle (EV) Total,Non-Electric Vehicle Total,Total Vehicles,Percent Electric Vehicles
8,March 31 2020,DeKalb,IN,Passenger,1,0,1,1,2,50.0
13,November 30 2020,Manassas,VA,Passenger,0,1,1,5,6,16.67


In [114]:
# Substring match: keep the rows whose County contains 'Eagle'.
# na=False makes a missing County count as "no match" instead of leaving NaN in the mask.

df.loc[df['County'].str.contains('Eagle', na=False)]

Unnamed: 0,Date,County,State,Vehicle Primary Use,Battery Electric Vehicles (BEVs),Plug-In Hybrid Electric Vehicles (PHEVs),Electric Vehicle (EV) Total,Non-Electric Vehicle Total,Total Vehicles,Percent Electric Vehicles
732,July 31 2020,Eagle,CO,Passenger,1,0,1,8,9,11.11


In [115]:
# Filtering on the index:
# promote 'County' from a regular column to the row index of a new frame, df2.

df2 = df.set_index(keys='County')
df2

Unnamed: 0_level_0,Date,State,Vehicle Primary Use,Battery Electric Vehicles (BEVs),Plug-In Hybrid Electric Vehicles (PHEVs),Electric Vehicle (EV) Total,Non-Electric Vehicle Total,Total Vehicles,Percent Electric Vehicles
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Riverside,September 30 2022,CA,Passenger,7,0,7,460,467,1.50
Prince William,December 31 2022,VA,Passenger,1,2,3,188,191,1.57
Dakota,January 31 2020,MN,Passenger,0,1,1,32,33,3.03
Ferry,June 30 2022,WA,Truck,0,0,0,3575,3575,0.00
Douglas,July 31 2021,CO,Passenger,0,1,1,83,84,1.19
...,...,...,...,...,...,...,...,...,...
Spotsylvania,December 31 2023,VA,Passenger,0,1,1,21,22,4.55
Worcester,December 31 2023,MA,Passenger,1,0,1,9,10,10.00
Union,January 31 2017,OH,Passenger,1,0,1,4,5,20.00
Bernalillo,November 30 2021,NM,Passenger,1,0,1,110,111,0.90


In [116]:
# Column-wise filter: axis='columns' (the same as axis=1) keeps only the named columns.

df2.filter(items=['State', 'Vehicle Primary Use'], axis='columns')

Unnamed: 0_level_0,State,Vehicle Primary Use
County,Unnamed: 1_level_1,Unnamed: 2_level_1
Riverside,CA,Passenger
Prince William,VA,Passenger
Dakota,MN,Passenger
Ferry,WA,Truck
Douglas,CO,Passenger
...,...,...
Spotsylvania,VA,Passenger
Worcester,MA,Passenger
Union,OH,Passenger
Bernalillo,NM,Passenger


In [117]:
# Row-wise filter: axis='index' (the same as axis=0) matches items against the index
# labels, which are the county names after set_index('County').

df2.filter(items=['Island'], axis='index')

Unnamed: 0_level_0,Date,State,Vehicle Primary Use,Battery Electric Vehicles (BEVs),Plug-In Hybrid Electric Vehicles (PHEVs),Electric Vehicle (EV) Total,Non-Electric Vehicle Total,Total Vehicles,Percent Electric Vehicles
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Island,December 31 2021,WA,Truck,0,0,0,21488,21488,0.0


In [118]:
# loc is label-based: fetch the row whose index label is 'Union', with all columns.

df2.loc['Union', :]

Date                                        January 31 2017
State                                                    OH
Vehicle Primary Use                               Passenger
Battery Electric Vehicles (BEVs)                          1
Plug-In Hybrid Electric Vehicles (PHEVs)                  0
Electric Vehicle (EV) Total                               1
Non-Electric Vehicle Total                                4
Total Vehicles                                            5
Percent Electric Vehicles                              20.0
Name: Union, dtype: object

In [120]:
# iloc is strictly position-based, so a label such as 'Union' is rejected with a
# TypeError. The error is what this cell demonstrates, so catch and print it
# instead of letting the exception stop a top-to-bottom run of the notebook.
try:
    df2.iloc['Union']
except TypeError as err:
    print(f'{type(err).__name__}: {err}')

TypeError: Cannot index by location index with a non-integer key