In [1]:
import pandas as pd

#### Load the Towed Cars dataset
We use an edited version of this dataset: https://catalog.data.gov/dataset/towed-cars-for-the-past-30-days

In [3]:
# Read the edited Towed Cars CSV into a DataFrame.
# NOTE(review): this is a hard-coded absolute path, and the recorded output of
# this cell is a FileNotFoundError — confirm where the dataset lives before re-running.
towed_cars = pd.read_csv('/datasets/Towed_Cars.csv')

# A bare expression on the last line of a cell displays the DataFrame.
towed_cars

FileNotFoundError: [Errno 2] File /datasets/Towed_Cars.csv does not exist: '/datasets/Towed_Cars.csv'

#### Accessing a single column

In [3]:
# A single column label inside [] returns that column as a Series.
towed_cars['Color']

0      BLUE
1      BLUE
2      BLUE
3      BLUE
4      BLUE
5      BLUE
6      BLUE
7     GREEN
8     GREEN
9     GREEN
10    GREEN
11     GRAY
12     GRAY
13     GRAY
14     GRAY
15     GRAY
Name: Color, dtype: object

#### Accessing multiple columns

In [4]:
# A list of column labels inside [] (note the double brackets) returns a DataFrame with just those columns.
towed_cars[['Make', 'Color']]

Unnamed: 0,Make,Color
0,NISSAN,BLUE
1,FORD ESCAP,BLUE
2,CHEVY,BLUE
3,HONDA,BLUE
4,SATURN,BLUE
5,TOYOTA,BLUE
6,MERCURY,BLUE
7,ACURA RLX,GREEN
8,HONDA-X,GREEN
9,NISSAN-II,GREEN


#### The head() function returns the initial rows of the DataFrame
By default it returns the first 5 rows of the DataFrame

In [5]:
# With no argument, head() returns the first 5 rows of the selected columns.
towed_cars[['Make', 'Color']].head()

Unnamed: 0,Make,Color
0,NISSAN,BLUE
1,FORD ESCAP,BLUE
2,CHEVY,BLUE
3,HONDA,BLUE
4,SATURN,BLUE


#### Specify number of initial rows to return
The head() function takes in an argument for the number of rows

In [6]:
# Passing a number to head() returns that many initial rows (here, the first 9).
towed_cars[['Make', 'Color']].head(9)

Unnamed: 0,Make,Color
0,NISSAN,BLUE
1,FORD ESCAP,BLUE
2,CHEVY,BLUE
3,HONDA,BLUE
4,SATURN,BLUE
5,TOYOTA,BLUE
6,MERCURY,BLUE
7,ACURA RLX,GREEN
8,HONDA-X,GREEN


#### The tail() function to get the last few rows
Similar to head() but for the end of the DataFrame. Also returns the last 5 rows by default.

In [7]:
# With no argument, tail() returns the last 5 rows of the DataFrame.
towed_cars.tail()

Unnamed: 0,TowNum,Tow_Firm,Vehicle_State,Vehicle_Plate,Vehicle_Year,Make,Model,Color,Date,Time
11,353290,MODERN GARAGE,QT,AM91398,1999,NISSAN-III,MAXIMA,GRAY,8-20-2018,190
12,353291,FRIENDLY AUTO BODY & TOWING,QT,907HFD,2001,SUBARU,LEGACY,GRAY,8-20-2018,188
13,353292,CROSS COUNTRY AUTO_X,QT,976WVV,2012,HONDA-II,ACCORD,GRAY,8-20-2018,210
14,353293,CENTRAL AUTO & TRANSPORT,QT,AG79812,2013,SUBARU-X,LEGACY,GRAY,8-20-2018,215
15,353294,CROSS COUNTRY AUTO,QT,AM31852,1995,ACURA-Fo,3.2 TL,GRAY,8-20-2018,220


#### loc selects a row by its index label and returns it as a Series

In [8]:
# A single index label returns that row as a Series whose index is the column names.
towed_cars.loc[6]

TowNum                       353284
Tow_Firm         CROSS COUNTRY AUTO
Vehicle_State                    CT
Vehicle_Plate                5AUKV8
Vehicle_Year                   2000
Make                        MERCURY
Model                    GRAND MARQ
Color                          BLUE
Date                      8-20-2018
Time                            166
Name: 6, dtype: object

#### Slicing in DataFrames
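As with Python lists, we can slice Pandas DataFrames. Unlike a list slice, a loc slice is label-based and includes both endpoints, so `loc[3:7]` returns rows 3 through 7.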

In [9]:
# Label-based row slice: both the start label (3) and the end label (7) are included.
towed_cars.loc[3:7]

Unnamed: 0,TowNum,Tow_Firm,Vehicle_State,Vehicle_Plate,Vehicle_Year,Make,Model,Color,Date,Time
3,353281,A & N AUTO,CT,193XZS,2002,HONDA,ACCORD,BLUE,8-20-2018,185
4,353282,CENTRAL AUTO & TRANSPORT,CT,AP00237,2005,SATURN,RELAY 3,BLUE,8-20-2018,190
5,353283,CENTRAL GARAGE,CT,410ZZN,2006,TOYOTA,SIENNA CE/,BLUE,8-20-2018,177
6,353284,CROSS COUNTRY AUTO,CT,5AUKV8,2000,MERCURY,GRAND MARQ,BLUE,8-20-2018,166
7,353285,CAPITOL AUTOMOTIVE LLC,AL,702WUF,2014,ACURA RLX,TECH AUDIO,GREEN,8-20-2018,120


#### Slicing in 2 dimensions
Since DataFrames can be considered to be 2-D arrays, we can slice along both rows and columns

In [10]:
 # Rows from label 9 to the end; columns from 'TowNum' through 'Vehicle_Year' (both column endpoints included).
 towed_cars.loc[9 : , 'TowNum' : 'Vehicle_Year']

Unnamed: 0,TowNum,Tow_Firm,Vehicle_State,Vehicle_Plate,Vehicle_Year
9,353277,MODERN GARAGE,AL,XM91398,1990
10,353288,MODERN AUTOMOTIVE LLC,AL,702WUF,2018
11,353290,MODERN GARAGE,QT,AM91398,1999
12,353291,FRIENDLY AUTO BODY & TOWING,QT,907HFD,2001
13,353292,CROSS COUNTRY AUTO_X,QT,976WVV,2012
14,353293,CENTRAL AUTO & TRANSPORT,QT,AG79812,2013
15,353294,CROSS COUNTRY AUTO,QT,AM31852,1995


In [11]:
# A list of index labels picks exactly those rows; the bare ':' keeps every column.
towed_cars.loc[[1, 6, 13], : ]

Unnamed: 0,TowNum,Tow_Firm,Vehicle_State,Vehicle_Plate,Vehicle_Year,Make,Model,Color,Date,Time
1,353279,RENO'S AUTO BODY,CT,AK15141,2007,FORD ESCAP,XLT,BLUE,8-20-2018,170
6,353284,CROSS COUNTRY AUTO,CT,5AUKV8,2000,MERCURY,GRAND MARQ,BLUE,8-20-2018,166
13,353292,CROSS COUNTRY AUTO_X,QT,976WVV,2012,HONDA-II,ACCORD,GRAY,8-20-2018,210


### Indexing with iloc
iloc is like loc, but the values it accepts must be integers. While loc accepts index labels, iloc only takes index positions. 

#### iloc is like loc if the index matches the positions
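Since the rows are indexed with integers from 0 onwards, the index labels coincide with the row positions, so we can access rows much as we do with loc. One difference remains: an iloc slice excludes the end position, while a loc slice includes the end label.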

In [12]:
# Position-based row slice: the end position is excluded, so this returns positions 2 through 5.
towed_cars.iloc[2 : 6]

Unnamed: 0,TowNum,Tow_Firm,Vehicle_State,Vehicle_Plate,Vehicle_Year,Make,Model,Color,Date,Time
2,353280,WHITEY'S FRAME SHOP,CT,408HFD,2013,CHEVY,RELAY 3,BLUE,8-20-2018,175
3,353281,A & N AUTO,CT,193XZS,2002,HONDA,ACCORD,BLUE,8-20-2018,185
4,353282,CENTRAL AUTO & TRANSPORT,CT,AP00237,2005,SATURN,RELAY 3,BLUE,8-20-2018,190
5,353283,CENTRAL GARAGE,CT,410ZZN,2006,TOYOTA,SIENNA CE/,BLUE,8-20-2018,177


In [13]:
# Same bounds with loc for comparison: the end label is included, so this returns rows 2 through 6.
towed_cars.loc[2 : 6]

Unnamed: 0,TowNum,Tow_Firm,Vehicle_State,Vehicle_Plate,Vehicle_Year,Make,Model,Color,Date,Time
2,353280,WHITEY'S FRAME SHOP,CT,408HFD,2013,CHEVY,RELAY 3,BLUE,8-20-2018,175
3,353281,A & N AUTO,CT,193XZS,2002,HONDA,ACCORD,BLUE,8-20-2018,185
4,353282,CENTRAL AUTO & TRANSPORT,CT,AP00237,2005,SATURN,RELAY 3,BLUE,8-20-2018,190
5,353283,CENTRAL GARAGE,CT,410ZZN,2006,TOYOTA,SIENNA CE/,BLUE,8-20-2018,177
6,353284,CROSS COUNTRY AUTO,CT,5AUKV8,2000,MERCURY,GRAND MARQ,BLUE,8-20-2018,166


#### Retrieving columns based on position
iloc accepts only integer positions, so the text column labels cannot be used with it; we must refer to the columns by their integer positions instead.

In [13]:
# iloc is purely position-based, so slicing the columns by label raises a TypeError.
# The error is the point of this cell; it is caught and printed so that the
# notebook can still be run from top to bottom without stopping here.
try:
    towed_cars.iloc[2 : 6, 'Vehicle_Plate' : 'Color']
except TypeError as err:
    print(f'TypeError: {err}')

TypeError: cannot do slice indexing on <class 'pandas.core.indexes.base.Index'> with these indexers [Vehicle_Plate] of <class 'str'>

In [14]:
# Position-based slicing on both axes: row positions 2-5 and column positions 3-6 (end positions excluded).
towed_cars.iloc[2 : 6, 3 : 7]

Unnamed: 0,Vehicle_Plate,Vehicle_Year,Make,Model
2,408HFD,2013,CHEVY,RELAY 3
3,193XZS,2002,HONDA,ACCORD
4,AP00237,2005,SATURN,RELAY 3
5,410ZZN,2006,TOYOTA,SIENNA CE/


In [15]:
# A list of integer positions picks exactly those rows; the bare ':' keeps every column.
towed_cars.iloc[[3,7,10], : ]

Unnamed: 0,TowNum,Tow_Firm,Vehicle_State,Vehicle_Plate,Vehicle_Year,Make,Model,Color,Date,Time
3,353281,A & N AUTO,CT,193XZS,2002,HONDA,ACCORD,BLUE,8-20-2018,185
7,353285,CAPITOL AUTOMOTIVE LLC,AL,702WUF,2014,ACURA RLX,TECH AUDIO,GREEN,8-20-2018,120
10,353288,MODERN AUTOMOTIVE LLC,AL,702WUF,2018,ACURA RLX New,TECH AUDIO,GREEN,8-20-2018,140


### Filter based on conditions
Pandas allows us to select only the rows whose values satisfy a given condition

#### Filtering on single numeric condition

In [16]:
# The comparison yields a boolean Series; indexing with it keeps only the rows where Vehicle_Year is greater than 2005.
towed_cars[towed_cars['Vehicle_Year'] > 2005]

Unnamed: 0,TowNum,Tow_Firm,Vehicle_State,Vehicle_Plate,Vehicle_Year,Make,Model,Color,Date,Time
1,353279,RENO'S AUTO BODY,CT,AK15141,2007,FORD ESCAP,XLT,BLUE,8-20-2018,170
2,353280,WHITEY'S FRAME SHOP,CT,408HFD,2013,CHEVY,RELAY 3,BLUE,8-20-2018,175
5,353283,CENTRAL GARAGE,CT,410ZZN,2006,TOYOTA,SIENNA CE/,BLUE,8-20-2018,177
7,353285,CAPITOL AUTOMOTIVE LLC,AL,702WUF,2014,ACURA RLX,TECH AUDIO,GREEN,8-20-2018,120
8,353286,CORONA'S AUTO PARTS INC,AL,427WVV,2008,HONDA-X,CIVIC,GREEN,8-20-2018,135
10,353288,MODERN AUTOMOTIVE LLC,AL,702WUF,2018,ACURA RLX New,TECH AUDIO,GREEN,8-20-2018,140
13,353292,CROSS COUNTRY AUTO_X,QT,976WVV,2012,HONDA-II,ACCORD,GRAY,8-20-2018,210
14,353293,CENTRAL AUTO & TRANSPORT,QT,AG79812,2013,SUBARU-X,LEGACY,GRAY,8-20-2018,215


#### Filtering on text value in a cell

In [17]:
# Keep only the rows whose Vehicle_State value is exactly the string 'CT'.
towed_cars[towed_cars['Vehicle_State'] == 'CT' ]

Unnamed: 0,TowNum,Tow_Firm,Vehicle_State,Vehicle_Plate,Vehicle_Year,Make,Model,Color,Date,Time
0,353278,BENTON AUTO BODY,CT,AM91301,2003,NISSAN,XTERRA XE,BLUE,8-20-2018,180
1,353279,RENO'S AUTO BODY,CT,AK15141,2007,FORD ESCAP,XLT,BLUE,8-20-2018,170
2,353280,WHITEY'S FRAME SHOP,CT,408HFD,2013,CHEVY,RELAY 3,BLUE,8-20-2018,175
3,353281,A & N AUTO,CT,193XZS,2002,HONDA,ACCORD,BLUE,8-20-2018,185
4,353282,CENTRAL AUTO & TRANSPORT,CT,AP00237,2005,SATURN,RELAY 3,BLUE,8-20-2018,190
5,353283,CENTRAL GARAGE,CT,410ZZN,2006,TOYOTA,SIENNA CE/,BLUE,8-20-2018,177
6,353284,CROSS COUNTRY AUTO,CT,5AUKV8,2000,MERCURY,GRAND MARQ,BLUE,8-20-2018,166


#### Filtering on multiple conditions
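The & symbol represents an element-wise logical AND. Each condition must be wrapped in parentheses because & binds more tightly than comparison operators such as == and >.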

In [18]:
# Keep rows where both conditions hold: Vehicle_State equals 'CT' AND Vehicle_Year is greater than 2005.
# The enclosing brackets already let the expression span lines, so no backslash is needed.
towed_cars[
    (towed_cars['Vehicle_State'] == 'CT')
    & (towed_cars['Vehicle_Year'] > 2005)
]

Unnamed: 0,TowNum,Tow_Firm,Vehicle_State,Vehicle_Plate,Vehicle_Year,Make,Model,Color,Date,Time
1,353279,RENO'S AUTO BODY,CT,AK15141,2007,FORD ESCAP,XLT,BLUE,8-20-2018,170
2,353280,WHITEY'S FRAME SHOP,CT,408HFD,2013,CHEVY,RELAY 3,BLUE,8-20-2018,175
5,353283,CENTRAL GARAGE,CT,410ZZN,2006,TOYOTA,SIENNA CE/,BLUE,8-20-2018,177


#### Logical OR
Represented by |

In [4]:
# Keep rows where at least one condition holds: Vehicle_State equals 'AL' OR Color equals 'GRAY'.
# The enclosing brackets already let the expression span lines, so no backslash is needed.
towed_cars[
    (towed_cars['Vehicle_State'] == 'AL')
    | (towed_cars['Color'] == 'GRAY')
]

NameError: name 'towed_cars' is not defined

#### The ~ character represents the negation of a condition

In [20]:
# ~ inverts the boolean mask, so this keeps every row whose Color is not 'BLUE'.
towed_cars[~(towed_cars['Color'] == 'BLUE')]

Unnamed: 0,TowNum,Tow_Firm,Vehicle_State,Vehicle_Plate,Vehicle_Year,Make,Model,Color,Date,Time
7,353285,CAPITOL AUTOMOTIVE LLC,AL,702WUF,2014,ACURA RLX,TECH AUDIO,GREEN,8-20-2018,120
8,353286,CORONA'S AUTO PARTS INC,AL,427WVV,2008,HONDA-X,CIVIC,GREEN,8-20-2018,135
9,353277,MODERN GARAGE,AL,XM91398,1990,NISSAN-II,MAXIMA,GREEN,8-20-2018,130
10,353288,MODERN AUTOMOTIVE LLC,AL,702WUF,2018,ACURA RLX New,TECH AUDIO,GREEN,8-20-2018,140
11,353290,MODERN GARAGE,QT,AM91398,1999,NISSAN-III,MAXIMA,GRAY,8-20-2018,190
12,353291,FRIENDLY AUTO BODY & TOWING,QT,907HFD,2001,SUBARU,LEGACY,GRAY,8-20-2018,188
13,353292,CROSS COUNTRY AUTO_X,QT,976WVV,2012,HONDA-II,ACCORD,GRAY,8-20-2018,210
14,353293,CENTRAL AUTO & TRANSPORT,QT,AG79812,2013,SUBARU-X,LEGACY,GRAY,8-20-2018,215
15,353294,CROSS COUNTRY AUTO,QT,AM31852,1995,ACURA-Fo,3.2 TL,GRAY,8-20-2018,220
