In [1]:
# Import libraries
import pandas as pd
import numpy as np

### Load Excel File

In [2]:
# Path is relative to the notebook's working directory
filename = 'data/car_financing.xlsx'
# With no sheet_name argument, read_excel loads the first sheet of the workbook
df = pd.read_excel(filename)

## Slicing
1. How to select columns in pandas 
2. How to use slicing operations in pandas

In [3]:
# Preview the first five rows and every column of the loaded table
df.head()

Unnamed: 0,Month,Starting Balance,Repayment,Interest Paid,Principal Paid,New Balance,term,interest_rate,car_type
0,1,34689.96,687.23,202.93,484.3,34205.66,60,0.0702,Toyota Sienna
1,2,34205.66,687.23,200.1,487.13,33718.53,60,0.0702,Toyota Sienna
2,3,33718.53,687.23,197.25,489.98,33228.55,60,0.0702,Toyota Sienna
3,4,33228.55,687.23,194.38,492.85,32735.7,60,0.0702,Toyota Sienna
4,5,32735.7,687.23,191.5,495.73,32239.97,60,0.0702,Toyota Sienna


### Select columns using brackets
With square brackets, you can select one or more columns.

In [4]:
# Select one column using double brackets.
# The inner brackets are a list of column names, so the result is a DataFrame.
df[['car_type']].head()

Unnamed: 0,car_type
0,Toyota Sienna
1,Toyota Sienna
2,Toyota Sienna
3,Toyota Sienna
4,Toyota Sienna


In [5]:
# Select multiple columns using double brackets:
# list every column name you want inside the inner brackets.
df[['car_type', 'Principal Paid']].head()

Unnamed: 0,car_type,Principal Paid
0,Toyota Sienna,484.3
1,Toyota Sienna,487.13
2,Toyota Sienna,489.98
3,Toyota Sienna,492.85
4,Toyota Sienna,495.73


In [6]:
# Double brackets return a pandas DataFrame, even for a single column
type(df[['car_type']].head())

pandas.core.frame.DataFrame

In [7]:
# Select one column using single brackets.
# This produces a pandas Series: a one-dimensional labeled array.
df['car_type'].head()

0    Toyota Sienna
1    Toyota Sienna
2    Toyota Sienna
3    Toyota Sienna
4    Toyota Sienna
Name: car_type, dtype: object

In [8]:
# Single brackets return a pandas Series
type(df['car_type'].head())

pandas.core.series.Series

In [9]:
# Keep in mind that you can't select multiple columns using single brackets.
# pandas treats 'car_type', 'Principal Paid' as ONE key (a tuple), finds no
# column with that label, and raises a KeyError.
# The error is caught and printed so the notebook still runs top to bottom
# with "Restart & Run All" instead of stopping at this cell.
try:
    df['car_type', 'Principal Paid']
except KeyError as err:
    print('KeyError:', err)

KeyError: ('car_type', 'Principal Paid')

In [10]:
# The working version: wrap the column names in a list (double brackets)
df[['car_type', 'Principal Paid']]

Unnamed: 0,car_type,Principal Paid
0,Toyota Sienna,484.30
1,Toyota Sienna,487.13
2,Toyota Sienna,489.98
3,Toyota Sienna,492.85
4,Toyota Sienna,495.73
5,Toyota Sienna,498.63
6,Toyota Sienna,501.55
7,Toyota Sienna,504.48
8,Toyota Sienna,507.43
9,Toyota Sienna,510.40


### Pandas Slicing

With a pandas Series, we can select rows using slicing like this: `series[start_index:end_index]`

The end_index is not inclusive. This behavior is very similar to Python lists.

In [11]:
# The whole column as a Series, for comparison with the slice in the next cell
# (pandas abbreviates long output with "...")
df['car_type']

0      Toyota Sienna
1      Toyota Sienna
2      Toyota Sienna
3      Toyota Sienna
4      Toyota Sienna
5      Toyota Sienna
6      Toyota Sienna
7      Toyota Sienna
8      Toyota Sienna
9      Toyota Sienna
10     Toyota Sienna
11     Toyota Sienna
12     Toyota Sienna
13     Toyota Sienna
14     Toyota Sienna
15     Toyota Sienna
16     Toyota Sienna
17     Toyota Sienna
18     Toyota Sienna
19     Toyota Sienna
20     Toyota Sienna
21     Toyota Sienna
22     Toyota Sienna
23     Toyota Sienna
24     Toyota Sienna
25     Toyota Sienna
26     Toyota Sienna
27     Toyota Sienna
28     Toyota Sienna
29     Toyota Sienna
           ...      
378        VW Golf R
379        VW Golf R
380        VW Golf R
381        VW Golf R
382        VW Golf R
383        VW Golf R
384        VW Golf R
385        VW Golf R
386        VW Golf R
387        VW Golf R
388        VW Golf R
389        VW Golf R
390        VW Golf R
391        VW Golf R
392        VW Golf R
393        VW Golf R
394        VW

In [12]:
# Slice the Series: rows 0 through 9 are returned; the end index 10 is excluded
df['car_type'][0:10]

0    Toyota Sienna
1    Toyota Sienna
2    Toyota Sienna
3    Toyota Sienna
4    Toyota Sienna
5    Toyota Sienna
6    Toyota Sienna
7    Toyota Sienna
8    Toyota Sienna
9    Toyota Sienna
Name: car_type, dtype: object

In [13]:
# Select a column using dot notation (attribute access).
# This is not recommended; the next cell shows where it breaks down.
df.car_type.head()

0    Toyota Sienna
1    Toyota Sienna
2    Toyota Sienna
3    Toyota Sienna
4    Toyota Sienna
Name: car_type, dtype: object

In [14]:
# Dot notation won't work here because there is a space in the column name:
# `df.Principal Paid` is not valid Python, so the cell could not even be parsed.
# Dot notation also fails if your column has the same name as one of a
# DataFrame's attributes or methods.
#
# The invalid expression is compiled from a string so the SyntaxError can be
# caught and shown without stopping "Restart & Run All".
try:
    compile('df.Principal Paid', '<dot-notation-demo>', 'eval')
except SyntaxError as err:
    print('SyntaxError:', err)

SyntaxError: invalid syntax (<ipython-input-14-fc6a34f07095>, line 6)

In [15]:
# Bracket notation handles column names that contain spaces
df['Principal Paid']

0      484.30
1      487.13
2      489.98
3      492.85
4      495.73
5      498.63
6      501.55
7      504.48
8      507.43
9      510.40
10     513.39
11     516.39
12     519.41
13     522.45
14     525.51
15     528.58
16     531.67
17     534.78
18     537.91
19     541.06
20     544.22
21     547.41
22     550.61
23     553.83
24     557.07
25     560.33
26     563.61
27     566.90
28     570.22
29     573.56
        ...  
378    740.41
379    742.20
380    743.99
381    745.79
382    747.59
383    749.40
384    751.21
385    753.03
386    754.85
387    756.67
388    758.50
389    760.33
390    762.17
391    764.01
392    765.86
393    767.71
394    769.57
395    771.43
396    773.29
397    775.16
398    777.03
399    778.91
400    780.79
401    782.68
402    784.57
403    786.47
404    788.37
405    790.27
406    792.18
407    794.10
Name: Principal Paid, Length: 408, dtype: float64

### Selecting Columns using loc
The pandas attribute `.loc` allows you to select columns, index rows by label, and slice your data.

In [16]:
# .loc[rows, columns]: ':' keeps every row; a list of column labels
# gives a pandas DataFrame
df.loc[:, ['car_type']].head()

Unnamed: 0,car_type
0,Toyota Sienna
1,Toyota Sienna
2,Toyota Sienna
3,Toyota Sienna
4,Toyota Sienna


In [None]:
# .loc[rows, columns]: ':' keeps every row; a single column label (no list)
# gives a pandas Series
df.loc[:, 'car_type'].head()