In [2]:
import pandas as pd

In [4]:
# Build a one-dimensional pandas Series holding seven airport names.
# No index is passed, so the values get the default integer labels 0..6.
airport = pd.Series(
    data=[
        "Seattle",
        "Dulles",
        "London Heathrow",
        "Schiphol",
        "Changi",
        "Pearson",
        "Narita",
    ]
)

# A bare expression on the last line of a cell is rendered as the cell output.
airport

0            Seattle
1             Dulles
2    London Heathrow
3           Schiphol
4             Changi
5            Pearson
6             Narita
dtype: object

In [6]:
# Look up a single element by its index label; with the default integer
# index, label 2 is the third value in the Series.
airport[2]

'London Heathrow'

In [7]:
# Iterating over a Series yields its values (not the index labels), one per pass.
for airport_name in airport:
    print(airport_name)

Seattle
Dulles
London Heathrow
Schiphol
Changi
Pearson
Narita


## DataFrame

In [11]:
# Build a DataFrame from a list of rows: each inner list is one airport
# record, and `columns` names the three fields in order.
# NOTE(review): "Seatle" looks like a misspelling of "Seattle". The strings are
# kept as-is so they still match the stored cell outputs; confirm and fix the
# spelling together with the CSV data files.
airports = pd.DataFrame(
    data=[
        ["Seatle-Tacoma", "Seatle", "USA"],
        ["Dulles", "Washington", "USA"],
        ["London-Heathrow", "London", "UK"],
        ["Schiphol", "Amsterdam", "Netherlands"],
        ["Changi", "Singapore", "Singapore"],
        ["Pearson", "Toronto", "Canada"],
        ["Narita", "Tokyo", "Japan"],
    ],
    columns=["Name", "City", "Country"],
)

# Display the whole frame as the cell output.
airports

Unnamed: 0,Name,City,Country
0,Seatle-Tacoma,Seatle,USA
1,Dulles,Washington,USA
2,London-Heathrow,London,UK
3,Schiphol,Amsterdam,Netherlands
4,Changi,Singapore,Singapore
5,Pearson,Toronto,Canada
6,Narita,Tokyo,Japan


### Return the first n rows
- head()

In [12]:
# Show only the first three rows of the frame.
airports.head(n=3)

Unnamed: 0,Name,City,Country
0,Seatle-Tacoma,Seatle,USA
1,Dulles,Washington,USA
2,London-Heathrow,London,UK


### Return the last n rows
- tail()

In [13]:
# Show only the last three rows; the original index labels (4, 5, 6) are kept.
airports.tail(n=3)

Unnamed: 0,Name,City,Country
4,Changi,Singapore,Singapore
5,Pearson,Toronto,Canada
6,Narita,Tokyo,Japan


### Check the number of rows and columns
- shape

In [14]:
# `shape` is an attribute (no parentheses) holding a (rows, columns) tuple.
airports.shape

(7, 3)

Get more detailed information
- info()
- number of rows and the range of index values
- number of columns
- for each column: column name, number of non-null values, and data type

In [15]:
# Print a summary of the frame: index range, per-column non-null counts and
# dtypes, and approximate memory usage.
airports.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Name     7 non-null      object
 1   City     7 non-null      object
 2   Country  7 non-null      object
dtypes: object(3)
memory usage: 296.0+ bytes


## Return a column

In [16]:
# Selecting a single column by name gives back a one-dimensional Series
# (note the `Name: City` footer in the output).
airports['City']

0        Seatle
1    Washington
2        London
3     Amsterdam
4     Singapore
5       Toronto
6         Tokyo
Name: City, dtype: object

### Return specific columns

In [17]:
# Passing a list of column names returns a DataFrame containing just those
# columns, in the order listed (City first, then Name).
airports[['City', 'Name']]

Unnamed: 0,City,Name
0,Seatle,Seatle-Tacoma
1,Washington,Dulles
2,London,London-Heathrow
3,Amsterdam,Schiphol
4,Singapore,Changi
5,Toronto,Pearson
6,Tokyo,Narita


### Use iloc to return specific rows and columns

In [18]:
# Return the single value at row position 0, column position 0.
# iloc takes integer positions: row first, then column.
airports.iloc[0,0]

'Seatle-Tacoma'

### Use : to return all rows and columns

In [23]:
# A bare `:` selects everything along that axis, so this returns all rows
# and all columns.
airports.iloc[:, :]

Unnamed: 0,Name,City,Country
0,Seatle-Tacoma,Seatle,USA
1,Dulles,Washington,USA
2,London-Heathrow,London,UK
3,Schiphol,Amsterdam,Netherlands
4,Changi,Singapore,Singapore
5,Pearson,Toronto,Canada
6,Narita,Tokyo,Japan


### Return a range of columns and rows

In [28]:
# All rows; columns at positions 0 and 1 only (the slice end, 2, is excluded).
airports.iloc[:, :2]

Unnamed: 0,Name,City
0,Seatle-Tacoma,Seatle
1,Dulles,Washington
2,London-Heathrow,London
3,Schiphol,Amsterdam
4,Changi,Singapore
5,Pearson,Toronto
6,Narita,Tokyo


In [29]:
# Rows at positions 0 and 1 only (the slice end, 2, is excluded); all columns.
airports.iloc[:2, :]

Unnamed: 0,Name,City,Country
0,Seatle-Tacoma,Seatle,USA
1,Dulles,Washington,USA


### Return a list of rows and columns

In [30]:
# Return the 1st and 3rd rows (positions 0 and 2) and all columns.
# A list of positions picks non-adjacent rows, unlike a slice.
airports.iloc[[0,2], :]

Unnamed: 0,Name,City,Country
0,Seatle-Tacoma,Seatle,USA
2,London-Heathrow,London,UK


In [31]:
# Return all rows and the 1st and 3rd columns (positions 0 and 2),
# i.e. Name and Country.
airports.iloc[:, [0,2]]

Unnamed: 0,Name,Country
0,Seatle-Tacoma,USA
1,Dulles,USA
2,London-Heathrow,UK
3,Schiphol,Netherlands
4,Changi,Singapore
5,Pearson,Canada
6,Narita,Japan


### Use loc to return columns with names

In [34]:
# loc selects by label rather than position: all rows (`:`) and the columns
# named 'Name' and 'Country'.
airports.loc[:, ['Name', 'Country']]

Unnamed: 0,Name,Country
0,Seatle-Tacoma,USA
1,Dulles,USA
2,London-Heathrow,UK
3,Schiphol,Netherlands
4,Changi,Singapore
5,Pearson,Canada
6,Narita,Japan


### Load csv into dataframe

In [36]:
# Read the CSV at this relative path into a DataFrame.
# The displayed result has Name/City/Country column labels, so the file's
# first line is presumably a header row (read_csv's default) — confirm against the file.
airport_df = pd.read_csv('data/airports.csv')
airport_df

Unnamed: 0,Name,City,Country
0,Seatle-Tacoma,Seatle,USA
1,Dulles,Washington,USA
2,London-Heathrow,London,UK
3,Schiphol,Amsterdam,Netherlands
4,Changi,Singapore,Singapore
5,Pearson,Toronto,Canada
6,Narita,Tokyo,Japan


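### Handling errors in our csv files
Skip malformed lines instead of raising an error: `error_bad_lines=False` in older pandas, `on_bad_lines='warn'` (or `'skip'`) from pandas 1.3 onward, where `error_bad_lines` is deprecated.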

In [40]:
# Drop malformed rows (wrong number of fields) instead of raising a parser error.
# `on_bad_lines='warn'` replaces `error_bad_lines=False`, which was deprecated in
# pandas 1.3 and removed in pandas 2.0. 'warn' keeps the "Skipping line ..."
# message for every dropped row; use 'skip' to drop them silently.
airport_df = pd.read_csv('data/airports-errors.csv', on_bad_lines='warn')
airport_df



  exec(code_obj, self.user_global_ns, self.user_ns)
b'Skipping line 4: expected 3 fields, saw 4\n'


Unnamed: 0,Name,City,Country
0,Seatle-Tacoma,Seatle,USA
1,Dulles,Washington,USA
2,Schiphol,Amsterdam,Netherlands
3,Changi,Singapore,Singapore
4,Pearson,Toronto,Canada
5,Narita,Tokyo,Japan


### Handling files with no column headers
Use `header=None` so the first line is read as data; pandas then labels the columns 0, 1, 2, ...

In [41]:
# Read a file that has no header row and contains at least one malformed line.
# header=None: treat the first line as data; columns get integer labels 0, 1, 2.
# on_bad_lines='warn': drop rows with the wrong number of fields and report each
# one. It replaces `error_bad_lines=False`, which was deprecated in pandas 1.3
# and removed in pandas 2.0.
airport_df = pd.read_csv(
    'data/airports-errors-headers.csv',
    on_bad_lines='warn',
    header=None,
)
airport_df



  exec(code_obj, self.user_global_ns, self.user_ns)
b'Skipping line 3: expected 3 fields, saw 4\n'


Unnamed: 0,0,1,2
0,Seatle-Tacoma,Seatle,USA
1,Dulles,Washington,USA
2,Schiphol,Amsterdam,Netherlands
3,Changi,Singapore,Singapore
4,Pearson,Toronto,Canada
5,Narita,Tokyo,Japan


### Use the `names` parameter to supply column names for files with no header row

In [47]:
# header=None: the file has no header row, so the first line is read as data.
# names=[...]: column labels to use in place of the default 0, 1, 2.
# NOTE(review): no bad-line handling is passed here, yet the stored output
# shows all 7 rows (with a missing City for Schiphol) — confirm the file
# contents this cell was last run against.
airport_df = pd.read_csv('data/airports-errors-headers.csv', 
    header=None,
    names=['Name', 'City', 'Country'])
airport_df

Unnamed: 0,Name,City,Country
0,Seatle-Tacoma,Seatle,USA
1,Dulles,Washington,USA
2,London-Heathrow,London,UK
3,Schiphol,,Netherlands
4,Changi,Singapore,Singapore
5,Pearson,Toronto,Canada
6,Narita,Tokyo,Japan


### Use to_csv to write a DataFrame to a CSV file
Specify `index=False` to leave the index values out of the written file.

In [48]:
# Write the frame to data/new.csv. index=False leaves the row index out of
# the file, so only the Name/City/Country columns are written.
airport_df.to_csv('data/new.csv', index=False)