In [1]:
import pandas as pd

In [2]:
# Load the airports CSV with pandas' default settings; the bare name on the
# last line makes the notebook render the resulting DataFrame.
airports_df = pd.read_csv(filepath_or_buffer="Data/airports.csv")
airports_df

Unnamed: 0,Name,City,Country
0,Seattle-Tacoma,Seattle,USA
1,Dulles,Washington,USA
2,Heathrow,London,United Kingdom
3,Schiphol,Amsterdam,Netherlands
4,Changi,Singapore,Singapore
5,Pearson,Toronto,Canada
6,Narita,Tokyo,Japan


-----------

## Handling Rows with Errors
- pd.read_csv(file_name, **on_bad_lines = "warn"**) — pandas 1.3+; older versions used **error_bad_lines = False**, which was removed in pandas 2.0

In [3]:
# Reading a file that contains a malformed row makes pandas raise ParserError.
# The error is the point of this cell, so catch it and print the message:
# the failure is still shown, but "Restart Kernel -> Run All" is not aborted.
# On failure `airports_df` keeps the value assigned by the previous cell.
try:
    airports_df = pd.read_csv("Data/airportsInvalidRows.csv")
except pd.errors.ParserError as exc:
    print(f"ParserError: {exc}")

ParserError: Error tokenizing data. C error: Expected 3 fields in line 4, saw 4


### Handling bad lines

In [6]:
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
# Its replacement is `on_bad_lines`: "warn" skips each malformed row and emits
# a warning for it, matching the old skip-and-report behaviour
# (use "skip" to drop the rows silently). Requires pandas >= 1.3.
airports_df = pd.read_csv(
    "Data/airportsInvalidRows.csv",
    on_bad_lines="warn",
)

b'Skipping line 4: expected 3 fields, saw 4\n'


Now the row with the extra field has been skipped, so there is no "London" row in the result.

In [7]:
# Display the current contents of airports_df.
airports_df

Unnamed: 0,Name,City,Country
0,Seattle-Tacoma,Seattle,USA
1,Dulles,Washington,USA
2,Schiphol,Amsterdam,Netherlands
3,Changi,Singapore,Singapore
4,Pearson,Toronto,Canada
5,Narita,Tokyo,Japan


-------

## Handling files with No Column Headers
- pd.read_csv(file_name, **header=None**)

In [8]:
# Read the header-less file with default settings: pandas uses the first
# line of the file as the column header.
airports_df = pd.read_csv(filepath_or_buffer="Data/airportsNoHeaderRows.csv")
airports_df

Unnamed: 0,Seattle-Tacoma,Seattle,USA
0,Dulles,Washington,USA
1,Heathrow,London,United Kingdom
2,Schiphol,Amsterdam,Netherlands
3,Changi,Singapore,Singapore
4,Pearson,Toronto,Canada
5,Narita,Tokyo,Japan


As we can see, pandas treated the first data row as the column header, which is NOT CORRECT.

In [9]:
# header=None tells pandas the file has no header row, so every line is read
# as data and the columns receive default integer labels.
airports_df = pd.read_csv("Data/airportsNoHeaderRows.csv", header=None)
airports_df

Unnamed: 0,0,1,2
0,Seattle-Tacoma,Seattle,USA
1,Dulles,Washington,USA
2,Heathrow,London,United Kingdom
3,Schiphol,Amsterdam,Netherlands
4,Changi,Singapore,Singapore
5,Pearson,Toronto,Canada
6,Narita,Tokyo,Japan


## Naming Column Headers while Reading a CSV
- pd.read_csv(file_name, header = None, **names = [columns_names_list]**)

In [12]:
# The file has no header row, so supply the column labels explicitly
# through `names` while still reading every line as data.
airports_df = pd.read_csv(
    "Data/airportsNoHeaderRows.csv",
    header=None,
    names=["Name", "City", "Country"],
)
airports_df

Unnamed: 0,Name,City,Country
0,Seattle-Tacoma,Seattle,USA
1,Dulles,Washington,USA
2,Heathrow,London,United Kingdom
3,Schiphol,Amsterdam,Netherlands
4,Changi,Singapore,Singapore
5,Pearson,Toronto,Canada
6,Narita,Tokyo,Japan


--------------

## Missing Values (NaN) in data files

In [14]:
# Load a file in which some fields are blank; pandas reads blank fields
# as missing values (NaN).
airports_df = pd.read_csv(filepath_or_buffer="Data/airportsBlankValues.csv")
airports_df

Unnamed: 0,Name,City,Country
0,Seattle-Tacoma,Seattle,USA
1,Dulles,Washington,USA
2,Heathrow,London,United Kingdom
3,Schiphol,,Netherlands
4,Changi,Singapore,Singapore
5,Pearson,Toronto,Canada
6,Narita,Tokyo,Japan


## Write DataFrame contents to csv file

In [15]:
# Display the DataFrame that the following cells write out to CSV.
airports_df

Unnamed: 0,Name,City,Country
0,Seattle-Tacoma,Seattle,USA
1,Dulles,Washington,USA
2,Heathrow,London,United Kingdom
3,Schiphol,,Netherlands
4,Changi,Singapore,Singapore
5,Pearson,Toronto,Canada
6,Narita,Tokyo,Japan


In [17]:
# Write the DataFrame to CSV with default settings; by default the row index
# is written as the first column of the file.
airports_df.to_csv(path_or_buf="Data/Output/NewCSVFile.csv")

## Write DataFrame to CSV with NO INDEX
- df.to_csv(file_name, index = False)

In [18]:
# index=False leaves the DataFrame's row index out of the written file.
airports_df.to_csv("Data/Output/NewCSVFile_noIndex.csv", index=False)