# Pandas - Cleaning Data

In [16]:
import pandas as pd

## Working with Missing Data

#### Read CSV data - use `parse_dates=[0]` to parse the first column of the CSV file as dates

In [17]:
# Load the city temperature table. The comma separator is the read_csv
# default, and column 0 of the file is parsed as datetimes.
city_data = pd.read_csv(
    './Data/city_temperatures.csv',
    parse_dates=[0],
)

#### Missing data is represented as NaN

In [18]:
# Bare expression on the last line: the notebook renders the loaded frame.
city_data

Unnamed: 0,Date,Temperature_City_1,Temperature_City_2,City_Choice
0,2016-01-15,15.0,18.0,2
1,2016-02-15,,19.0,2
2,2016-03-15,20.0,17.0,1
3,2016-04-15,19.0,,2
4,2016-05-15,22.0,25.0,1
5,2016-06-15,,27.0,1


#### Replace NaN with the mean value of each column
`mean(axis=0)` computes the mean of each column; `fillna` then fills every NaN with the mean of its own column.

In [19]:
# Fill each NaN with the mean of its own column. `numeric_only=True` limits
# the means to numeric columns, so the parsed date column never contributes
# a fill value and the result does not depend on the pandas version's
# default handling of non-numeric columns. fillna returns a new frame;
# city_data itself is not modified.
city_data.fillna(city_data.mean(axis=0, numeric_only=True))

Unnamed: 0,Date,Temperature_City_1,Temperature_City_2,City_Choice
0,2016-01-15,15.0,18.0,2
1,2016-02-15,19.0,19.0,2
2,2016-03-15,20.0,17.0,1
3,2016-04-15,19.0,21.2,2
4,2016-05-15,22.0,25.0,1
5,2016-06-15,19.0,27.0,1


## Working with Erroneous Data

In [27]:
# Attempt a plain read of the malformed file. A failure is printed rather
# than raised, so the error text shows up in the cell output and the
# notebook keeps running.
try:
    data = pd.read_csv('./Data/error_data.csv')
except Exception as read_error:
    print("Can't proccess data: ", read_error)

Can't proccess data:  Error tokenizing data. C error: Expected 2 fields in line 3, saw 3



#### Use `error_bad_lines=False` (replaced by `on_bad_lines` in pandas ≥ 1.3) to ignore lines with erroneous data

In [28]:
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines='warn'` (pandas >= 1.3) keeps the same behaviour: malformed
# rows are skipped and a warning is emitted for each one.
try:
    data = pd.read_csv('./Data/error_data.csv', on_bad_lines='warn')
except Exception as e:
    # Any remaining failure is reported instead of raised.
    print("Can't proccess data: ", e)

b'Skipping line 3: expected 2 fields, saw 3\n'


In [29]:
# Bare expression on the last line: the notebook renders the frame read above.
data

Unnamed: 0,col1,col2
0,1,3
1,2,7
2,3,9
