## PANDAS Series Creation and Indexing

In [30]:
# Build a named Series of step counts from a plain Python list.
# With no index supplied, pandas assigns the default 0..n-1 integer index.
import pandas as pd

step_data = [3620, 7891, 9761, 3907, 4338, 5373]
step_counts = pd.Series(data=step_data, name='steps')

print(step_counts)


0    3620
1    7891
2    9761
3    3907
4    4338
5    5373
Name: steps, dtype: int64


In [31]:
# Swap the default integer index for six consecutive calendar days
# starting on 2015-03-29 (date_range defaults to daily frequency here).
step_counts.index = pd.date_range(start='20150329', periods=6)
print(step_counts)

2015-03-29    3620
2015-03-30    7891
2015-03-31    9761
2015-04-01    3907
2015-04-02    4338
2015-04-03    5373
Freq: D, Name: steps, dtype: int64


In [32]:
# Look up a single value by its index label, much like a dictionary key
print(step_counts.loc['2015-04-01'])

3907


In [33]:
# ...or by integer position, like an array.
# Use .iloc for positional access: a bare integer inside [] on a Series whose
# index is not integer-based relies on a deprecated positional fallback.
print(step_counts.iloc[3])

3907


In [34]:
# Partial-string indexing: a year-month label selects every entry in April
print(step_counts.loc['2015-04'])

2015-04-01    3907
2015-04-02    4338
2015-04-03    5373
Freq: D, Name: steps, dtype: int64


## PANDAS Data Types and Imputation 

In [35]:
# Inspect the dtype of the values stored in the Series
print(step_counts.dtype)

int64


In [20]:
import numpy as np

# Convert the values to floating point.
# The builtin `float` is used because the `np.float` alias has been removed
# from NumPy; both map to float64, so the resulting dtype is unchanged.
step_counts = step_counts.astype(float)

# Confirm the new data type
print(step_counts.dtypes)

float64


#### Invalid data points can be easily filled with values 

In [42]:
# Simulate invalid readings by blanking out the 2nd and 3rd entries.
# `np.nan` is the supported spelling (the `np.NaN` alias is gone in NumPy 2.0),
# and .iloc makes it explicit that the slice is positional.
step_counts.iloc[1:3] = np.nan
print(step_counts)
print("\n")


# Replace the missing values with zeros. fillna returns a new Series, so the
# result is reassigned here instead of mutating in place with inplace=True.
step_counts = step_counts.fillna(0.)
print(step_counts.iloc[1:3])


2015-03-29    3620.0
2015-03-30       NaN
2015-03-31       NaN
2015-04-01    3907.0
2015-04-02    4338.0
2015-04-03    5373.0
Freq: D, Name: steps, dtype: float64


2015-03-30    0.0
2015-03-31    0.0
Freq: D, Name: steps, dtype: float64


## PANDAS DataFrame Creation and Methods  

#### DataFrames can be created from lists, dictionaries, and Pandas Series 

In [45]:
# Cycling distances; None marks a missing reading
cycling_data = [10.7, 0, None, 2.4, 15.3, 10.9, 0, None]

# Pair each step count with a cycling distance as a list of tuples.
# zip stops at the shorter input, so only the first six cycling entries
# (one per element of step_data) are kept.
joined_data = [(steps, distance) for steps, distance in zip(step_data, cycling_data)]

# Create the DataFrame; rows and columns get default integer labels
activity_df = pd.DataFrame(data=joined_data)

print(activity_df)

      0     1
0  3620  10.7
1  7891   0.0
2  9761   NaN
3  3907   2.4
4  4338  15.3
5  5373  10.9


#### Label column and add an index 

In [46]:
# Rebuild the frame with descriptive column labels and a daily date index
activity_df = pd.DataFrame(
    data=joined_data,
    columns=['Walking', 'Cycling'],
    index=pd.date_range(start='20150329', periods=6),
)

print(activity_df)


            Walking  Cycling
2015-03-29     3620     10.7
2015-03-30     7891      0.0
2015-03-31     9761      NaN
2015-04-01     3907      2.4
2015-04-02     4338     15.3
2015-04-03     5373     10.9


### Indexing DataFrame ROWS using the 'loc' and 'iloc' indexers

In [None]:
# Label-based row selection with 'loc': fetch the row labelled 2015-04-01
print(activity_df.loc['2015-04-01', :])


In [50]:
# Position-based row selection with 'iloc': -3 counts back from the end
print(activity_df.iloc[-3, :])


Walking    3907.0
Cycling       2.4
Name: 2015-04-01 00:00:00, dtype: float64


### Indexing DataFrame COLUMNS 

In [51]:
# Select a single column by its label; the result is a Series
print(activity_df.loc[:, 'Walking'])

2015-03-29    3620
2015-03-30    7891
2015-03-31    9761
2015-04-01    3907
2015-04-02    4338
2015-04-03    5373
Freq: D, Name: Walking, dtype: int64


#### DataFrame columns can also be accessed as attributes

In [52]:
# Attribute-style access: the column label is used as an attribute name
print(activity_df.Walking)

2015-03-29    3620
2015-03-30    7891
2015-03-31    9761
2015-04-01    3907
2015-04-02    4338
2015-04-03    5373
Freq: D, Name: Walking, dtype: int64


In [53]:
# First column by integer position: all rows (:), column 0
print(activity_df.iloc[:, 0])

2015-03-29    3620
2015-03-30    7891
2015-03-31    9761
2015-04-01    3907
2015-04-02    4338
2015-04-03    5373
Freq: D, Name: Walking, dtype: int64


## Reading Data with PANDAS
#### Read CSV and common filetypes with a single command