In [1]:
import numpy as np # Numpy Library

# Pandas

    - Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool built on top of
      NumPy
    - Python's version of MS Excel or R Data Frames, that allows for fast analysis, data cleaning and preparation
    - It has built-in visualization features
    - It can work with various data sources

In [5]:
import pandas as pd # Pandas Library

## Series

    - A Pandas Series is a one-dimensional array of indexed data. It can be created from a list or array.
    - Very similar to a NumPy array, but a Series also has axis labels (an index) that can be used to access its values

In [71]:
# Four sample values stored as a one-dimensional float64 NumPy array.
np_array = np.array([0.25, 0.5, 0.75, 1.0], dtype=np.float64)

In [72]:
np_array

array([0.25, 0.5 , 0.75, 1.  ])

In [73]:
# Wrap the NumPy array in a Series; no index is passed, so pandas supplies the default one.
data = pd.Series(data=np_array)

In [74]:
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [13]:
# As you can see, the Series wraps both a sequence of values and a sequence of indices, which we can access with
# the values and index attributes. The values are simply a NumPy array.

In [75]:
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [76]:
data.index

RangeIndex(start=0, stop=4, step=1)

In [18]:
# The index is an array-like object of type pd.Index, which we will discuss in detail

## Series as a generalized NumPy array

In [19]:
# Like with a NumPy array, data can be accessed by the associated index via the familiar Python square-bracket notation

In [20]:
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [21]:
data[1]

0.5

In [22]:
data[1:3]

1    0.50
2    0.75
dtype: float64

In [23]:
# As we will see, the Pandas Series is much more general and flexible than the one-dimensional NumPy array that it emulates

In [24]:
# The essential difference between a Series and a NumPy array is the presence of the index

# While a NumPy array has an implicitly defined integer index used to access the values, a Series has an explicitly defined
# index associated with the values. Let me demonstrate with an example.

In [25]:
# Label each value with an explicit string index instead of the default integers.
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=['a', 'b', 'c', 'd'],
)

In [26]:
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [27]:
# Access to the item can be done with the following

In [28]:
data['b']

0.5

In [29]:
data[['a','b']]

a    0.25
b    0.50
dtype: float64

In [31]:
data['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [32]:
# We can even use noncontiguous (non-continuous) or nonsequential indices, such as the following

In [34]:
# Index labels do not have to be consecutive: here they are the integers 2, 4, 6 and 8.
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=[2, 4, 6, 8])

In [35]:
data

2    0.25
4    0.50
6    0.75
8    1.00
dtype: float64

## Series as a specialized dictionary

In [36]:
# You can also think of a Series as a specialized dictionary

# As you can recall, a dictionary is a structure that maps arbitrary keys to a set of arbitrary values (key value pairings)

# A Series maps typed keys to a set of typed values

# We can make a Series from a dictionary as we will demonstrate below

In [38]:
# State name -> population count.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
}

In [39]:
# The dictionary keys become the index labels and the dictionary values become the Series values.
population = pd.Series(data=population_dict)

In [40]:
population

California    38332521
Texas         26448193
New York      19651127
Florida       19552860
dtype: int64

In [41]:
population['California']

38332521

In [78]:
population[['Texas','California']]

Texas         26448193
California    38332521
dtype: int64

In [81]:
# Positional slice: the entries at positions 1 and 2 (the stop position 3 is excluded).
# .iloc makes it explicit that the integers are positions, not index labels.
population.iloc[1:3]

Texas       26448193
New York    19651127
dtype: int64

## Construction of Series objects

In [55]:
# You have already seen a few ways of constructing a Series from scratch; all of them are some version of the following

In [44]:
# pd.Series(data, index)

# Where index is an optional argument, and data can be one of many entities

# index defaults to an integer sequence

In [45]:
# No index is given, so the labels default to 0, 1, 2.
pd.Series(data=[2, 4, 6])

0    2
1    4
2    6
dtype: int64

In [46]:
# Also, data can be a scalar, which is repeated to fill the specified indices

In [47]:
# The scalar 10 is repeated once for every label in the index.
pd.Series(10, index=['a', 'b', 'c'])

a    10
b    10
c    10
dtype: int64

In [48]:
# The index can be explicitly set if a different result is preferred

In [49]:
# Only keys 2 and 1 are requested, in that order; the entry for key 3 is left out.
pd.Series({2: 'a', 1: 'b', 3: 'c'}, index=[2, 1])

2    a
1    b
dtype: object

In [50]:
# Notice that in this case, the Series is populated with the explicitly identified keys

In [58]:
# You can also combine Series such as

In [63]:
# First operand: integer values labelled by country.
series_1 = pd.Series(
    data=[1, 2, 3, 4],
    index=['Philippines', 'USA', 'Canada', 'Italy'],
)

In [64]:
# Second operand: same values, but 'Germany' replaces 'Italy' and the labels are in a different order.
series_2 = pd.Series(
    data=[1, 2, 3, 4],
    index=['Philippines', 'Germany', 'USA', 'Canada'],
)

In [65]:
series_1 + series_2

Canada         7.0
Germany        NaN
Italy          NaN
Philippines    2.0
USA            5.0
dtype: float64

In [68]:
# It will try to match the operation based on the index

# Where it cannot find a match, the result will be NaN (Not a Number)

# Because NaN is a floating-point value, the integer results are converted to floats so that the missing entries can be represented

In [70]:
# For the next topic we will talk about DataFrames; we will be working a lot more with DataFrames,
# which are the true workhorse of Pandas