# Intro to Pandas

## Series

In [1]:
# basic imports
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

### Create our first series

A Series is a one-dimensional array of data together with an associated array of data labels, called its index.

In [2]:
# Build our first Series from a plain Python list.
# No index is given, so pandas supplies a default integer one.
obj = Series([2, 4, 6, 8])

# Display the Series
obj

0    2
1    4
2    6
3    8
dtype: int64

In [3]:
# Check the Python type of the object we just created
type(obj)

pandas.core.series.Series

In [4]:
# Let's show only the values, without the index

obj.values

array([2, 4, 6, 8])

In [5]:
# Let's show the index on its own

obj.index

RangeIndex(start=0, stop=4, step=1)

### Now let's create a Series with a custom index

In [6]:
# Python developers per country, labelled by country name
py_devs = Series(
    data=[870, 430, 300, 210, 400],
    index=['Spain', 'Germany', 'China', 'Japan', 'USA'],
)

# Display the Series
py_devs

Spain      870
Germany    430
China      300
Japan      210
USA        400
dtype: int64

In [7]:
# Show the index: it now holds the country labels we supplied
py_devs.index

Index(['Spain', 'Germany', 'China', 'Japan', 'USA'], dtype='object')

In [8]:
# Now we can use an index label to select the matching value
py_devs['USA']

400

#### We can also filter with array operations

In [9]:
# Check which countries have more than 400 professional Python developers.
# Indexing with the boolean mask keeps only the entries where it is True.

py_devs[py_devs > 400]

Spain      870
Germany    430
dtype: int64

In [10]:
# The comparison on its own gives a boolean Series
py_devs > 400

Spain       True
Germany     True
China      False
Japan      False
USA        False
dtype: bool

#### We can treat a Series as an ordered dictionary

In [12]:
# Check whether 'Spain' is one of the index labels of the Series

'Spain' in py_devs

True

#### We can convert a Series into a Python dictionary

In [13]:
# The to_dict() method returns a plain dict mapping each index label to its value
devs_dict = py_devs.to_dict()

# Show the dictionary
devs_dict

{'China': 300, 'Germany': 430, 'Japan': 210, 'Spain': 870, 'USA': 400}

#### We can convert it back into a Series

In [14]:
# To go back to a Series, pass the dict to the Series constructor
devs_series = Series(data= devs_dict)

# Show the result
devs_series

China      300
Germany    430
Japan      210
Spain      870
USA        400
dtype: int64

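**Note:** When a Series is built from a dictionary, its index is taken from the dict keys. The sorted order shown above is what older pandas versions produce; pandas 0.23+ on Python 3.6+ keeps the keys in insertion order instead.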

In [15]:
# Country labels to use as a new index.
# 'Argentina' is a label that the data so far has no value for.
countries = [
    'China',
    'Germany',
    'Japan',
    'USA',
    'Spain',
    'Argentina',
]

In [16]:
# Let's build a new Series from the dict, this time passing an explicit index.
# An index label with no matching dict key ('Argentina') gets NaN as its value.

obj2 = Series(data= devs_dict, index= countries)

In [18]:
# Show the new Series (note the NaN entry and the float64 dtype)
obj2

China        300.0
Germany      430.0
Japan        210.0
USA          400.0
Spain        870.0
Argentina      NaN
dtype: float64

### We can use isnull and notnull to find missing data in a Series

We have two options

In [19]:
# First option:
# pass the Series as an argument to the top-level pandas function

pd.isnull(obj2)

China        False
Germany      False
Japan        False
USA          False
Spain        False
Argentina     True
dtype: bool

In [20]:
# Second option:
# call the method directly on the Series

obj2.isnull() 

China        False
Germany      False
Japan        False
USA          False
Spain        False
Argentina     True
dtype: bool

#### Same for the opposite, notnull

We have two options

In [21]:
# First option: the top-level pandas function

pd.notnull(obj2)

China         True
Germany       True
Japan         True
USA           True
Spain         True
Argentina    False
dtype: bool

In [22]:
# Second option: the method called on the Series itself

obj2.notnull()

China         True
Germany       True
Japan         True
USA           True
Spain         True
Argentina    False
dtype: bool

## Operations on Series

In [63]:
# Let's look at the py_devs Series again

py_devs

Spain      870
Germany    430
China      300
Japan      210
USA        400
dtype: int64

In [64]:
# Let's check our Series that includes Argentina again

obj2

China        300.0
Germany      430.0
Japan        210.0
USA          400.0
Spain        870.0
Argentina      NaN
dtype: float64

Adding two Series

In [79]:
# Now we can add the two Series; pandas automatically aligns the data by index label.
# A label with no value to add (here 'Argentina') comes out as NaN in the result.

py_devs + obj2 

Argentina       NaN
China         600.0
Germany       860.0
Japan         420.0
Spain        1740.0
USA           800.0
dtype: float64

Define a name

In [23]:
# We can give a Series a name through its name attribute
obj2.name = "Python Developers by Countries"

In [24]:
# Show: the name now appears in the footer of the output
obj2

China        300.0
Germany      430.0
Japan        210.0
USA          400.0
Spain        870.0
Argentina      NaN
Name: Python Developers by Countries, dtype: float64

Define an Index Name

In [25]:
# We can also give the index a name
obj2.index.name = 'COUNTRIES'

In [26]:
# Show: the index name appears above the labels
obj2

COUNTRIES
China        300.0
Germany      430.0
Japan        210.0
USA          400.0
Spain        870.0
Argentina      NaN
Name: Python Developers by Countries, dtype: float64

### We will see the Series many times during this session, but now...

## Let's do some exercises!