## Pandas Series objects are like dicts
- Fixed length
- Ordered dict
- Mapping index values to data values


In [2]:
import pandas as pd

In [15]:
# Build a Series from a dict: each key becomes an index label and each value
# becomes the data stored under that label.
city_data = {
    'Dar es Salaam': 3500,
    'Nairobi': 100,
    'Toronto': 2930,
    'Accra': 3449,
}
obj = pd.Series(data=city_data)
obj

Dar es Salaam    3500
Nairobi           100
Toronto          2930
Accra            3449
dtype: int64

### Sort indexes

In [16]:
# Control the index order by passing an explicit list of labels as `index`
index_sort = ["Accra", "Nairobi", "Dar es Salaam", "Toronto"] # custom order (note: not alphabetical); any arrangement of the labels works
obj2 = pd.Series(city_data, index=index_sort)
obj2

Accra            3449
Nairobi           100
Dar es Salaam    3500
Toronto          2930
dtype: int64

## Detect Missing Data. Check for missing values in Pandas Series Object
Input: Pandas Series object

Output: a boolean Series with one entry per index label (row), indicating whether the value at that label is missing (NaN)


In [17]:
# pd.isnull returns a boolean Series with the same index labels as obj2:
# True where the value is missing (NaN), False otherwise.
pd.isnull(obj2)

Accra            False
Nairobi          False
Dar es Salaam    False
Toronto          False
dtype: bool

In [18]:
# Complement of pd.isnull: True where a value is present
pd.notnull(obj2)

Accra            True
Nairobi          True
Dar es Salaam    True
Toronto          True
dtype: bool

In [19]:
# Display obj2 again: the null checks above only inspected it, the values are unchanged
obj2

Accra            3449
Nairobi           100
Dar es Salaam    3500
Toronto          2930
dtype: int64

## Data Alignment in Pandas Series (Similar to Join operation)
Motivation: Automatically align by index label in **arithmetic operations**

In [20]:
# Second Series with a partly different set of labels: it has no 'Nairobi'
# entry and adds 'Montreal'.
# Built in a single expression so the name `obj3` only ever refers to a Series
# (never to the intermediate plain dict).
obj3 = pd.Series({
    'Dar es Salaam': 300,
    'Toronto': 2000,
    'Accra': 600,
    'Montreal': 6700,
})
obj3

Dar es Salaam     300
Toronto          2000
Accra             600
Montreal         6700
dtype: int64

In [22]:
obj2 + obj3 # values are matched by index label; Nairobi (only in obj2) and Montreal (only in obj3) come out as NaN, so their data is lost

Accra            4049.0
Dar es Salaam    3800.0
Montreal            NaN
Nairobi             NaN
Toronto          4930.0
dtype: float64

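## Note: don't lose data to null values during arithmetic ops
`+` keeps the union of both indexes (like an outer join), but only labels present in **both** Series (the inner-join part) get a value; every other label becomes NaN. To treat a missing label as 0 instead, use `obj2.add(obj3, fill_value=0)`.
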
![joins.png](attachment:ee849019-34bd-475e-8158-90c46abbf158.png)
## NaN + 3244 = NaN
![nulllogic.png](attachment:74e98a5a-6a69-4e02-96b6-6b4fa7bd9eca.png)


# Name your pandas objects (datasets)

In [27]:
# A Series can carry a name (useful as a tag, e.g. the source filename).
# obj3 has no name yet, so the last line of its repr shows only the dtype.
obj3

Dar es Salaam     300
Toronto          2000
Accra             600
Montreal         6700
dtype: int64

In [31]:
# Assigning to .name labels the Series in place; the name then shows up in the
# last line of the repr next to the dtype.
obj3.name = "CitiesV3"
obj3

Dar es Salaam     300
Toronto          2000
Accra             600
Montreal         6700
Name: CitiesV3, dtype: int64

## DataFrame
A rectangular table of data that contains an ordered collection of columns, each of which can be a different value type (numeric, string, boolean)

The DataFrame has both a

- row index and 
- column index


In [34]:
# Column-oriented input: each key is a column name and each list holds that
# column's values, one entry per row.
data = {
    'first_name': ['Tom', 'Robo', 'Iron', 'Bat'],
    'last_name': ['Cat', 'X', 'Man', 'Man'],
    'year': [2000, 2002, 2009, 4000],
}
frame = pd.DataFrame(data=data)
frame

Unnamed: 0,first_name,last_name,year
0,Tom,Cat,2000
1,Robo,X,2002
2,Iron,Man,2009
3,Bat,Man,4000


### Specify the column order and add a column that has no data (filled with NaN)

In [35]:
# Rebuild the frame from the same dict, this time fixing the column order.
# "null_field" is not a key of `data`, so that column is created with no
# values (it shows up as NaN / empty in the output).
frame = pd.DataFrame(
    data,
    columns=["last_name", "year", "first_name", "null_field"],
)
frame

Unnamed: 0,last_name,year,first_name,null_field
0,Cat,2000,Tom,
1,X,2002,Robo,
2,Man,2009,Iron,
3,Man,4000,Bat,


In [36]:
# Column labels, in the order requested when the frame was built
frame.columns

Index(['last_name', 'year', 'first_name', 'null_field'], dtype='object')

In [37]:
# Select one column by its label; the result is a Series named after the column
frame["year"]

0    2000
1    2002
2    2009
3    4000
Name: year, dtype: int64

In [38]:
# Confirm that a single selected column is a pandas Series
type(frame["year"])

pandas.core.series.Series

In [41]:
frame.loc[3] # label-based row lookup: the row labelled 3 comes back as a Series indexed by the column names

last_name      Man
year          4000
first_name     Bat
null_field     NaN
Name: 3, dtype: object

In [42]:
# Assign a scalar to a column: the value is broadcast to every row.
# Bracket assignment is used instead of `frame.null_field = 8` because
# attribute-style assignment only updates a column that already exists; for a
# name that is not yet a column it sets a plain Python attribute (pandas emits
# a UserWarning) rather than creating the column.
frame["null_field"] = 8
frame

Unnamed: 0,last_name,year,first_name,null_field
0,Cat,2000,Tom,8
1,X,2002,Robo,8
2,Man,2009,Iron,8
3,Man,4000,Bat,8
