# Chapter 5: Getting Started with pandas

In [1]:
import numpy as np
import pandas as pd
import datetime
from pandas import Series, DataFrame

### A Series can be thought of as a fixed-length, ordered dictionary, as it is a mapping of index values to data values.

In [2]:
# Passing a dict to pd.Series uses the keys as index labels and the values as data.
sdata = dict(Ohio=35000, Texas=71000, Oregon=16000, Utah=5000)
obj3 = pd.Series(data=sdata)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

### Consequently, a Series can be converted back to a dictionary with its to_dict method.

In [3]:
obj3.to_dict()

{'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}

### Naming a Series and its index

In [4]:
# Rebind obj3 to a renamed copy rather than assigning to its attributes in
# place: rename() sets the Series name and rename_axis() sets the index name,
# each returning a new object, so no existing Series is mutated by this cell.
obj3 = obj3.rename("population").rename_axis("state")

In [5]:
obj3

state
Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
Name: population, dtype: int64

## Essential Functionality

### Reindexing

In [6]:
# The index labels are not in alphabetical order (d, b, a, c).
obj = pd.Series(data=[4.5, 7.2, -5.3, 3.6], index=list("dbac"))
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [7]:
# reindex rearranges the data to follow the new labels; "e" does not exist
# in obj, so its entry is filled with NaN.
obj2 = obj.reindex(index=list("abcde"))
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

### Reindexing and forward filling the data

In [8]:
# Integer labels 0, 2, 4 leave gaps (1, 3, 5) for the reindex that follows.
obj3 = pd.Series({0: "blue", 2: "purple", 4: "yellow"})
obj3

0      blue
2    purple
4    yellow
dtype: object

In [9]:
# method="ffill" gives each new label (1, 3, 5) the value of the nearest
# preceding label that exists in obj3.
obj3.reindex(np.arange(6), method="ffill")

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

![image.png](attachment:0ce49d55-571d-43ca-9162-f195e718d637.png)

### Dropping Entries from an Axis

In [10]:
# Floats 0.0 through 4.0 labelled a-e. drop returns a new Series without the
# given label; obj itself keeps all five entries.
obj = pd.Series(np.arange(5, dtype=float), index=list("abcde"))
obj.drop(labels="c")

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [11]:
# drop also accepts a list of labels, removing several entries in one call.
obj.drop(labels=["a", "b"])

c    2.0
d    3.0
e    4.0
dtype: float64

In [12]:
# 4x4 frame holding 0..15 filled row by row, with state names as row labels.
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=["Ohio", "Colorado", "Utah", "New York"],
    columns=["one", "two", "three", "four"],
)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [13]:
data.drop(index=["Ohio","Utah"])

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
New York,12,13,14,15


In [14]:
data.drop(columns=["one", "two"])

Unnamed: 0,three,four
Ohio,2,3
Colorado,6,7
Utah,10,11
New York,14,15


In [15]:
# axis=1 makes drop look up the label among the columns, so "two" is removed.
data.drop("two", axis=1)

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
Utah,8,10,11
New York,12,14,15


In [16]:
# axis="columns" names the column axis; both listed columns are removed.
data.drop(["two", "four"], axis="columns")

Unnamed: 0,one,three
Ohio,0,2
Colorado,4,6
Utah,8,10
New York,12,14


In [17]:
# Slicing with conditions.
# A cell renders only its final expression, so the positional selection
# (rows at positions 1-2; columns four, one, two in that order) is shown
# explicitly with display() instead of being computed and discarded.
display(data.iloc[1:3, [3, 0, 1]])
# Row mask and column selection in a single .loc call rather than chaining
# [] onto the result of .iloc: first three columns, rows where three > 5.
data.loc[data["three"] > 5, data.columns[:3]]

Unnamed: 0,one,two,three
Colorado,4,5,6
Utah,8,9,10
New York,12,13,14


In [18]:
# Boolean mask through .loc; every row has three >= 2, so all rows are kept.
data.loc[data.three >= 2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [19]:
# Same mask applied with plain [] indexing; the column is read as an attribute.
data[data.three >= 2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [20]:
# Same mask again, with the column looked up by its string key.
data[data["three"] >= 2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


### Treasure
![image.png](attachment:feb61643-8e1e-4a45-b691-9f1ca0536e27.png)

### Flexible arithmetic methods

In [21]:
# The two Series share only the labels a, c and e. Addition aligns on labels,
# and any label present in just one operand yields NaN.
s1 = pd.Series({"a": 7.2, "c": -2.5, "d": 3.4, "e": 1.5})
s2 = pd.Series({"a": -2.1, "c": 3.6, "e": -1.5, "f": 4.0, "g": 3.2})
s1 + s2

a    5.1
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [22]:
# fill_value=0 stands in for a label missing from one operand, so d, f and g
# keep the value from whichever Series has them instead of becoming NaN.
s1.add(s2, fill_value=0)

a    5.1
c    1.1
d    3.4
e    0.0
f    4.0
g    3.2
dtype: float64

![image.png](attachment:bb2cbfcf-b369-4226-a752-b1d2c3faa2e4.png)

In [2]:
# Integers 0-3 under labels that are not in alphabetical order.
obj = pd.Series(np.arange(4), index=list("dabc"))

In [3]:
obj

d    0
a    1
b    2
c    3
dtype: int32

In [4]:
# Orders the entries by index label (a, b, c, d); values travel with their labels.
obj.sort_index()

a    1
b    2
c    3
d    0
dtype: int32

### Missing values are sorted to the end of the Series by default.

In [5]:
# Two of the six entries are missing. na_position="last" is the default and
# is spelled out here only to make the NaN placement explicit.
obj = pd.Series([4, np.nan, 7, np.nan, -3, 2])
obj.sort_values(na_position="last")

4   -3.0
5    2.0
0    4.0
2    7.0
1    NaN
3    NaN
dtype: float64

In [6]:
obj.sort_values(na_position="first")

1    NaN
3    NaN
4   -3.0
5    2.0
0    4.0
2    7.0
dtype: float64

In [6]:
# Small two-column frame; column "b" is unsorted, so frame.sort_values("b")
# can be used to order the rows by it.
frame = pd.DataFrame(dict(b=[4, 7, -3, 2], a=[0, 1, 0, 1]))
frame

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1


In [11]:
# Only the last expression of a cell is rendered automatically, so the first
# two results are passed to display() instead of being computed and dropped.
display(frame.idxmax())  # Index label of the max value in each column
display(frame.idxmin())  # Index label of the min value in each column
frame.cumsum()           # Cumulative sum down each column

Unnamed: 0,b,a
0,4,0
1,11,1
2,8,1
3,10,2


![image.png](attachment:27237366-e860-4767-b50a-ea914d8eb6c8.png)