## Video 3.1 - Getting Started with Pandas

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

### Series

A `pandas.Series` is a one-dimensional, array-like object: a sequence of values, each paired with an index label.

In [2]:
# Build a Series from a plain list. No index is given, so pandas
# generates a default integer index for it.
data = Series(
    data=[4, -1, 3, 2],
)

data

0    4
1   -1
2    3
3    2
dtype: int64

In [3]:
# Look up the element labelled 0; with the default integer index this is also the first element.
data[0]

4

In [4]:
# The underlying values as a NumPy array (the index labels are not included).
data.values

array([ 4, -1,  3,  2], dtype=int64)

In [5]:
# The index holds the labels; none were supplied, so this is the default integer range.
data.index

RangeIndex(start=0, stop=4, step=1)

In [6]:
# Same values as before, but each one now carries an explicit string label
# instead of the default integer index.
data = Series(
    data=[4, -1, 3, 2],
    index=list('abcd'),
)

data

a    4
b   -1
c    3
d    2
dtype: int64

In [7]:
# Positional access on a label-indexed Series: ask for it explicitly with .iloc.
# Plain `data[0]` only works here through the integer-key positional fallback,
# which is deprecated for a Series with a non-integer index (FutureWarning in
# pandas >= 2.1). `.iloc[0]` returns the same first element.
data.iloc[0]

4

In [8]:
# Label-based lookup using the custom string index.
data['a']

4

In [9]:
# A list of labels selects several elements and returns a Series rather than a scalar.
data[['a', 'b']]

a    4
b   -1
dtype: int64

In [10]:
# Boolean mask: `data > 0` is evaluated element-wise and only the True entries are kept.
data[data > 0]

a    4
c    3
d    2
dtype: int64

In [11]:
# Arithmetic is applied element-wise; the index labels are preserved.
data * 2

a    8
b   -2
c    6
d    4
dtype: int64

In [12]:
# NumPy ufuncs operate element-wise on a Series and keep its index labels.
# (numpy is imported as `np` alongside the other imports in the first cell.)
np.exp(data)

a    54.598150
b     0.367879
c    20.085537
d     7.389056
dtype: float64

In [13]:
# Numeric values keyed by city name.
city_data = dict(London=8.6, Paris=2.2, Berlin=3.6)

# With both a dict and an explicit index, entries are picked and ordered by
# label; labels that have no matching key (Madrid, Rome) come out as NaN.
data = Series(
    data=city_data,
    index=['Berlin', 'London', 'Madrid', 'Paris', 'Rome'],
)

data

Berlin    3.6
London    8.6
Madrid    NaN
Paris     2.2
Rome      NaN
dtype: float64

### Data Frames

A `pandas.DataFrame` is a table-like structure with labelled rows and columns.

In [14]:
# One dict per purchase; the keys become the column names.
# The final record has no 'Item' key, so that cell shows up as NaN in the frame.
purchases = [
    {'Customer': 'Bob', 'Item': 'Oranges', 'Quantity': 2, 'Unit price': 2},
    {'Customer': 'Bob', 'Item': 'Apples', 'Quantity': 3, 'Unit price': 1},
    {'Customer': 'Bob', 'Item': 'Milk', 'Quantity': 1, 'Unit price': 4},
    {'Customer': 'Alice', 'Item': 'Oranges', 'Quantity': 2, 'Unit price': 2},
    {'Customer': 'Alice', 'Quantity': 2, 'Unit price': 3},
]
df = DataFrame(data=purchases)

df

Unnamed: 0,Customer,Item,Quantity,Unit price
0,Bob,Oranges,2,2
1,Bob,Apples,3,1
2,Bob,Milk,1,4
3,Alice,Oranges,2,2
4,Alice,,2,3


#### Accessing rows and columns

In [15]:
# .loc selects by index label; a single row comes back as a Series keyed by column name.
df.loc[0]

Customer          Bob
Item          Oranges
Quantity            2
Unit price          2
Name: 0, dtype: object

In [16]:
# Selecting one column by name returns it as a Series.
df['Item']

0    Oranges
1     Apples
2       Milk
3    Oranges
4        NaN
Name: Item, dtype: object

In [17]:
# A row label plus a column label selects a single cell.
df.loc[0, 'Item']

'Oranges'

In [18]:
# Unlike ordinary Python slices, a .loc label slice includes its end label,
# so rows 0, 1 and 2 are all returned.
df.loc[0:2, ['Item', 'Quantity']]

Unnamed: 0,Item,Quantity
0,Oranges,2
1,Apples,3
2,Milk,1


#### Boolean indexing

In [19]:
# Comparing a column with a scalar yields a boolean Series: one True/False per row.
is_alice = df['Customer'] == 'Alice'

is_alice

0    False
1    False
2    False
3     True
4     True
Name: Customer, dtype: bool

In [20]:
# Indexing the frame with a boolean Series keeps only the rows where the mask is True.
df[is_alice]

Unnamed: 0,Customer,Item,Quantity,Unit price
3,Alice,Oranges,2,2
4,Alice,,2,3


#### Modifying the data frame

In [21]:
# Assigning to a new column label adds that column to df in place;
# the product is computed row by row.
df['Total cost'] = df['Unit price'] * df['Quantity']

df

Unnamed: 0,Customer,Item,Quantity,Unit price,Total cost
0,Bob,Oranges,2,2,4
1,Bob,Apples,3,1,3
2,Bob,Milk,1,4,4
3,Alice,Oranges,2,2,4
4,Alice,,2,3,6


In [22]:
# `del` removes the column from df in place.
del df['Total cost']

df

Unnamed: 0,Customer,Item,Quantity,Unit price
0,Bob,Oranges,2,2
1,Bob,Apples,3,1
2,Bob,Milk,1,4
3,Alice,Oranges,2,2
4,Alice,,2,3


In [23]:
# drop returns a new frame without the row labelled 4; df itself is not modified.
df.drop(4)

Unnamed: 0,Customer,Item,Quantity,Unit price
0,Bob,Oranges,2,2
1,Bob,Apples,3,1
2,Bob,Milk,1,4
3,Alice,Oranges,2,2


In [24]:
# df still contains row 4: the result of the earlier df.drop(4) was never assigned.
df

Unnamed: 0,Customer,Item,Quantity,Unit price
0,Bob,Oranges,2,2
1,Bob,Apples,3,1
2,Bob,Milk,1,4
3,Alice,Oranges,2,2
4,Alice,,2,3


In [25]:
# Keep the result of drop under a new name; df is left untouched.
new_df = df.drop(3)
new_df

Unnamed: 0,Customer,Item,Quantity,Unit price
0,Bob,Oranges,2,2
1,Bob,Apples,3,1
2,Bob,Milk,1,4
4,Alice,,2,3


In [26]:
# inplace=True mutates df itself and the call returns None.
# NOTE: this cell is not idempotent -- once row 3 is gone, running it again
# raises KeyError because label 3 no longer exists in df.
df.drop(3, inplace=True)
df

Unnamed: 0,Customer,Item,Quantity,Unit price
0,Bob,Oranges,2,2
1,Bob,Apples,3,1
2,Bob,Milk,1,4
4,Alice,,2,3


In [None]:
# Label-based assignment to a single cell: set the 'Item' value of the row labelled 4.
df.loc[4, 'Item'] = 'Bananas'

df