# Chapter 3 Manipulating Tabular Data Using Pandas

## Pandas Series

In [1]:
import pandas as pd
# Build a Series from a plain Python list; no index is given, so pandas
# supplies the default integer labels 0..4.
series = pd.Series(data=[1, 2, 3, 4, 5])
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


### Creating a Series Using a Specified Index

In [2]:
# Same values, but with explicit string labels as the index.
# The label 'c' is deliberately used twice: index labels need not be unique.
series = pd.Series(data=[1, 2, 3, 4, 5], index=list('abcdc'))
print(series)

a    1
b    2
c    3
d    4
c    5
dtype: int64


### Accessing Elements in a Series

In [3]:
# Positional access: fetch the element at position 2 (the third element).
# `.iloc` is used instead of plain `series[2]` because this Series is indexed
# by string labels, and treating an integer key inside [] as a position on
# such an index is deprecated behaviour in recent pandas releases.
print(series.iloc[2])

3


In [4]:
# Explicit position-based lookup: element at position 2 (the third element),
# regardless of what the index labels are.
print(series.iloc[2])

3


In [5]:
# Label-based lookup with plain []: 'd' occurs once in the index, so a single
# scalar value is returned.
print(series['d'])

4


In [6]:
# Explicit label-based lookup through .loc; gives the same result as series['d'].
print(series.loc['d'])

4


In [7]:
# The label 'c' appears twice in the index, so this lookup returns a Series
# holding both matching rows rather than a single scalar.
print(series['c'])

c    3
c    5
dtype: int64


In [8]:
# An integer slice inside plain [] is applied by position here: everything
# from position 2 onward (the same rows that series.iloc[2:] selects).
print(series[2:])

c    3
d    4
c    5
dtype: int64


In [9]:
# Explicit positional slice: all elements from position 2 to the end.
print(series.iloc[2:])

c    3
d    4
c    5
dtype: int64


### Specifying a Datetime Range as the Index of a Series

In [10]:
# Twelve consecutive dates beginning 25 May 2019; with no freq argument the
# range advances one calendar day at a time.
dates1 = pd.date_range(start='20190525', periods=12)
print(dates1)

DatetimeIndex(['2019-05-25', '2019-05-26', '2019-05-27', '2019-05-28',
               '2019-05-29', '2019-05-30', '2019-05-31', '2019-06-01',
               '2019-06-02', '2019-06-03', '2019-06-04', '2019-06-05'],
              dtype='datetime64[ns]', freq='D')


In [11]:
# Twelve values labelled by the twelve dates in dates1; the DatetimeIndex is
# attached at construction time.
series = pd.Series(
    data=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    index=dates1,
)
print(series)

2019-05-25     1
2019-05-26     2
2019-05-27     3
2019-05-28     4
2019-05-29     5
2019-05-30     6
2019-05-31     7
2019-06-01     8
2019-06-02     9
2019-06-03    10
2019-06-04    11
2019-06-05    12
Freq: D, dtype: int64


### Date Ranges

In [12]:
# Twelve month-end dates, the first being the end of May 2019.
# The frequency is given as an offset object rather than the string alias 'M':
# that alias is deprecated in newer pandas (renamed 'ME'), whereas
# pd.offsets.MonthEnd() yields the same dates on both old and new releases.
dates2 = pd.date_range('2019-05-01', periods=12, freq=pd.offsets.MonthEnd())
print(dates2)

DatetimeIndex(['2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31',
               '2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30'],
              dtype='datetime64[ns]', freq='M')


In [13]:
# 'MS' (month start) anchors every generated date to the first day of its month.
dates2 = pd.date_range(start='2019-05-01', periods=12, freq='MS')
print(dates2)

DatetimeIndex(['2019-05-01', '2019-06-01', '2019-07-01', '2019-08-01',
               '2019-09-01', '2019-10-01', '2019-11-01', '2019-12-01',
               '2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01'],
              dtype='datetime64[ns]', freq='MS')


In [14]:
# The start date is written month-first here ('05-01-2019'); the recorded
# output shows pandas parsing it as 1 May 2019, so the resulting range is the
# same twelve month-start dates.
dates2 = pd.date_range(start='05-01-2019', periods=12, freq='MS')
print(dates2)

DatetimeIndex(['2019-05-01', '2019-06-01', '2019-07-01', '2019-08-01',
               '2019-09-01', '2019-10-01', '2019-11-01', '2019-12-01',
               '2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01'],
              dtype='datetime64[ns]', freq='MS')


In [15]:
# Eight hourly timestamps starting at 09:00 on 17 May 2019.
# The frequency is given as an offset object rather than the string alias 'H':
# that alias is deprecated in newer pandas (renamed 'h'), whereas
# pd.offsets.Hour() yields the same timestamps on both old and new releases.
dates3 = pd.date_range('2019/05/17 09:00:00', periods=8, freq=pd.offsets.Hour())
print(dates3)

DatetimeIndex(['2019-05-17 09:00:00', '2019-05-17 10:00:00',
               '2019-05-17 11:00:00', '2019-05-17 12:00:00',
               '2019-05-17 13:00:00', '2019-05-17 14:00:00',
               '2019-05-17 15:00:00', '2019-05-17 16:00:00'],
              dtype='datetime64[ns]', freq='H')


## Pandas DataFrame

### Creating a DataFrame

In [16]:
import pandas as pd
import numpy as np

# A 10x4 table of standard-normal samples with columns A-D.
# No random seed is set in this cell, so the numbers differ on every run and
# will not match the table recorded below.
df = pd.DataFrame(
    data=np.random.randn(10, 4),
    columns=['A', 'B', 'C', 'D'],
)
print(df)

          A         B         C         D
0  0.023654 -0.269924  0.694438  0.040826
1  0.537053  0.719049 -0.023338  0.509946
2  0.023140 -0.442828 -0.922281 -1.567735
3  0.046315 -0.686066 -2.108718  0.577322
4  0.448606  0.971420 -0.275715 -1.695220
5  0.619324 -0.175496 -0.236109  0.728733
6  0.537426 -0.403340  0.655414 -0.193278
7  0.836323 -0.771222 -0.561935 -0.491586
8 -2.093254  0.532104  0.067748 -0.419500
9 -0.485611  0.298147 -0.466288  0.156754


In [17]:
# Load a DataFrame from the CSV file 'data.csv' (path is relative to the
# notebook's working directory); this rebinds df, replacing any earlier value.
df = pd.read_csv(filepath_or_buffer='data.csv')

### Specifying the Index in a DataFrame

In [18]:
# Reload the CSV so this cell can be re-run on its own, then label each row
# with consecutive calendar days starting on 25 May 2019.
df = pd.read_csv('data.csv')
# Size the date range from the data instead of hard-coding 10 periods, so the
# index assignment cannot fail with a length mismatch if data.csv has a
# different number of rows (the recorded output shows 10 rows).
days = pd.date_range('20190525', periods=len(df))
df.index = days
print(df)

                   A         B         C         D
2019-05-25  0.187497  1.122150 -0.988277 -1.985934
2019-05-26  0.360803 -0.562243 -0.340693 -0.986988
2019-05-27 -0.040627  0.067333 -0.452978  0.686223
2019-05-28 -0.279572 -0.702492  0.252265  0.958977
2019-05-29  0.537438 -1.737568  0.714727 -0.939288
2019-05-30  0.070011 -0.516443 -1.655689  0.246721
2019-05-31  0.001268  0.951517  2.107360 -0.108726
2019-06-01 -0.185258  0.856520 -0.686285  1.104195
2019-06-02  0.387023  1.706336 -2.452653  0.260466
2019-06-03 -1.054974  0.556775 -0.945219 -0.030295


In [19]:
# Show the DataFrame's row labels (its index object).
print(df.index)

DatetimeIndex(['2019-05-25', '2019-05-26', '2019-05-27', '2019-05-28',
               '2019-05-29', '2019-05-30', '2019-05-31', '2019-06-01',
               '2019-06-02', '2019-06-03'],
              dtype='datetime64[ns]', freq='D')


In [20]:
# Show the underlying data as a NumPy array, without the index or column labels.
print(df.values)

[[ 1.874970e-01  1.122150e+00 -9.882770e-01 -1.985934e+00]
 [ 3.608030e-01 -5.622430e-01 -3.406930e-01 -9.869880e-01]
 [-4.062700e-02  6.733300e-02 -4.529780e-01  6.862230e-01]
 [-2.795720e-01 -7.024920e-01  2.522650e-01  9.589770e-01]
 [ 5.374380e-01 -1.737568e+00  7.147270e-01 -9.392880e-01]
 [ 7.001100e-02 -5.164430e-01 -1.655689e+00  2.467210e-01]
 [ 1.268000e-03  9.515170e-01  2.107360e+00 -1.087260e-01]
 [-1.852580e-01  8.565200e-01 -6.862850e-01  1.104195e+00]
 [ 3.870230e-01  1.706336e+00 -2.452653e+00  2.604660e-01]
 [-1.054974e+00  5.567750e-01 -9.452190e-01 -3.029500e-02]]
