# Chapter 3 Manipulating Tabular Data Using Pandas

## Pandas Series

In [1]:
import pandas as pd

# A Series built without an explicit index gets the default 0..n-1 labels.
series = pd.Series(data=list(range(1, 6)))
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


### Creating a Series Using a Specified Index

In [2]:
# Same five values, now labelled by letter; the label 'c' is deliberately
# used twice, so the index is not unique.
series = pd.Series(
    data=list(range(1, 6)),
    index=list('abcdc'),
)
print(series)

a    1
b    2
c    3
d    4
c    5
dtype: int64


### Accessing Elements in a Series

In [3]:
# Positional lookup of the third element. Plain `series[2]` only works here
# through an integer-as-position fallback on this label-indexed Series, a
# behaviour pandas 2.1 deprecated; `.iat` is the explicit scalar-by-position
# accessor and prints the same value.
print(series.iat[2])

3


In [4]:
# .iloc selects purely by integer position (0-based), ignoring the labels.
value_at_position_2 = series.iloc[2]
print(value_at_position_2)

3


In [5]:
# Square brackets with a label look the element up through the index.
value_for_d = series['d']
print(value_for_d)

4


In [6]:
# .loc is the explicit label-based accessor.
value_for_d_via_loc = series.loc['d']
print(value_for_d_via_loc)

4


In [7]:
# 'c' occurs twice in the index, so the lookup yields a Series holding both
# matching entries rather than a single scalar.
entries_for_c = series['c']
print(entries_for_c)

c    3
c    5
dtype: int64


In [8]:
# An integer slice on this label-indexed Series is applied by position:
# everything from the third element onwards.
from_third_onwards = slice(2, None)
print(series[from_third_onwards])

c    3
d    4
c    5
dtype: int64


In [9]:
# Same positional slice, spelled with the explicit .iloc accessor.
tail_by_position = series.iloc[2:]
print(tail_by_position)

c    3
d    4
c    5
dtype: int64


### Specifying a Datetime Range as the Index of a Series

In [10]:
# Twelve consecutive calendar days starting on 25 May 2019.
dates1 = pd.date_range(
    start='20190525',
    periods=12,
)
print(dates1)

DatetimeIndex(['2019-05-25', '2019-05-26', '2019-05-27', '2019-05-28',
               '2019-05-29', '2019-05-30', '2019-05-31', '2019-06-01',
               '2019-06-02', '2019-06-03', '2019-06-04', '2019-06-05'],
              dtype='datetime64[ns]', freq='D')


In [11]:
# Twelve values labelled with the twelve dates in dates1; the index is
# supplied at construction instead of being assigned afterwards.
series = pd.Series(list(range(1, 13)), index=dates1)
print(series)

2019-05-25     1
2019-05-26     2
2019-05-27     3
2019-05-28     4
2019-05-29     5
2019-05-30     6
2019-05-31     7
2019-06-01     8
2019-06-02     9
2019-06-03    10
2019-06-04    11
2019-06-05    12
Freq: D, dtype: int64


### Date Ranges

In [12]:
# Twelve consecutive month-end dates, starting with the first month-end on or
# after 2019-05-01. An offset object is used instead of the string alias 'M',
# which pandas 2.2 deprecated in favour of 'ME'; MonthEnd() is accepted by
# both older and newer releases and produces the same dates.
dates2 = pd.date_range('2019-05-01', periods=12, freq=pd.offsets.MonthEnd())
print(dates2)

DatetimeIndex(['2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31',
               '2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30'],
              dtype='datetime64[ns]', freq='M')


In [13]:
# 'MS' (month start) gives the first day of each of twelve consecutive months.
dates2 = pd.date_range(
    start='2019-05-01',
    periods=12,
    freq='MS',
)
print(dates2)

DatetimeIndex(['2019-05-01', '2019-06-01', '2019-07-01', '2019-08-01',
               '2019-09-01', '2019-10-01', '2019-11-01', '2019-12-01',
               '2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01'],
              dtype='datetime64[ns]', freq='MS')


In [14]:
# The start date written month-first ('05-01-2019') is parsed as 1 May 2019,
# so the resulting range matches the ISO-formatted version.
dates2 = pd.date_range(
    start='05-01-2019',
    periods=12,
    freq='MS',
)
print(dates2)

DatetimeIndex(['2019-05-01', '2019-06-01', '2019-07-01', '2019-08-01',
               '2019-09-01', '2019-10-01', '2019-11-01', '2019-12-01',
               '2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01'],
              dtype='datetime64[ns]', freq='MS')


In [15]:
# Eight hourly timestamps starting at 09:00 on 17 May 2019. An offset object
# is used instead of the string alias 'H', which pandas 2.2 deprecated in
# favour of lowercase 'h'; Hour() is accepted by both older and newer
# releases and produces the same timestamps.
dates3 = pd.date_range('2019/05/17 09:00:00', periods=8, freq=pd.offsets.Hour())
print(dates3)

DatetimeIndex(['2019-05-17 09:00:00', '2019-05-17 10:00:00',
               '2019-05-17 11:00:00', '2019-05-17 12:00:00',
               '2019-05-17 13:00:00', '2019-05-17 14:00:00',
               '2019-05-17 15:00:00', '2019-05-17 16:00:00'],
              dtype='datetime64[ns]', freq='H')


## Pandas DataFrame

### Creating a DataFrame

In [16]:
import pandas as pd
import numpy as np

# A 10x4 frame of standard-normal samples with columns A-D. The generator is
# seeded so that re-running the notebook reproduces exactly the same numbers;
# an unseeded draw would print different values on every run.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((10, 4)), columns=list('ABCD'))
print(df)

          A         B         C         D
0  0.305105  0.240376  0.350113  0.567974
1  0.102303  0.513897  0.694170  0.118541
2  0.263734 -0.293242  1.701675  2.054463
3 -1.566876  0.518524  0.903695  0.075384
4 -0.577484  1.224248  1.113758 -0.071571
5 -0.085298 -1.507773  0.342582 -0.087198
6 -0.035371 -0.072490 -2.025304  1.003003
7  0.891709 -0.377260  1.961159 -0.233158
8  0.480387 -1.471018 -0.681615 -1.180124
9 -3.281776 -0.796900  0.690201  0.505465


In [17]:
# Load data.csv (resolved relative to the working directory) into df,
# replacing the frame previously bound to that name.
df = pd.read_csv(filepath_or_buffer='data.csv')