
# Time Series Analysis with Pandas


**[Python Data Science Handbook](https://jakevdp.github.io/PythonDataScienceHandbook/)**: Python Data Science Handbook<br>
**[Python Data Science Handbook](https://jakevdp.github.io/PythonDataScienceHandbook/03.11-working-with-time-series.html)**: Working with Time Series<br>
**[Consumer Price Index](https://beta.bls.gov/dataViewer/view)**: All items in U.S. city average, all urban consumers, not seasonally adjusted




In [1]:
from datetime import datetime
from dateutil import parser
import numpy as np
import pandas as pd
from pandas.tseries.offsets import BDay
from pandas_datareader import data
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn; seaborn.set()

In [None]:
pip install pand

In [6]:
df = pd.read_csv('/Users/jasonrobinson/Downloads/cpi_latest.csv')
df.head()

Unnamed: 0,Series ID,Year,Period,Label,Value
0,CUUR0000SA0,2012,M01,2012 Jan,226.665
1,CUUR0000SA0,2012,M02,2012 Feb,227.663
2,CUUR0000SA0,2012,M03,2012 Mar,229.392
3,CUUR0000SA0,2012,M04,2012 Apr,230.085
4,CUUR0000SA0,2012,M05,2012 May,229.815


In [7]:
df = pd.read_csv('/Users/jasonrobinson/Downloads/cpi_latest.csv', parse_dates=['Label'])
df.head()

Unnamed: 0,Series ID,Year,Period,Label,Value
0,CUUR0000SA0,2012,M01,2012-01-01,226.665
1,CUUR0000SA0,2012,M02,2012-02-01,227.663
2,CUUR0000SA0,2012,M03,2012-03-01,229.392
3,CUUR0000SA0,2012,M04,2012-04-01,230.085
4,CUUR0000SA0,2012,M05,2012-05-01,229.815


In [44]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 123 entries, 0 to 122
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Series ID  123 non-null    object        
 1   Year       123 non-null    int64         
 2   Period     123 non-null    object        
 3   Label      123 non-null    datetime64[ns]
 4   Value      123 non-null    float64       
dtypes: datetime64[ns](1), float64(1), int64(1), object(2)
memory usage: 4.9+ KB


In [16]:
label_value = df[['Label', 'Value']]
index = pd.DatetimeIndex(label_value['Label'])

In [47]:
data = pd.Series(np.arange(0,123), index=index)
data

Label
2012-01-01      0
2012-02-01      1
2012-03-01      2
2012-04-01      3
2012-05-01      4
             ... 
2021-11-01    118
2021-12-01    119
2022-01-01    120
2022-02-01    121
2022-03-01    122
Length: 123, dtype: int64

In [48]:
data['2015-08-12':'2019-08-12']

Label
2015-09-01    44
2015-10-01    45
2015-11-01    46
2015-12-01    47
2016-01-01    48
2016-02-01    49
2016-03-01    50
2016-04-01    51
2016-05-01    52
2016-06-01    53
2016-07-01    54
2016-08-01    55
2016-09-01    56
2016-10-01    57
2016-11-01    58
2016-12-01    59
2017-01-01    60
2017-02-01    61
2017-03-01    62
2017-04-01    63
2017-05-01    64
2017-06-01    65
2017-07-01    66
2017-08-01    67
2017-09-01    68
2017-10-01    69
2017-11-01    70
2017-12-01    71
2018-01-01    72
2018-02-01    73
2018-03-01    74
2018-04-01    75
2018-05-01    76
2018-06-01    77
2018-07-01    78
2018-08-01    79
2018-09-01    80
2018-10-01    81
2018-11-01    82
2018-12-01    83
2019-01-01    84
2019-02-01    85
2019-03-01    86
2019-04-01    87
2019-05-01    88
2019-06-01    89
2019-07-01    90
2019-08-01    91
dtype: int64

In [49]:
data['2022']

Label
2022-01-01    120
2022-02-01    121
2022-03-01    122
dtype: int64

In [52]:
# Get our daily dates
data.to_period('D')

Label
2012-01-01      0
2012-02-01      1
2012-03-01      2
2012-04-01      3
2012-05-01      4
             ... 
2021-11-01    118
2021-12-01    119
2022-01-01    120
2022-02-01    121
2022-03-01    122
Freq: D, Length: 123, dtype: int64

In [53]:
# Get our monthly dates
data.to_period('M')

Label
2012-01      0
2012-02      1
2012-03      2
2012-04      3
2012-05      4
          ... 
2021-11    118
2021-12    119
2022-01    120
2022-02    121
2022-03    122
Freq: M, Length: 123, dtype: int64

In [55]:
data - data[0]

Label
2012-01-01      0
2012-02-01      1
2012-03-01      2
2012-04-01      3
2012-05-01      4
             ... 
2021-11-01    118
2021-12-01    119
2022-01-01    120
2022-02-01    121
2022-03-01    122
Length: 123, dtype: int64

In [60]:
label.date_range('2015-08-12','2019-08-12')

DatetimeIndex(['2015-08-12', '2015-08-13', '2015-08-14', '2015-08-15',
               '2015-08-16', '2015-08-17', '2015-08-18', '2015-08-19',
               '2015-08-20', '2015-08-21',
               ...
               '2019-08-03', '2019-08-04', '2019-08-05', '2019-08-06',
               '2019-08-07', '2019-08-08', '2019-08-09', '2019-08-10',
               '2019-08-11', '2019-08-12'],
              dtype='datetime64[ns]', length=1462, freq='D')

In [61]:
pd.timedelta_range(0, periods=9, freq="2H30T")

TimedeltaIndex(['00:00:00', '02:30:00', '05:00:00', '07:30:00', '10:00:00',
                '12:30:00', '15:00:00', '17:30:00', '20:00:00'],
               dtype='timedelta64[ns]', freq='150T')

In [84]:
from pandas_datareader import data
import pandas_datareader as DataReader
import pandas_datareader.fred as FredReader
pdr.get_data_fred('GS10')[:5]


Unnamed: 0_level_0,GS10
DATE,Unnamed: 1_level_1
2017-05-01,2.3
2017-06-01,2.19
2017-07-01,2.32
2017-08-01,2.21
2017-09-01,2.2


In [78]:
 FredReader('AAPL', start='2019-08-12', end='2019-08-15',
            retry_count=3, 
            pause=0.1, 
            timeout=30, 
            session=None, 
            freq='H')

TypeError: 'module' object is not callable

In [80]:
def dataset_loader(stock_name):
    
    dataset = data_reader.pdr(stock_name , data_source = 'yahoo')
    
    start_date = str(dataset.index[0]).split()[0]
    end_date = str(dataset.index[-1]).split()[0]
    
    close = dataset['Close']
    return close  