In [2]:
import pandas as pd
def parse_date(x):
    """Parse ``YYYY-MM-DD`` text into pandas datetime values.

    Delegates to ``pd.to_datetime`` with an explicit format, so it accepts a
    single string (returns a ``pd.Timestamp``) or a whole column (returns a
    datetime64 Series).
    """
    return pd.to_datetime(x, format='%Y-%m-%d')


# Read the CSV first, then convert the Date column explicitly.
# The earlier form -- pd.datetime.strptime handed to read_csv(date_parser=...) --
# depends on `pd.datetime` (removed in pandas 2.0) and on `date_parser`
# (deprecated since pandas 2.0), so it no longer runs on current pandas.
df = pd.read_csv('adbe.csv')
df['Date'] = parse_date(df['Date'])
# check the parse: Date must be a datetime column, and the datetime-only
# accessor day_name() must work on its first value
assert pd.api.types.is_datetime64_any_dtype(df['Date'])
df.loc[0, 'Date'].day_name()
# number of rows and cols (evaluated but not rendered: only the last
# expression of a cell is displayed)
df.shape
df.head(2)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1986-08-14,0.0,0.230469,0.222656,0.222656,0.161679,4160000
1,1986-08-15,0.0,0.222656,0.21875,0.21875,0.158843,4332800


In [3]:
# Derive the weekday name of every Date and store it as a new `Day` column.
weekday_names = df['Date'].dt.day_name()
df['Day'] = weekday_names
# preview the first two rows, now including `Day`
df.iloc[:2]

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Day
0,1986-08-14,0.0,0.230469,0.222656,0.222656,0.161679,4160000,Thursday
1,1986-08-15,0.0,0.222656,0.21875,0.21875,0.158843,4332800,Friday


## Boolean indexing to get the first and last days in the series

In [4]:
# Find the earliest and latest dates, then use a boolean mask on the Date
# column to print the matching row(s) for each.
earliest, latest = df['Date'].min(), df['Date'].max()
print(df[df['Date'].eq(earliest)])
print(df[df['Date'].eq(latest)])

        Date  Open      High       Low     Close  Adj Close   Volume       Day
0 1986-08-14   0.0  0.230469  0.222656  0.222656   0.161679  4160000  Thursday
           Date   Open        High         Low       Close   Adj Close  \
8614 2020-10-16  504.0  510.339996  500.700012  502.820007  502.820007   

       Volume     Day  
8614  2441200  Friday  


In [5]:
# Subtracting two timestamps yields a Timedelta; `.days` is its whole-day part.
date_span = df['Date'].max() - df['Date'].min()
date_span.days

12482

In [6]:
# Keep only the rows dated on or after 1 January 2020.
year_start = pd.Timestamp('2020-01-01')
df[df['Date'].ge(year_start)]

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Day
8414,2020-01-02,330.000000,334.480011,329.170013,334.429993,334.429993,1990100,Thursday
8415,2020-01-03,329.170013,332.980011,328.690002,331.809998,331.809998,1577600,Friday
8416,2020-01-06,328.290009,333.910004,328.190002,333.709991,333.709991,1874700,Monday
8417,2020-01-07,334.149994,334.790009,332.309998,333.390015,333.390015,2500800,Tuesday
8418,2020-01-08,333.809998,339.230011,333.399994,337.869995,337.869995,2248500,Wednesday
...,...,...,...,...,...,...,...,...
8610,2020-10-12,508.260010,516.000000,499.540009,510.890015,510.890015,3149700,Monday
8611,2020-10-13,512.190002,519.599976,511.899994,514.309998,514.309998,2438100,Tuesday
8612,2020-10-14,514.340027,516.950012,501.600006,506.309998,506.309998,2032900,Wednesday
8613,2020-10-15,499.260010,502.500000,491.850006,501.149994,501.149994,2042800,Thursday


## Set the `Date` column as the index (a good idea for time series)

In [7]:
# Move Date from an ordinary column into the index.
# The guard makes the cell safe to re-run: once Date is the index it is no
# longer a column, so an unconditional set_index('Date') raises KeyError.
# Rebinding `df` (rather than inplace=True) leaves the previous frame object
# unmodified.
if df.index.name != 'Date':
    df = df.set_index('Date')
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1986-08-14,0.000000,0.230469,0.222656,0.222656,0.161679,4160000,Thursday
1986-08-15,0.000000,0.222656,0.218750,0.218750,0.158843,4332800,Friday
1986-08-18,0.000000,0.218750,0.210938,0.210938,0.153170,2828800,Monday
1986-08-19,0.000000,0.218750,0.214844,0.214844,0.156006,2060800,Tuesday
1986-08-20,0.000000,0.226563,0.214844,0.214844,0.156006,684800,Wednesday
...,...,...,...,...,...,...,...
2020-10-12,508.260010,516.000000,499.540009,510.890015,510.890015,3149700,Monday
2020-10-13,512.190002,519.599976,511.899994,514.309998,514.309998,2438100,Tuesday
2020-10-14,514.340027,516.950012,501.600006,506.309998,506.309998,2032900,Wednesday
2020-10-15,499.260010,502.500000,491.850006,501.149994,501.149994,2042800,Thursday


## Now use simple slicing

In [8]:
# Partial date strings slice the rows of the Date index.
# These two slices are evaluated but not shown, because only the last
# expression of a cell is rendered.
df.loc['2018':'2019']
df.loc['2020-01':'2020-02']
# closing-price statistics for January - February 2020
ser_close = df.loc['2020-01':'2020-02', 'Close']
for stat_name in ('mean', 'min', 'max', 'std'):  # std = standard deviation
    print(getattr(ser_close, stat_name)())

354.72999954999995
331.809998
383.27999900000003
14.607439967571867


## Resampling
Let's use resampling to get the maximum closing price for each month of 2019.

In [9]:
# Select the 2019 rows with .loc: plain df['2019'] is treated as a column
# lookup in pandas >= 2.0 and raises KeyError (row selection by partial date
# string through [] was deprecated in 1.2 and then removed).
# Then bucket the closing prices by calendar month and take each month's max.
# NOTE: pandas 2.2+ prefers the alias 'ME' over 'M' for month-end; 'M' is kept
# here so the cell still runs on older pandas versions.
df.loc['2019', 'Close'].resample('M').max()

Date
2019-01-31    247.820007
2019-02-28    262.899994
2019-03-31    267.690002
2019-04-30    289.250000
2019-05-31    285.579987
2019-06-30    302.109985
2019-07-31    311.269989
2019-08-31    298.440002
2019-09-30    287.750000
2019-10-31    279.390015
2019-11-30    309.529999
2019-12-31    331.200012
Freq: M, Name: Close, dtype: float64

## Use `agg` for stats on different cols

In [10]:
# Select the 2020 rows with .loc: plain df['2020'] is treated as a column
# lookup in pandas >= 2.0 and raises KeyError.
# Per calendar month: average close, highest high, lowest low, total volume.
# NOTE: pandas 2.2+ prefers the alias 'ME' over 'M' for month-end; 'M' is kept
# here so the cell still runs on older pandas versions.
df.loc['2020'].resample('M').agg({
    'Close': 'mean',
    'High': 'max',
    'Low': 'min',
    'Volume': 'sum',
})

Unnamed: 0_level_0,Close,High,Low,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-31,344.954763,356.820007,328.190002,44249000
2020-02-29,365.534209,386.75,324.0,56917400
2020-03-31,318.99,364.950012,255.130005,126565500
2020-04-30,329.446189,356.230011,289.709991,73062200
2020-05-31,368.556003,391.269989,340.0,55543400
2020-06-30,411.542275,446.149994,382.0,70625900
2020-07-31,442.405,470.609985,416.290009,54649300
2020-08-31,468.464287,533.700012,431.589996,54517800
2020-09-30,485.745239,536.880005,452.519989,84770900
2020-10-31,497.131668,519.599976,477.5,26646100
