In [4]:
import pandas as pd
df = pd.read_csv('datasets/alta_noaa_1980_2019.csv')

1. Convert a column with date information to a date.

In [5]:
dates = pd.to_datetime(df.DATE)
print(dates)

0       1980-01-01
1       1980-01-02
2       1980-01-03
3       1980-01-04
4       1980-01-05
           ...    
14155   2019-09-03
14156   2019-09-04
14157   2019-09-05
14158   2019-09-06
14159   2019-09-07
Name: DATE, Length: 14160, dtype: datetime64[ns]


2. Put the date information into the index for a numeric column.

In [3]:
snow = df.SNOW.rename(dates)
print(snow)

1980-01-01    2.0
1980-01-02    3.0
1980-01-03    1.0
1980-01-04    0.0
1980-01-05    0.0
             ... 
2019-09-03    0.0
2019-09-04    0.0
2019-09-05    0.0
2019-09-06    0.0
2019-09-07    0.0
Name: SNOW, Length: 14160, dtype: float64


3. Calculate the average value of the column for each month.

In [18]:
avg = snow.loc['1980-01':'2019-09']
avg.resample('M').mean()

1980-01-31    4.645161
1980-02-29    3.879310
1980-03-31    3.967742
1980-04-30    1.000000
1980-05-31    1.516129
                ...   
2019-05-31    0.838710
2019-06-30    0.000000
2019-07-31    0.000000
2019-08-31    0.000000
2019-09-30    0.000000
Freq: M, Name: SNOW, Length: 477, dtype: float64

4. Calculate the average value of the column for every 2 months.

In [39]:
avg = snow.loc['2018-01':'2019-09']

def two_months(index):
    year = index.year
    month = index.month
    if month + 1 == 13:
        return month, 1, year + 1
    else:
        return month, month + 1, year
    

avg.groupby(two_months).mean()


(1, 2, 2018)      1.806452
(1, 2, 2019)      2.854839
(2, 3, 2018)      2.492857
(2, 3, 2019)      3.953571
(3, 4, 2018)      2.345161
(3, 4, 2019)      3.012903
(4, 5, 2018)      1.453333
(4, 5, 2019)      1.863333
(5, 6, 2018)      0.200000
(5, 6, 2019)      0.838710
(6, 7, 2018)      0.000000
(6, 7, 2019)      0.000000
(7, 8, 2018)      0.000000
(7, 8, 2019)      0.000000
(8, 9, 2018)      0.000000
(8, 9, 2019)      0.000000
(9, 10, 2018)     0.000000
(9, 10, 2019)     0.000000
(10, 11, 2018)    0.861290
(11, 12, 2018)    1.393333
(12, 1, 2019)     2.050000
Name: SNOW, dtype: float64

5. Calculate the percentage of the column out of the total for each month.


In [41]:
per = snow.loc[ '1980-01':'2019-09']
per.resample('2M').sum().div(season2017.sum()).mul(100)

1980-01-31    0.747485
1980-03-31    1.222449
1980-05-31    0.399697
1980-07-31    0.005191
1980-09-30    0.000000
                ...   
2019-01-31    0.778630
2019-03-31    1.059456
2019-05-31    0.425132
2019-07-31    0.000000
2019-09-30    0.000000
Freq: 2M, Name: SNOW, Length: 239, dtype: float64

6. Calculate the average value of the column for a rolling window of size 7.

In [14]:
snow.rolling(7).mean()

1980-01-01    NaN
1980-01-02    NaN
1980-01-03    NaN
1980-01-04    NaN
1980-01-05    NaN
             ... 
2019-09-03    0.0
2019-09-04    0.0
2019-09-05    0.0
2019-09-06    0.0
2019-09-07    0.0
Name: SNOW, Length: 14160, dtype: float64

7. Using .loc pull out the first 3 months of a year.


In [71]:
years = list(range(1980, 2020))
str_years = [str(year) for year in years]
for i in range(len(str_years)):
    print(snow.loc[f'{str_years[i]}-01-01':f'{str_years[i]}-04-01'])


1980-01-01     2.0
1980-01-02     3.0
1980-01-03     1.0
1980-01-04     0.0
1980-01-05     0.0
              ... 
1980-03-28    12.0
1980-03-29     0.0
1980-03-30     0.0
1980-03-31    13.0
1980-04-01     0.0
Name: SNOW, Length: 92, dtype: float64
1981-01-01     0.0
1981-01-02     0.0
1981-01-03     0.0
1981-01-04     1.0
1981-01-05     8.0
              ... 
1981-03-28     6.0
1981-03-29     2.0
1981-03-30    12.0
1981-03-31    10.0
1981-04-01     0.0
Name: SNOW, Length: 91, dtype: float64
1982-01-01     6.0
1982-01-02     7.0
1982-01-03    16.0
1982-01-04     0.0
1982-01-05     4.0
              ... 
1982-03-28     0.0
1982-03-29     1.0
1982-03-30    19.0
1982-03-31     6.0
1982-04-01     0.0
Name: SNOW, Length: 91, dtype: float64
1983-01-01     0.0
1983-01-02     0.0
1983-01-03     0.0
1983-01-04     0.0
1983-01-05     2.0
              ... 
1983-03-28     3.0
1983-03-29     0.0
1983-03-30     2.0
1983-03-31     0.0
1983-04-01    12.0
Name: SNOW, Length: 91, dtype: float64
1984-01-

8. Using .loc pull out the last 4 months of a year

In [72]:
years = list(range(1980, 2020))
str_years = [str(year) for year in years]
for i in range(len(str_years) - 1):
    print(snow.loc[f'{str_years[i]}-09-01':f'{str_years[i + 1]}-01-01'])

1980-09-01    0.0
1980-09-02    0.0
1980-09-03    0.0
1980-09-04    0.0
1980-09-05    0.0
             ... 
1980-12-28    0.0
1980-12-29    0.0
1980-12-30    0.0
1980-12-31    0.0
1981-01-01    0.0
Name: SNOW, Length: 123, dtype: float64
1981-09-01     0.0
1981-09-02     0.0
1981-09-03     0.0
1981-09-04     0.0
1981-09-05     0.0
              ... 
1981-12-28     8.0
1981-12-29     0.0
1981-12-30     3.0
1981-12-31    15.0
1982-01-01     6.0
Name: SNOW, Length: 123, dtype: float64
1982-09-01    0.0
1982-09-02    0.0
1982-09-03    0.0
1982-09-04    0.0
1982-09-05    0.0
             ... 
1982-12-28    7.0
1982-12-29    0.0
1982-12-30    0.0
1982-12-31    0.0
1983-01-01    0.0
Name: SNOW, Length: 123, dtype: float64
1983-09-01     0.0
1983-09-02     0.0
1983-09-03     0.0
1983-09-04     0.0
1983-09-05     0.0
              ... 
1983-12-28    13.0
1983-12-29     0.0
1983-12-30     3.0
1983-12-31     7.0
1984-01-01     1.0
Name: SNOW, Length: 123, dtype: float64
1984-09-01    0.0
1984-09-