In [3]:
import pandas as pd

# Reading CSV with ```parse_data``` parameter set to True

In [4]:
series = pd.read_csv("daily-minimum-temperatures.csv", header=0, index_col=0, parse_dates=True)

In [5]:
df = pd.DataFrame()

# Extracting Dates

In [6]:
df['month'] = [series.index[i].month for i in range(len(series))]
df

Unnamed: 0,month
0,1
1,1
2,1
3,1
4,1
...,...
3645,12
3646,12
3647,12
3648,12


In [7]:
df['day'] = [series.index[i].day for i in range(len(series))]
df

Unnamed: 0,month,day
0,1,1
1,1,2
2,1,3
3,1,4
4,1,5
...,...,...
3645,12,27
3646,12,28
3647,12,29
3648,12,30


In [17]:
df['temperature'] = [series.values[i][0] for i in range(len(series))]
df

Unnamed: 0,month,day,temperature
0,1,1,20.7
1,1,2,17.9
2,1,3,18.8
3,1,4,14.6
4,1,5,15.8
...,...,...,...
3645,12,27,14.0
3646,12,28,13.6
3647,12,29,13.5
3648,12,30,15.7


# Extracting Statistics Features

In [19]:
temps = pd.DataFrame(series.values)

In [21]:
window = temps.expanding()
dataframe = pd.concat([window.min(), window.mean(), window.max(), temps.shift(-1)], axis=1)
dataframe.columns = ['min', 'mean', 'max', 't+1']

In [22]:
dataframe

Unnamed: 0,min,mean,max,t+1
0,20.7,20.700000,20.7,17.9
1,17.9,19.300000,20.7,18.8
2,17.9,19.133333,20.7,14.6
3,14.6,18.000000,20.7,15.8
4,14.6,17.560000,20.7,15.8
...,...,...,...,...
3645,0.0,11.174712,26.3,13.6
3646,0.0,11.175377,26.3,13.5
3647,0.0,11.176014,26.3,15.7
3648,0.0,11.177254,26.3,13.0


# Creating Lag Features

In [24]:
temps = pd.DataFrame(series.values)
dataframe = pd.concat([temps.shift(3), temps.shift(2), temps.shift(1), temps], axis=1)
dataframe.columns = ['t-2', 't-1', 't', 't+1']

In [25]:
dataframe

Unnamed: 0,t-2,t-1,t,t+1
0,,,,20.7
1,,,20.7,17.9
2,,20.7,17.9,18.8
3,20.7,17.9,18.8,14.6
4,17.9,18.8,14.6,15.8
...,...,...,...,...
3645,10.0,12.9,14.6,14.0
3646,12.9,14.6,14.0,13.6
3647,14.6,14.0,13.6,13.5
3648,14.0,13.6,13.5,15.7


# Extracting Rolling Mean Feature

In [27]:
temps = pd.DataFrame(series.values)
shifted = temps.shift(1)
window = shifted.rolling(window=2)
means = window.mean()
dataframe = pd.concat([means, temps], axis=1)
dataframe.columns = ['mean(t-1,t)', 't+1']

In [28]:
dataframe

Unnamed: 0,"mean(t-1,t)",t+1
0,,20.7
1,,17.9
2,19.30,18.8
3,18.35,14.6
4,16.70,15.8
...,...,...
3645,13.75,14.0
3646,14.30,13.6
3647,13.80,13.5
3648,13.55,15.7


# Extracting Rolling Statistics Features

In [29]:
temps = pd.DataFrame(series.values)
width = 3
shifted = temps.shift(width - 1)
window = shifted.rolling(window=width)
dataframe = pd.concat([window.min(), window.mean(), window.max(), temps], axis=1)
dataframe.columns = ['min', 'mean', 'max', 't+1']

In [30]:
dataframe

Unnamed: 0,min,mean,max,t+1
0,,,,20.7
1,,,,17.9
2,,,,18.8
3,,,,14.6
4,17.9,19.133333,20.7,15.8
...,...,...,...,...
3645,10.0,12.266667,13.9,14.0
3646,10.0,12.500000,14.6,13.6
3647,12.9,13.833333,14.6,13.5
3648,13.6,14.066667,14.6,15.7
