# Exploring Time Series Dataset:

In [1]:
import pandas as pd

In [17]:
series1 = pd.read_csv('daily-min-temperatures.csv',header = 0,index_col = 0,parse_dates = True , squeeze = True)

In [18]:
series1.head()

Date
1981-01-01    20.7
1981-01-02    17.9
1981-01-03    18.8
1981-01-04    14.6
1981-01-05    15.8
Name: Temp, dtype: float64

In [19]:
series1.tail()

Date
1990-12-27    14.0
1990-12-28    13.6
1990-12-29    13.5
1990-12-30    15.7
1990-12-31    13.0
Name: Temp, dtype: float64

In [20]:
series1.describe()

count    3650.000000
mean       11.177753
std         4.071837
min         0.000000
25%         8.300000
50%        11.000000
75%        14.000000
max        26.300000
Name: Temp, dtype: float64

# Basic Feature Engineering:

In This time-series dataset i am genrating a new feature like $month$ and $day$.

In [12]:
dataframe = pd.DataFrame()

In [23]:
dataframe['month'] = [series1.index[i].month for i in range(len(series1))]

In [24]:
dataframe['day'] = [series1.index[i].day for i in range(len(series1))]

In [25]:
dataframe['temperature'] = [series1[i] for i in range(len(series1))]

In [26]:
dataframe.head()

Unnamed: 0,month,day,temperature
0,1,1,20.7
1,1,2,17.9
2,1,3,18.8
3,1,4,14.6
4,1,5,15.8


# Lag Features:

It mean's that our $month$ and $day$ features are not much usefull for predicting temperature on future.

In [32]:
series2 = pd.read_csv('daily-min-temperatures.csv',header = 0,index_col = 0,parse_dates = True,squeeze = True)

In [46]:
temps = pd.DataFrame(series2.values)

In [47]:
dataframe2 = pd.concat([temps.shift(3),temps.shift(2),temps.shift(1),temps],axis = 1)

In [49]:
dataframe2.columns = ['t-2','t-1','t','t+1']

In [51]:
dataframe2.head()

Unnamed: 0,t-2,t-1,t,t+1
0,,,,20.7
1,,,20.7,17.9
2,,20.7,17.9,18.8
3,20.7,17.9,18.8,14.6
4,17.9,18.8,14.6,15.8


# Rolling Window Statistics

In [185]:
series3 = pd.read_csv('daily-min-temperatures.csv',header = 0,index_col = 0,parse_dates = True,squeeze = True)

In [186]:
temps = pd.DataFrame(series3.values)

In [187]:
shifted = temps.shift(3)

In [188]:
windows = shifted.rolling(window = 3)

In [189]:
mean = windows.mean()

In [190]:
dataframe3 = pd.concat([mean,temps],axis = 1)

In [191]:
dataframe3.columns = ['mean(t-2,t-1,t)','t+1']

In [193]:
dataframe3.head(6)

Unnamed: 0,"mean(t-2,t-1,t)",t+1
0,,20.7
1,,17.9
2,,18.8
3,,14.6
4,,15.8
5,19.133333,15.8


We can also metion a new features as $minimum$ $max$ and including $mean$ values in new dataframe.

In [194]:
series4 = pd.read_csv('daily-min-temperatures.csv',header = 0,index_col = 0,parse_dates = True,squeeze = True)

In [195]:
temps = pd.DataFrame(series4.values)

In [199]:
width = 3

In [200]:
shifted = temps.shift(width - 1)

In [201]:
windows = shifted.rolling(window = width)

In [202]:
dataframe4 = pd.concat([windows.min(),windows.mean(),windows.max(),temps],axis = 1)

In [203]:
dataframe4.columns = ['min','mean','max','t']

In [204]:
dataframe4.head(10)

Unnamed: 0,min,mean,max,t
0,,,,20.7
1,,,,17.9
2,,,,18.8
3,,,,14.6
4,17.9,19.133333,20.7,15.8
5,14.6,17.1,18.8,15.8
6,14.6,16.4,18.8,15.8
7,14.6,15.4,15.8,17.4
8,15.8,15.8,15.8,21.8
9,15.8,16.333333,17.4,20.0


# Expanding Window Statistics:

In [205]:
series5 = pd.read_csv('daily-min-temperatures.csv',header = 0,index_col = 0,parse_dates = True,squeeze = True)

In [206]:
temps = pd.DataFrame(series5.values)

In [207]:
windows = temps.expanding()

In [208]:
dataframe5 = pd.concat([windows.min(),windows.mean(),windows.max(),temps.shift(-1)],axis = 1)

In [209]:
dataframe5.columns = ['min','mean','max','t+1']

In [210]:
dataframe5.head(10)

Unnamed: 0,min,mean,max,t+1
0,20.7,20.7,20.7,17.9
1,17.9,19.3,20.7,18.8
2,17.9,19.133333,20.7,14.6
3,14.6,18.0,20.7,15.8
4,14.6,17.56,20.7,15.8
5,14.6,17.266667,20.7,15.8
6,14.6,17.057143,20.7,17.4
7,14.6,17.1,20.7,21.8
8,14.6,17.622222,21.8,20.0
9,14.6,17.86,21.8,16.2
