In [5]:
from pandas import Series
from sklearn.preprocessing import MinMaxScaler
# Min-max normalization demo: rescale a small series into [0, 1] and map it back.

# define contrived series
data = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
series = Series(data)
print("series")
print(series)

# prepare data for normalization: the scaler is fitted on a 2-D array,
# so reshape the 1-D values into a single column of shape (len(values), 1)
values = series.values
values = values.reshape((len(values), 1))

# train the normalization
scaler = MinMaxScaler(feature_range=(0, 1))
scaler = scaler.fit(values)
# data_min_ / data_max_ hold one entry per column; pick the only column explicitly
# instead of relying on the implicit 1-element-array -> float conversion,
# which NumPy has deprecated.
print('Min: %f, Max: %f' % (scaler.data_min_[0], scaler.data_max_[0]))
# normalize the dataset and print
normalized = scaler.transform(values)

print("normalized")
print(normalized)

# inverse transform and print:
# map the normalized values back to their original range
inversed = scaler.inverse_transform(normalized)
print("inversed")

print(inversed)

series
0     10
1     20
2     30
3     40
4     50
5     60
6     70
7     80
8     90
9    100
dtype: float64
Min: 10.000000, Max: 100.000000
normalized
[[ 0.        ]
 [ 0.11111111]
 [ 0.22222222]
 [ 0.33333333]
 [ 0.44444444]
 [ 0.55555556]
 [ 0.66666667]
 [ 0.77777778]
 [ 0.88888889]
 [ 1.        ]]
inversed
[[  10.]
 [  20.]
 [  30.]
 [  40.]
 [  50.]
 [  60.]
 [  70.]
 [  80.]
 [  90.]
 [ 100.]]


In [6]:
from pandas import Series
from sklearn.preprocessing import StandardScaler
from math import sqrt
# Standardization demo: unlike the min-max normalization shown earlier,
# standardization rescales the data to have mean 0 and standard deviation 1.

# define contrived series
data = [1.0, 5.5, 9.0, 2.6, 8.8, 3.0, 4.1, 7.9, 6.3]
series = Series(data)
print(series)
# prepare data for standardization: reshape the 1-D values into a single
# column of shape (len(values), 1) before fitting
values = series.values
values = values.reshape((len(values), 1))
# train the standardization
scaler = StandardScaler()
scaler = scaler.fit(values)
# mean_ / var_ hold one entry per column; index the only column explicitly
# rather than passing a 1-element array to '%f' and math.sqrt, which relies on
# an implicit array -> float conversion that NumPy has deprecated.
print('Mean: %f, StandardDeviation: %f' % (scaler.mean_[0], sqrt(scaler.var_[0])))
# standardize the dataset and print
standardized = scaler.transform(values)
print(standardized)
# inverse transform and print
inversed = scaler.inverse_transform(standardized)
print(inversed)

0    1.0
1    5.5
2    9.0
3    2.6
4    8.8
5    3.0
6    4.1
7    7.9
8    6.3
dtype: float64
Mean: 5.355556, StandardDeviation: 2.712568
[[-1.60569456]
 [ 0.05325007]
 [ 1.34354035]
 [-1.01584758]
 [ 1.26980948]
 [-0.86838584]
 [-0.46286604]
 [ 0.93802055]
 [ 0.34817357]]
[[ 1. ]
 [ 5.5]
 [ 9. ]
 [ 2.6]
 [ 8.8]
 [ 3. ]
 [ 4.1]
 [ 7.9]
 [ 6.3]]


In [4]:
from numpy import array
from numpy import argmax
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
# One-hot encoding demo: string labels -> integer codes -> one-hot rows,
# then decode the first row back to its label.

# define example
data = ['cold', 'cold', 'warm', 'cold', 'hot', 'hot', 'warm', 'cold', 'warm', 'hot']
values = array(data)
print("**values**")
print(values)


# integer encode
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(values)
print("**integer_encoded**")
print(integer_encoded)

# binary encode
# The `sparse=False` constructor argument was renamed to `sparse_output` in newer
# scikit-learn releases and later removed. Keeping the default (sparse) output and
# densifying it with .toarray() gives the same dense array on old and new versions.
onehot_encoder = OneHotEncoder()
# the encoder is fitted on a 2-D array: one column, one row per sample
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded).toarray()
print("**onehot_encoded**")
print(onehot_encoded)

# invert first example: the position of the 1 in the first row is the integer
# code, which the label encoder maps back to the original string
inverted = label_encoder.inverse_transform([argmax(onehot_encoded[0, :])])
print("**inverted**")
print(inverted)


**values**
['cold' 'cold' 'warm' 'cold' 'hot' 'hot' 'warm' 'cold' 'warm' 'hot']
**integer_encoded**
[0 0 2 0 1 1 2 0 2 1]
**onehot_encoded**
[[ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 0.  0.  1.]
 [ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]
 [ 1.  0.  0.]
 [ 0.  0.  1.]
 [ 0.  1.  0.]]
**inverted**
['cold']


In [5]:
# padding sequences of varied length to a common length
from keras.preprocessing.sequence import pad_sequences
# three integer sequences of lengths 4, 3 and 1
sequences = [[1, 2, 3, 4], [1, 2, 3], [1]]

# with no extra arguments the shorter sequences get zeros added at the front
# until they are as long as the longest one
padded = pad_sequences(sequences)
print(padded)

Using TensorFlow backend.


[[1 2 3 4]
 [0 1 2 3]
 [0 0 0 1]]


In [6]:
from keras.preprocessing.sequence import pad_sequences
# three integer sequences of lengths 4, 3 and 1
sequences = [[1, 2, 3, 4], [1, 2, 3], [1]]

# padding='post' appends the zeros at the end of the shorter sequences;
# the default is 'pre' (zeros at the front)
padded = pad_sequences(
    sequences,
    padding='post',
)
print(padded)

[[1 2 3 4]
 [1 2 3 0]
 [1 0 0 0]]


In [8]:
from keras.preprocessing.sequence import pad_sequences
# three integer sequences of lengths 4, 3 and 1
sequences = [[1, 2, 3, 4], [1, 2, 3], [1]]

# pad_sequences also truncates: with maxlen=2 every sequence ends up with
# exactly two entries - longer ones lose their leading values, shorter ones
# are zero-padded at the front
truncated = pad_sequences(
    sequences,
    maxlen=2,
)
print(truncated)

[[3 4]
 [2 3]
 [0 1]]


In [10]:
# truncating='post' drops values from the end of over-long sequences;
# the default ('pre') drops them from the front
truncated = pad_sequences(
    sequences,
    maxlen=2,
    truncating='post',
)
print(truncated)

[[1 2]
 [1 2]
 [0 1]]


In [11]:
from pandas import DataFrame
# define the sequence: a frame with a single column 't' holding 0..9
df = DataFrame()
# list(range(10)) replaces the identity comprehension [x for x in range(10)]
df['t'] = list(range(10))
print(df)

   t
0  0
1  1
2  2
3  3
4  4
5  5
6  6
7  7
8  8
9  9


In [12]:
# the shift function - useful when preparing data for an LSTM
from pandas import DataFrame
# define the sequence: a single column 't' holding 0..9
df = DataFrame()
# list(range(10)) replaces the identity comprehension [x for x in range(10)]
df['t'] = list(range(10))
# shift forward: create a new column 't-1' populated with the contents of 't'
# moved down by one row, so each row holds the previous row's value;
# the first row has no predecessor and becomes NaN
df['t-1'] = df['t'].shift(1)
print(df)

   t  t-1
0  0  NaN
1  1    0
2  2    1
3  3    2
4  4    3
5  5    4
6  6    5
7  7    6
8  8    7
9  9    8


In [13]:
# create a new column 't+1' populated with 't' shifted by -1 places, i.e. moved
# up by one row, so each row holds the next row's value; the last row has no
# successor and becomes NaN
df['t+1'] = df['t'].shift(periods=-1)
print(df)

   t  t-1  t+1
0  0  NaN    1
1  1    0    2
2  2    1    3
3  3    2    4
4  4    3    5
5  5    4    6
6  6    5    7
7  7    6    8
8  8    7    9
9  9    8  NaN


In [14]:
from numpy import array
# 10 rows x 2 columns of values, reshaped in one step into a 3-D array
# with a leading axis of length 1
data = array([
    [0.1, 1.0], [0.2, 0.9],
    [0.3, 0.8], [0.4, 0.7],
    [0.5, 0.6], [0.6, 0.5],
    [0.7, 0.4], [0.8, 0.3],
    [0.9, 0.2], [1.0, 0.1],
]).reshape((1, 10, 2))
print(data.shape)

(1, 10, 2)


In [23]:
# Slice the (1, 10, 2) array: keep axis 0 whole, take positions 3..5 on axis 1,
# and use the slice 1:2 (rather than the index 1) on axis 2 so that axis is
# kept - the result has shape (1, 3, 1).
data[:,3:6,1:2]

array([[[ 0.7],
        [ 0.6],
        [ 0.5]]])

In [17]:
# the same 10 x 2 values kept as a plain 2-D array (no reshape)
data1 = array([
[0.1, 1.0],
[0.2, 0.9],
[0.3, 0.8],
[0.4, 0.7],
[0.5, 0.6],
[0.6, 0.5],
[0.7, 0.4],
[0.8, 0.3],
[0.9, 0.2],
[1.0, 0.1]])
# print as a function call: the bare `print x` statement is a SyntaxError on
# Python 3, and every other cell already uses the call form
print(data1.shape)

(10, 2)
