In [1]:
from scipy.stats import skew, kurtosis
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline
import pandas as pd

In [44]:
a = np.array([0,1,2,90,60,70])

# Mean: the arithmetic average of all the data points.
mean = np.mean(a)
print("Mean is",mean)

# Skewness measures the asymmetry of a distribution around its mean.
# skewness > 0: most of the mass sits on the left and the longer tail extends
# to the right (right-tailed); skewness < 0 is the mirror image.
# bias=True keeps the uncorrected (population) estimator.
skewness = skew(a, axis=0, bias=True)
print("Skewness is:",skewness)

# Kurtosis measures how heavy the tails of the distribution are.
# fisher=True reports excess kurtosis, so a normal distribution scores 0.
# The result is stored in `kurt`, not `kurtosis`: rebinding `kurtosis` would
# shadow the scipy function and make every later call to it fail.
kurt = kurtosis(a, axis=0, fisher=True, bias=True)
print("Kurtosis is", kurt)

Mean is 37.1666666667
Skewness is: 0.16999752201785548
Kurtosis is -1.7531393355613718


In [10]:
# Population variance: the mean squared distance of the points from their
# mean, i.e. sum((x - u)^2) / n.  ddof=0 is NumPy's default divisor (n).
variance = np.var(a, ddof=0)
print("Variance is",variance)

Variance is 1386.13888889


In [11]:
"""
Regression analysis or beta is the slope of the data (stock prices).
"""
x = np.arange(0,len(a),1)
slope, intercept, r_value, p_value, std_err = stats.linregress(x,a)
print("Slope/Beta is", slope)

Slope/Beta is 17.5714285714


In [13]:
# Population standard deviation: the square root of the variance (ddof=0).
std = np.std(a, ddof=0)
print("Standard deviation is",std)

Standard deviation is 37.2308862222


In [29]:
# NOTE(review): `AR` is the legacy autoregression class; newer statsmodels
# releases appear to replace it with `AutoReg` — confirm the installed version
# still provides `AR` before relying on this cell.
from statsmodels.tsa.ar_model import AR

# 500 evenly spaced samples rising linearly from 0 to 100.
signal = np.linspace(0,100, 500)
# Wrap the signal in an AR model object; it is fitted in a later cell.
ar_mod = AR(signal)

In [30]:
print(ar_mod)
# NOTE(review): the positional 4 is presumably the maximum lag order — confirm
# against the statsmodels version in use.
ar_res = ar_mod.fit(4)

# predict() includes the `pred_end` observation, whereas a Python slice
# excludes its stop index; slice to pred_end + 1 so both printouts cover the
# same observations and can be compared element by element.
pred_start, pred_end = 4, 60
print(ar_res.predict(pred_start, pred_end))
print(signal[pred_start:pred_end + 1])

<statsmodels.tsa.ar_model.AR object at 0x7fc1607c0438>
[  0.80160321   1.00200401   1.20240481   1.40280561   1.60320641
   1.80360721   2.00400802   2.20440882   2.40480962   2.60521042
   2.80561122   3.00601202   3.20641283   3.40681363   3.60721443
   3.80761523   4.00801603   4.20841683   4.40881764   4.60921844
   4.80961924   5.01002004   5.21042084   5.41082164   5.61122244
   5.81162325   6.01202405   6.21242485   6.41282565   6.61322645
   6.81362725   7.01402806   7.21442886   7.41482966   7.61523046
   7.81563126   8.01603206   8.21643287   8.41683367   8.61723447
   8.81763527   9.01803607   9.21843687   9.41883768   9.61923848
   9.81963928  10.02004008  10.22044088  10.42084168  10.62124248
  10.82164329  11.02204409  11.22244489  11.42284569  11.62324649
  11.82364729  12.0240481 ]
[  0.80160321   1.00200401   1.20240481   1.40280561   1.60320641
   1.80360721   2.00400802   2.20440882   2.40480962   2.60521042
   2.80561122   3.00601202   3.20641283   3.40681363   3.60

In [3]:
"""
Exponential Moving Average is calculated for each value, which is based on previous value's EMA and current price. 
Hence the code will return an array of same length, but we need to pick up the last day's value. 
"""

df = pd.DataFrame({'A' : np.linspace(0,100,500)})


ema = df.ewm(alpha = 0.6).mean().iloc[-1,0]
print("EMA is", ema)

EMA is 99.8663994656


# Compute all the features for every row

In [9]:
def _row_features(values, alpha=0.6):
    """Return the feature vector for one 1-D series of observations.

    Features, in order: mean, skewness (biased estimator), excess kurtosis
    (biased estimator), population variance, population standard deviation,
    slope of a least-squares line against the positional index 0..n-1, and
    the last value of the exponentially weighted moving average.

    Parameters
    ----------
    values : array-like, 1-D
        The observations for a single row.
    alpha : float, optional
        Smoothing factor handed to ``ewm``; defaults to 0.6.

    Returns
    -------
    numpy.ndarray
        The seven features listed above, in that order.
    """
    values = np.asarray(values)
    index = np.arange(len(values))
    # Go through the `stats` namespace instead of the bare `skew`/`kurtosis`
    # names: a bare name can be rebound to a plain number by another cell,
    # which would make this call raise TypeError on a fresh top-to-bottom run.
    return np.array([
        np.mean(values),
        stats.skew(values, axis=0, bias=True),
        stats.kurtosis(values, axis=0, fisher=True, bias=True),
        np.var(values),
        np.std(values),
        stats.linregress(index, values)[0],  # element 0 of the result is the slope
        pd.Series(values).ewm(alpha=alpha).mean().iloc[-1],
    ])


a = np.array([[0,1,2],[2,3,4]])
# One feature vector per row of `a`; built in a single step per row rather than
# by growing an array with repeated np.append calls.
Feature_array = []
for row_values in a:
    f = _row_features(row_values)
    print(f)
    Feature_array.append(f)

[ 1.          0.         -1.5         0.66666667  0.81649658  1.
  1.53846154]
[ 3.          0.         -1.5         0.66666667  0.81649658  1.
  3.53846154]


In [61]:
# Convert the list of per-row feature vectors into a single NumPy array
# (one row of features per input row).
Feature_array = np.asarray(Feature_array)