In [312]:
import numpy as np
from scipy import signal
from scipy.stats import linregress
import matplotlib.pyplot as plt
from scipy.fftpack import fft, ifft
import csv
import re
import pandas as pd
from matplotlib import style
import matplotlib.pyplot as plt
import pandas
import datetime as dt
from sklearn.linear_model import LinearRegression
from scipy.signal import blackman
from sklearn.preprocessing import PolynomialFeatures
from sklearn.externals import joblib
from sklearn.linear_model import ElasticNetCV

style.use('ggplot')


In [210]:
# Parse the training file once and slice it, instead of reading it from disk twice.
ham_train = np.genfromtxt('Ham_TRAIN', delimiter=',')
# Everything after the first CSV column, transposed so that each COLUMN is one
# series (the layout TimeSeriesClassifier documents for its `data` argument).
test_data = np.transpose(ham_train[:, 1:])
# First CSV column of every row; used as the per-series class below.
test_classes = ham_train[:, 0]

In [309]:
class TimeSeriesClassifier:
    """Lazily computed, cached features for a bank of time series.

    ``data`` is an m x n numpy array in which each COLUMN is one time series
    with m samples. Every ``get_*`` accessor computes its result on first use,
    stores it on the instance and returns the stored array afterwards, so an
    expensive transform (e.g. the FFT) is not recomputed during exploration.
    An accessor that depends on another feature calls that feature's accessor,
    so prerequisites are computed on demand.

    An empty array (``size == 0``) is the "not computed yet" sentinel.
    """

    def __init__(self, data, classes):
        """Store the series and create empty caches.

        Parameters
        ----------
        data : numpy.ndarray
            m x n array, one time series per column.
        classes : array-like or None
            Class of each series, or None when the series are unclassified.
        """
        # Time Series Array
        self.data = data
        # Time Series Classes (if classified, otherwise just pass None)
        self.classes = classes
        # Dimensions of Time Series Array: (m samples, n series)
        self.size = self.data.shape

        # Raw FFT of Each Time Series
        self.fft = np.array([])

        # Mean Value of Each Time Series
        self.mean = np.array([])

        # The linear term in a linear regression on each time series (i.e. scaled gain)
        self.slope = np.array([])

        # The intercepts from linreg on each time series
        self.intercept = np.array([])

        # The rvalues from linreg on each time series
        self.rvalues = np.array([])

        # The pval from linreg on each time series
        self.pvalues = np.array([])

        # The stderr from linreg on each time series
        self.stderrors = np.array([])

        # Time Series array with linear trends removed, and "uniformly" scaled (divide by the mean value)
        self.detrend = np.array([])

        # FFT of windowed detrended time series
        self.adjusted_fft_windowed = np.array([])

        # Per-series maximum and minimum
        self.max = np.array([])
        self.min = np.array([])

        # Laplace Transform of Data (not implemented yet, see get_laplace)
        self.laplace = np.array([])

        # Storage for a Continuous Wavelet Transform:
        self.cwt = np.array([])

        # PCA on FFT or adjusted FFT (try to get the transpose/not transpose correct)
        self.fft_pca = np.array([])
        self.adj_fft_pca = np.array([])

        # Arc Length Histogram
        self.arc_hist = np.array([])

        # ElasticNetCV regression coefficients (or whatever other poly reg I like best...)
        # Kept as an ndarray like every other cache so the size-0 sentinel works.
        self.elastic_net = np.array([])

        # PCA on Raw Data
        self.pca = np.array([])

    # FUNCTIONS:

    def get_fft(self):
        """Return the raw FFT of each time series (computed on first call)."""
        if self.fft.size == 0:
            # Series are stored column-wise, so transform along axis 0 (time);
            # fft's default axis=-1 would transform across the series instead.
            self.fft = fft(self.data, axis=0)
        return self.fft

    def get_mean(self):
        """Return the mean value of each time series, shape (n,)."""
        if self.mean.size == 0:
            self.mean = np.mean(self.data, axis=0)
        return self.mean

    def _fit_linear_trends(self):
        """Regress every series on its sample index and cache all five outputs.

        Fills slope, intercept, rvalues, pvalues and stderrors in one pass so
        that get_slope and get_intercept share a single implementation.
        """
        n_samples, n_series = self.size
        time_index = np.arange(n_samples)
        fits = np.zeros((n_series, 5))
        for idx, series in enumerate(self.data.T):
            slope, intercept, rvalue, pvalue, stderr = linregress(time_index, series)
            fits[idx] = (slope, intercept, rvalue, pvalue, stderr)
        self.slope = fits[:, 0].copy()
        self.intercept = fits[:, 1].copy()
        self.rvalues = fits[:, 2].copy()
        self.pvalues = fits[:, 3].copy()
        self.stderrors = fits[:, 4].copy()

    def get_slope(self):
        """Return the linear-regression slope of each series (i.e. scaled gain)."""
        if self.slope.size == 0:
            self._fit_linear_trends()
        return self.slope

    def get_intercept(self):
        """Return the linear-regression intercept of each series."""
        if self.intercept.size == 0:
            self._fit_linear_trends()
        return self.intercept

    def get_detrend(self):
        """Return the series with their linear trend removed, divided by their mean.

        For column j the fitted line ``slope[j] * t + intercept[j]`` is
        subtracted and the residual is divided by ``mean[j]``.
        """
        if self.detrend.size == 0:
            # Column vector of sample indices; broadcasting against the (n,)
            # slope/intercept arrays yields the m x n matrix of fitted lines.
            time_index = np.arange(self.size[0])[:, np.newaxis]
            trend = time_index * self.get_slope() + self.get_intercept()
            self.detrend = np.divide(self.data - trend, self.get_mean())
        return self.detrend

    def get_adjusted_fft_windowed(self):
        """Return the FFT of the Blackman-windowed, detrended series."""
        if self.adjusted_fft_windowed.size == 0:
            # signal.windows.blackman is used because the top-level
            # scipy.signal.blackman alias is not available in newer SciPy.
            window = signal.windows.blackman(self.size[0])
            windowed = self.get_detrend() * window[:, np.newaxis]
            # Transform along time (axis 0), one spectrum per column.
            self.adjusted_fft_windowed = fft(windowed, axis=0)
        return self.adjusted_fft_windowed

    def get_max(self):
        """Return the maximum of each time series, shape (n,)."""
        if self.max.size == 0:
            self.max = np.amax(self.data, axis=0)
        return self.max

    def get_min(self):
        """Return the minimum of each time series, shape (n,)."""
        if self.min.size == 0:
            self.min = np.amin(self.data, axis=0)
        return self.min

    def get_laplace(self):
        """Return the cached ``laplace`` array.

        NOTE(review): no Laplace transform is implemented yet. The cached
        value is still the placeholder the stub has always computed (the
        per-series minimum). The fix here is only that the method now returns
        the attribute it fills instead of ``self.min``.
        TODO: replace the placeholder with a real transform.
        """
        if self.laplace.size == 0:
            self.laplace = np.amin(self.data, axis=0)
        return self.laplace

    def get_elastic_net(self):
        """Return ElasticNetCV coefficients of each series regressed on its sample index.

        One model is fitted per column with the sample index as the single
        feature. The result has one row per series and one column per
        coefficient.
        """
        # np.size also copes with a plain list left behind by older instances.
        if np.size(self.elastic_net) == 0:
            # scikit-learn expects a 2-D feature matrix, hence the column vector.
            time_index = np.arange(self.size[0]).reshape(-1, 1)
            coefficients = []
            for series in self.data.T:
                model = ElasticNetCV()
                model.fit(time_index, series)
                coefficients.append(model.coef_)
            self.elastic_net = np.array(coefficients)
        return self.elastic_net

    # TODO: accessors still to be written for the caches created in __init__:
    #   cwt (Continuous Wavelet Transform), fft_pca / adj_fft_pca (PCA on the
    #   FFT or adjusted FFT), arc_hist (Arc Length Histogram), pca (PCA on raw data).
        

To Do:

        # - - - - - - To Do: - - - - - - 
        #    
        #    Preprocessing
        #    
        #    
        #    
        #    Supervised
        #    
        #    
        #    Unsupervised
        #    
        #    
        #    
        #    Plots
        #    
        #    Comments
        #  
        #    Pep8
        #  
        #    Try TSNE instead of PCA
        #  
        #    Import the Dynamic Time Warping (DTW) method from Riverside
        #    
        #    Try Using other Clustering Methods
    
        

To Examine:

    Mixed Dimension PCA
    
    Explained Variance from PCA

    Build a "which method works best" comparison analogous to the UCR program, and run many methods through it.

    Time things with `%timeit`.

    Laplace Analysis 
    
    


In [310]:
# Wrap the loaded series and their classes in the caching feature container.
TestClass=TimeSeriesClassifier(test_data,test_classes)

In [311]:
# Sanity check: get_min reduces over axis 0, so there is one minimum per column (series).
TestClass.get_min().shape

(109,)

In [303]:
# Sanity check: the windowed, detrended FFT has the same shape as the data array.
TestClass.get_adjusted_fft_windowed().shape

(431, 109)

In [323]:
# ElasticNetCV has no `alpha` argument (it cross-validates over `alphas`), so
# ElasticNetCV(alpha=.5) raises TypeError. Default construction matches the
# estimator repr printed by the fit cell below (alphas=None, l1_ratio=0.5).
banana=ElasticNetCV()

In [324]:
# Fit the first series (column 0) against its sample index; the index is reshaped
# to a single-feature column vector because fit expects a 2-D feature matrix.
banana.fit(np.arange(TestClass.size[0]).reshape(-1,1),TestClass.data[:,0])

ElasticNetCV(alphas=None, copy_X=True, cv=None, eps=0.001, fit_intercept=True,
       l1_ratio=0.5, max_iter=1000, n_alphas=100, n_jobs=1,
       normalize=False, positive=False, precompute='auto',
       random_state=None, selection='cyclic', tol=0.0001, verbose=0)

In [322]:
# Fitted coefficient for the single sample-index feature.
banana.coef_

array([5.96351879e-20])