In [6]:
import os

import quandl

# Take the API key from the QUANDL_API_KEY environment variable so a real
# credential never has to be written into (and shared with) the notebook.
# The literal 'api' is the original placeholder and is kept as the fallback
# so the cell behaves as before when the variable is not set.
quandl.ApiConfig.api_key = os.environ.get('QUANDL_API_KEY', 'api')

In [2]:
# Download the WIKI/F dataset for a single week and print the whole frame
# as a quick check of what quandl.get returns.
mydata = quandl.get(
    "WIKI/F",
    start_date="2017-12-04",
    end_date="2017-12-09",
)
print(mydata)

              Open   High    Low  Close      Volume  Ex-Dividend  Split Ratio  \
Date                                                                            
2017-12-04  12.650  12.81  12.59  12.63  39394346.0          0.0          1.0   
2017-12-05  12.610  12.62  12.40  12.43  44378959.0          0.0          1.0   
2017-12-06  12.400  12.43  12.28  12.38  25879892.0          0.0          1.0   
2017-12-07  12.395  12.59  12.33  12.53  25404223.0          0.0          1.0   
2017-12-08  12.530  12.61  12.43  12.61  21217510.0          0.0          1.0   

            Adj. Open  Adj. High  Adj. Low  Adj. Close  Adj. Volume  
Date                                                                 
2017-12-04     12.650      12.81     12.59       12.63   39394346.0  
2017-12-05     12.610      12.62     12.40       12.43   44378959.0  
2017-12-06     12.400      12.43     12.28       12.38   25879892.0  
2017-12-07     12.395      12.59     12.33       12.53   25404223.0  
2017-12-08  

In [4]:
# Show the type of the object that quandl.get returned above.
print(type(mydata))

<class 'pandas.core.frame.DataFrame'>


In [7]:
import pandas as pd
def get_data_quandl(symbol, start_date, end_date):
    """Fetch a dataset from Quandl for the given date range.

    Thin wrapper around ``quandl.get``: ``symbol`` is passed as the dataset
    code and ``start_date`` / ``end_date`` are forwarded as keyword
    arguments of the same names.

    Returns whatever ``quandl.get`` returns for that request.
    """
    return quandl.get(symbol, start_date=start_date, end_date=end_date)

In [10]:
def generate_features(df):
    """Build a table of lagged features plus the closing-price target.

    ``df`` must provide the columns 'Open', 'High', 'Low', 'Close' and
    'Volume'. Every feature except 'open' is shifted by at least one row, so
    a row only uses values from earlier rows; 'close' is the unshifted
    target. Rows containing any NaN (the warm-up period of the rolling
    windows) are dropped before returning.

    Column names use the labels 5 / 30 / 365, while the actual rolling
    window lengths are 5 / 21 / 252 rows.

    Returns a new DataFrame; ``df`` itself is not modified.
    """
    # (label used in column names, window length in rows)
    horizons = (('5', 5), ('30', 21), ('365', 252))
    # label pairs for the short/long ratio columns, in output order
    ratio_pairs = (('5', '30'), ('5', '365'), ('30', '365'))

    df_new = pd.DataFrame()

    # Raw features: the current open plus the previous row's quotes.
    df_new['open'] = df['Open']
    for target, source in (('open_1', 'Open'),
                           ('close_1', 'Close'),
                           ('high_1', 'High'),
                           ('low_1', 'Low'),
                           ('volume_1', 'Volume')):
        df_new[target] = df[source].shift(1)

    close = df['Close']
    volume = df['Volume']

    # Derived features: rolling mean / standard deviation of price and
    # volume, each followed by the ratios between its three horizons.
    statistics = (('avg', lambda window: window.mean()),
                  ('std', lambda window: window.std()))
    sources = (('price', close), ('volume', volume))
    for stat_name, reduce_window in statistics:
        for source_name, series in sources:
            stem = '{}_{}'.format(stat_name, source_name)
            for label, length in horizons:
                column = '{}_{}'.format(stem, label)
                df_new[column] = reduce_window(series.rolling(length)).shift(1)
            for short, long_ in ratio_pairs:
                ratio_column = 'ratio_{}_{}_{}'.format(stem, short, long_)
                numerator = df_new['{}_{}'.format(stem, short)]
                denominator = df_new['{}_{}'.format(stem, long_)]
                df_new[ratio_column] = numerator / denominator

    # Relative change of the close over 1 row and over each horizon,
    # lagged by one row.
    for label, lag in (('1', 1),) + horizons:
        earlier_close = close.shift(lag)
        df_new['return_' + label] = ((close - earlier_close) / earlier_close).shift(1)

    # Rolling means of the one-row return.
    # NOTE(review): 'return_1' is already shifted by one row, so the extra
    # shift(1) here lags these columns by two rows in total -- confirm that
    # this is intended.
    one_row_return = df_new['return_1']
    for label, length in horizons:
        df_new['moving_avg_' + label] = one_row_return.rolling(length).mean().shift(1)

    # The target.
    df_new['close'] = close

    # Drop the rows that contain NaN because of the shifts and rolling windows.
    return df_new.dropna(axis=0)

In [11]:
# Download the WIKI/F history for 2001-2004, turn it into the feature table
# and preview the first rows rounded to three decimals.
symbol = 'WIKI/F'
start, end = '2001-01-01', '2004-12-31'
data_raw = get_data_quandl(symbol, start_date=start, end_date=end)
data = generate_features(data_raw)
print(data.round(3).head())

             open  open_1  close_1  high_1  low_1    volume_1  avg_price_5  \
Date                                                                         
2002-01-10  15.85   16.50    16.31   16.83  16.30   7033600.0       16.608   
2002-01-11  15.26   15.85    15.29   15.90  15.10  14260600.0       16.320   
2002-01-14  15.50   15.26    15.50   15.75  14.90  20725100.0       16.032   
2002-01-15  15.20   15.50    15.35   15.50  15.15   7188300.0       15.802   
2002-01-16  14.90   15.20    15.04   15.60  14.91   9463000.0       15.498   

            avg_price_30  avg_price_365  ratio_avg_price_5_30  ...  \
Date                                                           ...   
2002-01-10        15.969         23.237                 1.040  ...   
2002-01-11        15.904         23.191                 1.026  ...   
2002-01-14        15.849         23.148                 1.012  ...   
2002-01-15        15.787         23.108                 1.001  ...   
2002-01-16        15.733         