In [1]:
import pandas as pd
import datetime
import json
import pandas_datareader as pdr
import numpy as np
import matplotlib.pyplot as plt
from keras import models
from keras import layers
from keras import optimizers
from matplotlib.image import imread
from keras.preprocessing import image
from helper import data_to_tensor

Using TensorFlow backend.


### Pulling stock list from Virtus LifeSci Biotech ETF

In [2]:
stocks = pd.read_excel('positions_bbc.xls', index_col=0, skiprows=1).Ticker.values

In [4]:
pdr.get_data_yahoo(stocks[0], start='2000-01-01').head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-01-07,17.0,15.0,16.0,15.0,59800,15.0
2014-01-08,18.0,16.0,17.5,17.0,51100,17.0
2014-01-09,17.75,17.1,17.5,17.5,159900,17.5
2014-01-10,20.0,17.5,17.5,18.0,157100,18.0
2014-01-13,18.5,18.200001,18.4,18.200001,5000,18.200001


#### Creating dictionary and JSON

In [3]:

dataframe_dict= {}
for stock in stocks: 

    df = pdr.get_data_yahoo(stock, start='2000-01-01')
    # Change inde to a date  string
    df['Date'] = df.index.strftime("%Y-%m-%d %H:%M:%S")
    
    dataframe_dict[stock] = df.to_dict('records')


In [4]:

with open('stock_info.json', 'w') as f:
    json.dump(dataframe_dict, f)


### Creating a dictionary and JSON for Benchmark index and ETF

```python
benchmark_dict = {}
# SP500(broader market)
df = pdr.get_data_yahoo('SPY', start='2000-01-01')
df.index = df.index.strftime("%Y-%m-%d %H:%M:%S")
benchmark_dict['SPY'] = df.to_dict('index')

# Vanguard Health Care Index Fund (Healthcare Industry)
df = pdr.get_data_yahoo('VHT', start='2009-01-01')
df.index = df.index.strftime("%Y-%m-%d %H:%M:%S")
benchmark_dict['VHT'] = df.to_dict('index')

#SPDR S&P Pharmaceuticals ETF (Pharmaceutical Drug Industry)
df = pdr.get_data_yahoo('XPH', start='2009-01-01')
df.index = df.index.strftime("%Y-%m-%d %H:%M:%S")
benchmark_dict['XPH'] = df.to_dict('index')
```

``` python
with open('benchmark.json', 'w') as fp:
    json.dump(benchmark_dict, fp)
```

### Creating the Target

data = []
target = []
period = 120
for stock in stocks:
    print(stock)
    df = pdr.get_data_yahoo(stock, start='2000-01-01')
    # Creating an array for he past  tradings from initial data.
    #df['data'] = [data_to_tensor(df.iloc[i-period:i]) for i in range(0,len(df))]
   
    df['Ticker'] = stock

    # Feature Engineering for the maximum and minimums
    df['Next_Day'] = [df['Close'].iloc[:i+1][-1]/ df['Close'].iloc[i]- 1 for i in range(0,len(df))]
    df['High_Max_3_Days'] = [df['High'][i: i+3].max()/ df['Close'][i]- 1 for i in range(0,len(df))]
    df['Low_Min_3_Days']= [df['Low'][i: i+3].min()/ df['Close'][i]-1 for i in range(0,len(df))]
    df['High_Max_10_Days'] = [df['High'][i: i+10].max()/ df['Close'][i]- 1 for i in range(0,len(df))]
    df['Low_Min_10_Days']= [df['Low'][i: i+10].min()/ df['Close'][i]-1 for i in range(0,len(df))]
    df['High_Max_30_Days'] = [df['High'][i: i+30].max()/ df['Close'][i]-1 for i in range(0,len(df))]
    df['Low_Min_30_Days']= [df['Low'][i: i+30].min()/ df['Close'][i]-1 for i in range(0,len(df))]
    for i in df.iloc[:-31].to_dict('records'):
        data.append(i)

#### Dividing all volume data by largest volume value

In [18]:
df['Volume'] = df['Volume']/ df['Volume'].max()

In [19]:
df['Volume'].describe()

count    1057.000000
mean        0.019978
std         0.048438
min         0.000119
25%         0.004902
50%         0.010495
75%         0.020447
max         1.000000
Name: Volume, dtype: float64

#### Scaling High, Low, Open, Close and droping Adj Close

In [10]:
columns = ['High', 'Low', 'Open', 'Close']
for stock in stocks:
    max_value = np.array(df.loc[df.Ticker == stock][columns]).max()
    df.loc[df.Ticker == stock][columns] = df.loc[df.Ticker == stock][columns]/ max_value
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]


In [11]:
df.High.max()

3000.0

In [9]:
np.array(max_value).max()

56.0

In [33]:
with open('stock_info.json', 'r') as f:
    new = json.load(f)

#### Creating a image to be processed

In [30]:
for stock in stocks:
    stock_dict = new[stock]
    date_list = [date for date in stock_dict]
    for date in date_list:
        
        high =  stock_dict[date]['High']

In [34]:
a = new[stocks[-50]]

In [37]:
a = new[stocks[-50]]
a[0]

{'High': 15.970000267028809,
 'Low': 15.970000267028809,
 'Open': 15.970000267028809,
 'Close': 15.970000267028809,
 'Volume': 350.0,
 'Adj Close': 15.970000267028809}

In [19]:
np.shape(high)

(1057,)

In [25]:
high

['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close']