In [45]:
from datetime import datetime, date
from nowtrade.symbol_list import StockList
from nowtrade.data_connection import YahooConnection
from nowtrade import dataset 
from nowtrade import technical_indicator
from nowtrade import criteria
from nowtrade import criteria_group
from nowtrade import trading_profile
from nowtrade import trading_amount
from nowtrade import trading_fee
from nowtrade import strategy
from nowtrade import ensemble
from nowtrade.action import Long
from nowtrade.action import Short
from nowtrade.action import LongExit
from nowtrade.action import ShortExit

In [46]:
%matplotlib inline
import pandas as pd
import numpy as np
from pathlib import Path


In [54]:
symbol = 'BHARTIARTL.NS'
symbols=[symbol]
today = date.today()
train_start = datetime(today.year-10, today.month, 28)
train_end = datetime.fromisoformat(today.isoformat())
csv_file_path = Path('{0}{1}_{2}_{3}.csv'.format(symbol,today.year,today.month,today.day))
print("file exists ",csv_file_path.exists())
try:
    if (not csv_file_path.exists()):
        data = YahooConnection().get_data(symbol, train_start, train_end)
        data.to_csv(csv_file_path,index=True,header=True)
except Exception as e:
    print('Error occured while scraping and storing stock-data :{0}'.format(e))

train_dataset= pd.read_csv(csv_file_path,names=['Date','High', 'Low', 'Open',  'Close','Volume','Adj_Close'],header=0)


file exists  True


In [55]:
train_dataset=train_dataset.set_index(train_dataset['Date'])
train_dataset=train_dataset.drop('Date',1)
train_dataset.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj_Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2011-02-28,306.67157,298.086609,302.998871,304.192505,6302028.0,287.710846
2011-03-01,311.170654,295.837067,304.00885,309.97702,5345999.0,293.181976
2011-03-03,310.711548,300.336151,308.048828,303.136597,3452481.0,286.71225
2011-03-04,306.304291,297.627533,303.503845,300.198425,3559151.0,283.933197
2011-03-07,299.509796,292.990723,298.407959,297.902985,2517904.0,281.762146


In [56]:
tt=train_dataset.sort_index(ascending=True)

In [57]:
train_data=tt.loc[today.replace(year=today.year-10).isoformat():today.replace(day=1).isoformat()]

In [58]:
test_data=tt.loc[today.replace(day=1).isoformat():today.isoformat()]

In [59]:
train_data.tail()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj_Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-25,587.849976,569.549988,579.0,580.75,12273498.0,580.75
2021-01-27,581.75,562.349976,580.0,570.799988,15280757.0,570.799988
2021-01-28,575.849976,562.75,566.5,570.900024,14921448.0,570.900024
2021-01-29,578.5,550.599976,578.5,553.599976,25051621.0,553.599976
2021-02-01,581.099976,549.200012,559.0,579.150024,13951025.0,579.150024


In [60]:
test_data.tail()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj_Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-02-04,623.0,595.650024,614.900024,600.599976,51018799.0,600.599976
2021-02-05,605.900024,576.25,603.799988,581.099976,34811620.0,581.099976
2021-02-08,603.650024,584.700012,589.0,597.299988,21247165.0,597.299988
2021-02-09,607.75,595.0,598.900024,598.950012,20541765.0,598.950012
2021-02-10,607.75,587.650024,604.950012,589.450012,11531017.0,589.450012


In [61]:
#rf = ensemble.Ensemble(['Open', 'High', 'Low',  'Volume'], ['Close'])

In [62]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import precision_score,recall_score

In [63]:
train_values=train_data

In [64]:
train_columns=['Open', 'High', 'Low',  'Volume']
train_test_column=['Close']

In [65]:
from sklearn.ensemble import RandomForestClassifier

In [66]:
rf=RandomForestClassifier(n_jobs=2,max_depth=7,n_estimators=500)

In [67]:
rf.fit(train_data[train_columns],np.asarray(train_data['Close'],dtype="|S6"))

RandomForestClassifier(max_depth=7, n_estimators=500, n_jobs=2)

In [68]:
predicted=rf.predict(test_data[train_columns])

In [69]:
predicted[:]

array([b'579.15', b'594.54', b'602.65', b'602.65', b'581.34', b'586.95',
       b'602.65', b'602.65'], dtype='|S6')

In [70]:
test_data

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj_Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-02-01,581.099976,549.200012,559.0,579.150024,13951025.0,579.150024
2021-02-02,605.650024,579.25,584.900024,599.349976,21311866.0,599.349976
2021-02-03,622.5,599.450012,609.400024,608.849976,30076561.0,608.849976
2021-02-04,623.0,595.650024,614.900024,600.599976,51018799.0,600.599976
2021-02-05,605.900024,576.25,603.799988,581.099976,34811620.0,581.099976
2021-02-08,603.650024,584.700012,589.0,597.299988,21247165.0,597.299988
2021-02-09,607.75,595.0,598.900024,598.950012,20541765.0,598.950012
2021-02-10,607.75,587.650024,604.950012,589.450012,11531017.0,589.450012


In [63]:
# class MongoDatabaseConnection(DataConnection):
#     """
#     MongoDB connection to retrieve data.
#     update username, password and database
#     """
#     def __init__(self, host='127.0.0.1', port=27017, database='stock-data', \
#                  username=user10, password='UsEr10810'):
#         DataConnection.__init__(self)
#         from pymongo import MongoClient
#         self.connection = None
#         self.database = None
#         self.host = host
#         self.port = port
#         self.database = database
#         self.username = username
#         self.password = password
#         #self.connection = MongoClient(host, port)
#         self.connection = MongoClient("mongodb+srv://mongocloud:{0}@mongocloud.jhswk.mongodb.net/{1}?retryWrites=true&w=majority".format(password,database))
#         self.database = self.connection[database]

#     def get_data(self, symbol, start, end, symbol_in_column=True):
#         """
#         Returns a dataframe of the symbol data requested.
#         """
#         from pymongo import ASCENDING
#         symbol = str(symbol).upper()
#         results = self.database[symbol].find({'_id': \
#                               {'$gte': start, '$lte': end}}\
#                               ).sort('datetime', ASCENDING)
#         ret = pd.DataFrame.from_dict(list(results))
#         if len(ret) < 1:
#             raise NoDataException()
#         ret.rename(columns={'open': 'Open', \
#                             'high': 'High', \
#                             'low': 'Low', \
#                             'close': 'Close', \
#                             'volume': 'Volume', \
#                             'adj_close': 'Adj Close', \
#                             '_id': 'Date'}, \
#                            inplace=True)
#         ret = ret.set_index('Date')
#         if symbol_in_column:
#             ret.rename(columns=lambda name: '%s_%s' %(symbol, name), inplace=True)
#         return ret

#     def set_data(self, data_frame, symbols, volume=True, adj_close=True):
#         """
#         Stores Open, Close, High, Low, Volume, and Adj Close of
#         symbols specified using the data in the DataFrame provided.
#         Typically you'd pull data using another connection and
#         feed it's data_frame to this function in order to store
#         the data in a local MongoDB.
#         """
#         for symbol in symbols:
#             symbol = str(symbol).upper()
#             if adj_close:
#                 data = data_frame.loc[:, ['%s_Open' %symbol, \
#                                           '%s_Close' %symbol, \
#                                           '%s_High' %symbol, \
#                                           '%s_Low' %symbol, \
#                                           '%s_Volume' %symbol, \
#                                           '%s_Adj Close' %symbol]]
#                 data.columns = ['open', 'close', 'high', 'low', 'volume', 'adj_close']
#             elif volume:
#                 data = data_frame.loc[:, ['%s_Open' %symbol, \
#                                           '%s_Close' %symbol, \
#                                           '%s_High' %symbol, \
#                                           '%s_Low' %symbol, \
#                                           '%s_Volume' %symbol]]
#                 data.columns = ['open', 'close', 'high', 'low', 'volume']
#             else:
#                 data = data_frame.loc[:, ['%s_Open' %symbol, \
#                                           '%s_Close' %symbol, \
#                                           '%s_High' %symbol, \
#                                           '%s_Low' %symbol]]
#                 data.columns = ['open', 'close', 'high', 'low']
#             for row in data.iterrows():
#                 values = dict(row[1])
#                 values['_id'] = row[0]
#                 self.database[symbol].insert(values)
                
# def populate_mongo_day(symbols, start, end, database='symbol-data'):
#     """
#     Helper function to populate a local mongo db with daily stock data.
#     Uses the YahooConnection class.
#     """
#     mgc = MongoDatabaseConnection(database=database)
#     for symbol in symbols:
#         symbol = symbol.upper()
#         yahoo = YahooConnection()
#         try:
#             data = yahoo.get_data(symbol, start, end)
#             mgc.set_data(data, [symbol])
#         except Exception, error: # pylint: disable=broad-except
#             print 'Error for %s (%s - %s): %s' %(symbol, start, end, error)
            
