In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import credentials
import requests
from sklearn.preprocessing import StandardScaler
from pandas import DataFrame
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
import calendar
import datetime as dt
import pymongo
from pymongo import MongoClient
# API key for the iSaham chart endpoint, read from the local `credentials`
# module so the secret is not written into the notebook itself.
key = credentials.login["key_isaham"]

In [2]:
# Define the instruments to download.
tech = ["GREATEC",
        "UWC",       
        "D&O", 
       "GENETEC", 
       "UNISEM", 
       "FRONTKN", 
       "ATAIMS", 
       "FPI", 
       "PENTA", 
       "DUFU", 
       "VS", 
       "SKPRES", 
        "MPI", 
        "VITROX"
       ]             
# looping through all stocks

# Scale dictionary of dataframes for training set(scaling by stock)
ss={}
d = {}

# Scale dictionary of dataframes for prediction set(scaling by stock)
sp={}
nsp={}

for m,stock in enumerate (tech):
        
    print("----------------PROCESSING {}------------------".format(stock))
    symbol = stock    # Which stock to pull    
    response = requests.get("https://admin.isaham.my/api/chart?stock={}&key={}".format(symbol,key))
    data = response.json()
    plt.style.use('fivethirtyeight')
    %config InlineBackend.figure_format = 'retina'
    %matplotlib inline

    # Makes plots bigger
    plt.rcParams["figure.figsize"] = (20,8)
    
    # Tuning OB and OS level
    ob_cross=85
    os_cross=35

    df= pd.DataFrame.from_dict(data)
    df= df[['c','d','h','l','o','v']]
    df['d'] = pd.to_datetime(df['d'], dayfirst= True )
    
    # Identify MA50
    n=50
    ma50=[] 

    for i in range(len(df["c"])-(n-1)):
        ma = round(sum(df["c"][i:i+(n)])/n,4)
        ma50.append(ma)
    
    df["MA50"] = pd.Series(ma50)
    df["MA50"] = df["MA50"].shift(n-1)

    # Filter uptrend price above ma50
    # Calculating the gap of both MA
    df["MAgap"] = df["c"] -  df["MA50"] 
    
    # Set date as index
    df.set_index('d',inplace=True,drop=False)
    df.index.name = None

    # Create signal for uptrend price above ma50 
    # Result not accurate for stochastic when just filter uptrend only
    df["Uptrend"]=df['MAgap']>0
    Uptrend=df["Uptrend"]
    Uptrend_signal=Uptrend[Uptrend==True]
    
    # Signal stochastic
    # Create the "L14" column in the DataFrame
    df['L14'] = df['l'].rolling(window=14).min()

    # Create the "H14" column in the DataFrame
    df['H14'] = df['h'].rolling(window=14).max()

    # Create the "%K" column in the DataFrame
    df['%K'] = 100*((df['c'] - df['L14']) / (df['H14'] - df['L14']) )

    # Create the "%D" column in the DataFrame
    df['%D'] = df['%K'].rolling(window=3).mean()

    # Create a column in the DataFrame showing "TRUE" if buy entry signal is given and "FALSE" otherwise. 
    # A buy is initiated when the %K line crosses up through the %D line and the value of the oscillator is below 20 
    df['Buy OS'] = ((df['%K'] > df['%D']) & (df['%K'].shift(1) < df['%D'].shift(1))) & (df['%D'] < os_cross) & (Uptrend_signal)

    # Create a column in the DataFrame showing "TRUE" if sell entry signal is given and "FALSE" otherwise. 
    # A sell is initiated when the %K line crosses down through the %D line and the value of the oscillator is above 80 
    df['Sell OB'] = ((df['%K'] < df['%D']) & (df['%K'].shift(1) > df['%D'].shift(1))) & (df['%D'] > ob_cross) 
    
    # Create a column in the DataFrame for name of stock
    df['Stock_Name'] = symbol

    # Define overbought and oversold
    overbought =df['Sell OB']
    oversold =df['Buy OS']
    
    # Use index(date) for create strategy using overbought and oversold
    date_os=list(oversold[oversold == True].index)
    date_ob=list(overbought[overbought == True].index)
    
    # Scaling certain features only
    col_names_pred = ['c', 'l', 'o', 'v','h']  
    features_pred = df[col_names_pred]
    
    # Scaling for prediction data
    scaler_pred = StandardScaler().fit(features_pred.values)
    features_pred = scaler_pred.transform(features_pred.values)
    
    # Convert the array back to a dataframe
    dataset_pred = DataFrame(features_pred)
    
    # Prepare prediction data that no need to scale
    col_notscale_pred=['d','Stock_Name','Buy OS']
    features_notscale_pred=df[col_notscale_pred]
    
    # Saving in multiple variable
    sp["{}".format(stock)] = dataset_pred
    nsp["{}".format(stock)] = features_notscale_pred
    
    # Transaction recording
    buy = []
    sell = []
    sdate = []
    bdate = []
    wr = []
    pct = []
    vol = []
    ope =[]
    high = []
    low = []

    # Record close,open,vol,high,low for buy transaction
    for n,i in enumerate(date_os):
        buy.append(df['c'].loc[i])
        vol.append(df['v'].loc[i])
        ope.append(df['o'].loc[i])
        high.append(df['h'].loc[i])
        low.append(df['l'].loc[i])
        bdate.append(i)
        r=[]
        
        for m,k in enumerate(date_ob):
            
            if k>i:
                r.append(m)
                if len(r) ==1:
                    # Record close,percentage change for sell transaction
                    sell.append(df['c'].loc[k])
                    sdate.append(k)
                    pct_change=((df['c'].loc[k]/df['c'].loc[i])-1)*100
                    pct.append(((df['c'].loc[k]/df['c'].loc[i])-1)*100)
                elif len(r)>1:
                    pass
    diff =len(buy)-len(sell)
    # Checking for final transaction, make sure it is sell, as we want to close the position
    if len(buy) == len(sell):
        pass   
    elif len(buy) != len(sell):
        # Deleting excess buy transaction after we close position
        del(buy[-diff:])
        del(bdate[-diff:])
        del(ope[-diff:])
        del(high[-diff:])
        del(vol[-diff:])
        del(low[-diff:])
    dct = {
        "Buy_Date": bdate,
        "Buy": buy,
        "Sell": sell,
        "Sell_Date": sdate,
        "Open":ope,
        'Volume': vol,
        "Low": low,
        "high": high,
        "Pct_change": pct    
    }  
    # Save document for stochastic strategy
    trans = pd.DataFrame(dct)
    
    # Define variable for winning rate
    wr = []
    for i in range(len(trans)):
        if trans["Pct_change"][i] > 0:
            wr.append(1)
        else:
            wr.append(0)
    
    # Define variable for name of stock
    Name_stock = []
    for s in range(len(trans)):
        Name_stock.append(symbol)
        
    # Add new column
    trans["wr"] = pd.Series(wr)
    trans['Stock'] = pd.Series(Name_stock)
    
    # Scaling certain features only for training data
    col_names = ['Buy', 'Low', 'Open', 'Volume','high']  
    features = trans[col_names]
    
    # Prepare prediction data that no need to scale
    scaler = StandardScaler().fit(features.values)
    features = scaler.transform(features.values)
    
    # Convert the array back to a dataframe
    dataset = DataFrame(features)    
    col_notscale = ['Buy_Date','Stock','wr']
    features_notscale = trans[col_notscale]
    
    # Saving in multiple variable
    d["{}".format(stock)] = dataset
    ss["{}".format(stock)] = features_notscale
    print("----------------DONE {}------------------".format(symbol))

----------------PROCESSING GREATEC------------------
----------------DONE GREATEC------------------
----------------PROCESSING UWC------------------
----------------DONE UWC------------------
----------------PROCESSING D&O------------------
----------------DONE D&O------------------
----------------PROCESSING GENETEC------------------
----------------DONE GENETEC------------------
----------------PROCESSING UNISEM------------------
----------------DONE UNISEM------------------
----------------PROCESSING FRONTKN------------------
----------------DONE FRONTKN------------------
----------------PROCESSING ATAIMS------------------
----------------DONE ATAIMS------------------
----------------PROCESSING FPI------------------
----------------DONE FPI------------------
----------------PROCESSING PENTA------------------
----------------DONE PENTA------------------
----------------PROCESSING DUFU------------------
----------------DONE DUFU------------------
----------------PROCESSING VS---------

In [3]:
# Stack the per-stock training frames into single tables. `ss` and `d` are
# filled in the same stock order, so once both are re-indexed from 0 the
# unscaled and scaled rows line up by position.
df_notscale = pd.concat([frame for frame in ss.values()], ignore_index=True)
df_scale = pd.concat([frame for frame in d.values()], ignore_index=True)

# The scaled frames carry positional column labels 0..4; give them names.
df_scale = df_scale.rename(
    columns=dict(enumerate(['Buy', 'Low', 'Open', 'Volume', 'High']))
)
result_scaler = pd.concat([df_notscale, df_scale], axis=1)

# Order the trades chronologically and use the buy date as the index.
df_all = result_scaler.sort_values(by="Buy_Date").set_index('Buy_Date')

# Replace the stock symbols by integer codes.
le = LabelEncoder()
df_all["Stock"] = le.fit_transform(df_all["Stock"])

In [4]:
# Training window (two years of data):
#   date1 - first day of the current month, two years ago ("YYYY-MM-01")
#   date2 - yesterday ("YYYY-MM-DD")
# The clock is read once so that day, month, year and date2 are all derived
# from the same instant, even when the cell runs across midnight.
today = dt.datetime.today()
day = today.day
month = today.month
year = today.year - 2
date1 = "{}-{}-01".format(year, str(month).zfill(2))
date2 = str(today.date() - dt.timedelta(days=1))  # until yesterday only

In [5]:
# Features are every column of df_all except the win/loss label `wr`.
X = df_all.drop(columns=['wr'])
y = df_all['wr']

# Keep only the trades whose buy date lies inside the training window.
X_train = X.loc[date1:date2]
y_train = y.loc[date1:date2]

# --- Class balancing --------------------------------------------------------
# Put features and label back side by side so whole rows can be resampled.
X_concat = pd.concat([X_train, y_train], axis=1)

# Split the rows by outcome.
is_loss = X_concat['wr'] == 0
is_win = X_concat['wr'] == 1
not_win = X_concat.loc[is_loss]
win = X_concat.loc[is_win]

# Draw, with replacement, as many losing trades as there are winning trades.
not_win_upsampled = resample(
    not_win,
    replace=True,
    n_samples=win.shape[0],
    random_state=0,  # fixed seed keeps the draw reproducible
)

# Balanced training table, back in chronological order.
balanced_train = pd.concat([win, not_win_upsampled]).sort_values(by="Buy_Date")
X_train = balanced_train.drop(columns='wr')
y_train = balanced_train['wr']

# Fit the classifier on the balanced data. The fitted model is stored under
# the name `upsampled`, which the prediction cell uses.
upsampled = LogisticRegression(solver='liblinear').fit(X_train, y_train)

In [6]:
# Combining all prediction data generated. `nsp` and `sp` are filled in the
# same stock order, so after re-indexing from 0 their rows line up by position.
df_notscale_pred = pd.concat(list(nsp.values()), ignore_index=True)
df_scale_pred = pd.concat(list(sp.values()), ignore_index=True)

# The scaled columns 0..4 were built from c, l, o, v, h; give them the same
# names as the training features.
df_scale_pred = df_scale_pred.rename(
    columns={0: 'Buy', 1: 'Low', 2: 'Open', 3: 'Volume', 4: 'High'}
)

# Concat all
result_scaler_pred = pd.concat([df_notscale_pred, df_scale_pred], axis=1)
result_scaler_pred = result_scaler_pred.rename(columns={'d': 'Date'})

# Order chronologically and use the date as the index
df_all_pred = result_scaler_pred.sort_values(by="Date").set_index('Date')

# Encode the stock names with the encoder `le` that was fitted on the training
# data. Fitting a fresh encoder here would assign codes from the prediction
# names alone, and those codes differ from the training codes whenever the two
# sets of stocks are not identical.
stock_codes = {name: code for code, name in enumerate(le.classes_)}
df_all_pred["Stock"] = df_all_pred["Stock_Name"].map(stock_codes)

# Stocks absent from the training data have no code the model knows about,
# so their rows cannot be scored and are removed.
unseen = df_all_pred["Stock"].isna()
if unseen.any():
    print("No training data for {}; rows dropped from the prediction set".format(
        sorted(df_all_pred.loc[unseen, "Stock_Name"].unique())
    ))
    df_all_pred = df_all_pred.loc[~unseen].copy()
df_all_pred["Stock"] = df_all_pred["Stock"].astype("int64")

In [52]:
# Rows dated `date2` (yesterday) or later, i.e. yesterday's and today's data
latest = df_all_pred.loc[date2:]

# The model must receive the features in the same column order it was fitted
# on. Dropping columns from `latest` would leave "Stock" as the last column,
# whereas it is the first column of the training features.
feature_cols = list(X_train.columns)

# Prediction result per stock. `latest` is in date order, so when a stock
# appears on both days the most recent row is the one that is kept.
post = {}

for m in range(len(latest)):
    stock_name = latest["Stock_Name"].iloc[m]

    # A fresh dictionary for every row: reusing one object would make all
    # stocks share (and overwrite) the same result.
    dct_1 = {}

    if bool(latest["Buy OS"].iloc[m]):
        # One-row frame holding the features for this stock/date
        inputs = latest[feature_cols].iloc[[m]]

        # Ask the model whether this buy signal is expected to win
        y_pred = upsampled.predict(inputs)
        # Plain Python ints, so the value can be stored in the database
        pred1 = [int(round(value)) for value in y_pred]
        print(pred1)

        dct_1["Signal"] = "1"
        dct_1["Predicted_Class"] = pred1[0]
        dct_1["Date_predicted"] = dt.datetime.today()
        print("{} shows buy signal... Producing the prediction".format(stock_name))
    else:
        # No buy signal: nothing to predict.
        # NOTE(review): "Predicted_Class" is the string '0' here but an int in
        # the branch above; kept as-is so stored documents keep their format.
        dct_1["Signal"] = "0"
        dct_1["Predicted_Class"] = '0'
        dct_1["Date_predicted"] = dt.datetime.today()
        print("{} shows No buy signal.\n No prediction".format(stock_name))

    # Store the result under the stock name
    post["{}".format(stock_name)] = dct_1

SKPRES shows No buy signal.
 No prediction
DUFU shows No buy signal.
 No prediction
VITROX shows No buy signal.
 No prediction
GREATEC shows No buy signal.
 No prediction
ATAIMS shows No buy signal.
 No prediction
PENTA shows No buy signal.
 No prediction
FRONTKN shows No buy signal.
 No prediction
MPI shows No buy signal.
 No prediction
FPI shows No buy signal.
 No prediction
GENETEC shows No buy signal.
 No prediction
VS shows No buy signal.
 No prediction
D&O shows No buy signal.
 No prediction
UWC shows No buy signal.
 No prediction
UNISEM shows No buy signal.
 No prediction
DUFU shows No buy signal.
 No prediction
PENTA shows No buy signal.
 No prediction
VS shows No buy signal.
 No prediction
FPI shows No buy signal.
 No prediction
GREATEC shows No buy signal.
 No prediction
ATAIMS shows No buy signal.
 No prediction
SKPRES shows No buy signal.
 No prediction
FRONTKN shows No buy signal.
 No prediction
UNISEM shows No buy signal.
 No prediction
GENETEC shows No buy signal.
 No pr

In [53]:
post

{'SKPRES': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 8, 17, 12, 44, 701923)},
 'DUFU': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 8, 17, 12, 44, 701923)},
 'VITROX': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 8, 17, 12, 44, 701923)},
 'GREATEC': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 8, 17, 12, 44, 701923)},
 'ATAIMS': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 8, 17, 12, 44, 701923)},
 'PENTA': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 8, 17, 12, 44, 701923)},
 'FRONTKN': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 8, 17, 12, 44, 701923)},
 'MPI': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 8, 17, 12, 44, 701923)},
 'FP

In [44]:
# MongoDB connection settings, read from the local `credentials` module.
user, pwd, ip, port = (
    credentials.login[setting]
    for setting in ('username_db', 'password_db', 'ip_db', 'port_db')
)

In [28]:
# 1. Connect to the MongoDB cluster -> master_client
# NOTE(review): user and password are placed into the URI unchanged. If they
# can contain characters such as "@", ":" or "/", they presumably need to be
# percent-escaped first - confirm how they are stored in `credentials`.
mongo_uri = "mongodb://{}:{}@{}:{}".format(user, pwd, ip, port)
master_client = MongoClient(mongo_uri)

# Show which databases the account can see
database_names = master_client.list_database_names()
print("list of db", database_names)

list of db ['MA5_efficiency', 'News_Keywords', 'News_Project', 'News_Project_01', 'News_Update', 'Stochastic_strategy', 'The_Star_Logs', 'The_Star_Webscrape', 'admin', 'config', 'fbp_predictions2', 'local', 'logs', 'syamil_test', 'test', 'test_db', 'xgb_pred']


In [29]:
# Handle to the database that stores the stochastic-strategy predictions
db = master_client.get_database('Stochastic_strategy')

In [30]:
# Report how many collections the database holds at this point
existing_collections = db.list_collection_names()
print("No. of Collections before: ", len(existing_collections))
print("----------------------------------------------------------------")

No. of Collections before:  1
----------------------------------------------------------------


In [31]:
# Handle to the collection named after today's date (YYYY-MM-DD); the
# predictions of one day go into one collection.
col = db.get_collection(dt.date.today().isoformat())

In [45]:
# Store all of today's per-stock results (`post`) as one document in today's
# collection.
# NOTE(review): pymongo's insert_one is documented to add an `_id` field to
# the dict it is given, so re-running this cell with the same `post` object
# would presumably fail with a duplicate-key error - confirm.
col.insert_one(post)


<pymongo.results.InsertOneResult at 0x2280ec24500>

In [46]:
# Count the documents in today's collection after the insert (the empty
# filter matches every document).
post_count = col.count_documents({})
print('document after ',post_count)

document after  2
