In [1]:
import pandas as pd
import numpy as np
import credentials
import requests
from sklearn.preprocessing import StandardScaler
from pandas import DataFrame
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
import datetime as dt
import pymongo
from pymongo import MongoClient
# pulling data API
key =credentials.login['key_isaham']  # extract password

# Define the instruments to download.
tech = ["GREATEC",
        "UWC",       
        "D&O", 
       "GENETEC", 
       "UNISEM", 
       "FRONTKN", 
       "ATAIMS", 
       "FPI", 
       "PENTA", 
       "DUFU", 
       "VS", 
       "SKPRES", 
        "MPI", 
        "VITROX"
       ]         
# looping through all stocks

# Scale dictionary of dataframes for training set(scaling by stock)
ss={}
d = {}

# Scale dictionary of dataframes for prediction set(scaling by stock)
sp={}
nsp={}

for m,stock in enumerate (tech):
        
    print("----------------PROCESSING {}------------------".format(stock))
    symbol = stock    # Which stock to pull    
    response = requests.get("https://admin.isaham.my/api/chart?stock={}&key={}".format(symbol,key))
    data = response.json()
    
    # Tuning OB and OS level
    ob_cross=85
    os_cross=35

    df= pd.DataFrame.from_dict(data)
    df= df[['c','d','h','l','o','v']]
    df['d'] = pd.to_datetime(df['d'], dayfirst= True )
    
    # Identify MA50
    n=50
    ma50=[] 

    for i in range(len(df["c"])-(n-1)):
        ma = round(sum(df["c"][i:i+(n)])/n,4)
        ma50.append(ma)
    
    df["MA50"] = pd.Series(ma50)
    df["MA50"] = df["MA50"].shift(n-1)

    # Filter uptrend price above ma50
    # Calculating the gap of both MA
    df["MAgap"] = df["c"] -  df["MA50"] 
    
    # Set date as index
    df.set_index('d',inplace=True,drop=False)
    df.index.name = None

    # Create signal for uptrend price above ma50 
    # Result not accurate for stochastic when just filter uptrend only
    df["Uptrend"]=df['MAgap']>0
    Uptrend=df["Uptrend"]
    Uptrend_signal=Uptrend[Uptrend==True]
    
    # Signal stochastic
    # Create the "L14" column in the DataFrame
    df['L14'] = df['l'].rolling(window=14).min()

    # Create the "H14" column in the DataFrame
    df['H14'] = df['h'].rolling(window=14).max()

    # Create the "%K" column in the DataFrame
    df['%K'] = 100*((df['c'] - df['L14']) / (df['H14'] - df['L14']) )

    # Create the "%D" column in the DataFrame
    df['%D'] = df['%K'].rolling(window=3).mean()

    # Create a column in the DataFrame showing "TRUE" if buy entry signal is given and "FALSE" otherwise. 
    # A buy is initiated when the %K line crosses up through the %D line and the value of the oscillator is below 20 
    df['Buy OS'] = ((df['%K'] > df['%D']) & (df['%K'].shift(1) < df['%D'].shift(1))) & (df['%D'] < os_cross) & (Uptrend_signal)

    # Create a column in the DataFrame showing "TRUE" if sell entry signal is given and "FALSE" otherwise. 
    # A sell is initiated when the %K line crosses down through the %D line and the value of the oscillator is above 80 
    df['Sell OB'] = ((df['%K'] < df['%D']) & (df['%K'].shift(1) > df['%D'].shift(1))) & (df['%D'] > ob_cross) 
    
    # Create a column in the DataFrame for name of stock
    df['Stock_Name'] = symbol

    # Define overbought and oversold
    overbought =df['Sell OB']
    oversold =df['Buy OS']
    
    # Use index(date) for create strategy using overbought and oversold
    date_os=list(oversold[oversold == True].index)
    date_ob=list(overbought[overbought == True].index)
    
    # Scaling certain features only
    col_names_pred = ['c', 'l', 'o', 'v','h']  
    features_pred = df[col_names_pred]
    
    # Scaling for prediction data
    scaler_pred = StandardScaler().fit(features_pred.values)
    features_pred = scaler_pred.transform(features_pred.values)
    
    # Convert the array back to a dataframe
    dataset_pred = DataFrame(features_pred)
    
    # Prepare prediction data that no need to scale
    col_notscale_pred=['d','Stock_Name','Buy OS']
    features_notscale_pred=df[col_notscale_pred]
    
    # Saving in multiple variable
    sp["{}".format(stock)] = dataset_pred
    nsp["{}".format(stock)] = features_notscale_pred
    
    # Transaction recording
    buy = []
    sell = []
    sdate = []
    bdate = []
    wr = []
    pct = []
    vol = []
    ope =[]
    high = []
    low = []

    # Record close,open,vol,high,low for buy transaction
    for n,i in enumerate(date_os):
        buy.append(df['c'].loc[i])
        vol.append(df['v'].loc[i])
        ope.append(df['o'].loc[i])
        high.append(df['h'].loc[i])
        low.append(df['l'].loc[i])
        bdate.append(i)
        r=[]
        
        for m,k in enumerate(date_ob):
            
            if k>i:
                r.append(m)
                if len(r) ==1:
                    # Record close,percentage change for sell transaction
                    sell.append(df['c'].loc[k])
                    sdate.append(k)
                    pct_change=((df['c'].loc[k]/df['c'].loc[i])-1)*100
                    pct.append(((df['c'].loc[k]/df['c'].loc[i])-1)*100)
                elif len(r)>1:
                    pass
    diff =len(buy)-len(sell)
    # Checking for final transaction, make sure it is sell, as we want to close the position
    if len(buy) == len(sell):
        pass   
    elif len(buy) != len(sell):
        # Deleting excess buy transaction after we close position
        del(buy[-diff:])
        del(bdate[-diff:])
        del(ope[-diff:])
        del(high[-diff:])
        del(vol[-diff:])
        del(low[-diff:])
    dct = {
        "Buy_Date": bdate,
        "Buy": buy,
        "Sell": sell,
        "Sell_Date": sdate,
        "Open":ope,
        'Volume': vol,
        "Low": low,
        "high": high,
        "Pct_change": pct    
    }  
    # Save document for stochastic strategy
    trans = pd.DataFrame(dct)
    
    # Define variable for winning rate
    wr = []
    for i in range(len(trans)):
        if trans["Pct_change"][i] > 0:
            wr.append(1)
        else:
            wr.append(0)
    
    # Define variable for name of stock
    Name_stock = []
    for s in range(len(trans)):
        Name_stock.append(symbol)
        
    # Add new column
    trans["wr"] = pd.Series(wr)
    trans['Stock'] = pd.Series(Name_stock)
    
    
    
    # Scaling certain features only for training data
    col_names = ['Buy', 'Low', 'Open', 'Volume','high']  
    features = trans[col_names]
    
    # Prepare prediction data that no need to scale
    scaler = StandardScaler().fit(features.values)
    features = scaler.transform(features.values)
    
    # Convert the array back to a dataframe
    dataset = DataFrame(features)    
    col_notscale = ['Buy_Date','Stock','wr']
    features_notscale = trans[col_notscale]
    
    # Saving in multiple variable
    d["{}".format(stock)] = dataset
    ss["{}".format(stock)] = features_notscale
    print("----------------DONE {}------------------".format(symbol))

# Combining all training data generated
df_notscale = pd.concat(list(ss.values()),ignore_index=True)
df_scale = pd.concat(list(d.values()),ignore_index=True)
df_scale=df_scale.rename(columns={0: 'Buy', 1: 'Low',2: 'Open', 3: 'Volume',4: 'High'})
result_scaler = pd.concat([df_notscale, df_scale], axis=1)

# Setting Dates as index
df_all = result_scaler.sort_values(by="Buy_Date")
df_all.set_index('Buy_Date', inplace=True)

# Encoding the stock names
le = LabelEncoder()
df_all["Stock"] = le.fit_transform(df_all["Stock"])

# Define the splitting date (for training we use 2 years data)
# Splitting train test
day = dt.datetime.today().day
month = dt.datetime.today().month
year = dt.datetime.today().year - 5
date1 = "{}-{}-01".format(year, str(month).zfill(2), str(day).zfill(2))
date2 = str(dt.datetime.today().date() - dt.timedelta(days=1))  # until yesterday only

# Separate input features and target
X = df_all.drop(['wr'], axis=1)
y = df_all.wr









----------------PROCESSING GREATEC------------------
----------------DONE GREATEC------------------
----------------PROCESSING UWC------------------
----------------DONE UWC------------------
----------------PROCESSING D&O------------------
----------------DONE D&O------------------
----------------PROCESSING GENETEC------------------
----------------DONE GENETEC------------------
----------------PROCESSING UNISEM------------------
----------------DONE UNISEM------------------
----------------PROCESSING FRONTKN------------------
----------------DONE FRONTKN------------------
----------------PROCESSING ATAIMS------------------
----------------DONE ATAIMS------------------
----------------PROCESSING FPI------------------
----------------DONE FPI------------------
----------------PROCESSING PENTA------------------
----------------DONE PENTA------------------
----------------PROCESSING DUFU------------------
----------------DONE DUFU------------------
----------------PROCESSING VS---------

In [None]:
# Splitting train test
#X_train, y_train = X[date1: date2], y[date1: date2]12-30-2020':'04-01-2021'

In [10]:
# Splitting train test
#X_train, y_train = X[: '01-01-2021'], y[: '01-01-2021']

In [3]:
# Splitting train test
X_train, y_train = X[date1: date2], y[date1: date2]

X_trainss, y_train = X_train.to_numpy(), y_train.to_numpy()

# Training the model
upsampled = XGBClassifier(verbosity=0,random_seed=0).fit(X_trainss, y_train)

# Combining all prediction data generated
df_notscale_pred = pd.concat(list(nsp.values()),ignore_index=True)
df_scale_pred = pd.concat(list(sp.values()),ignore_index=True)

# Rename column
df_scale_pred = df_scale_pred.rename(columns={0: 'Buy', 1: 'Low',2: 'Open', 3: 'Volume',4: 'High'})

# Concat all
result_scaler_pred = pd.concat([df_notscale_pred, df_scale_pred], axis=1)
result_scaler_pred = result_scaler_pred.rename(columns={'d': 'Date'})

# Setting Dates as index
df_all_pred = result_scaler_pred.sort_values(by="Date")
df_all_pred.set_index('Date', inplace=True)

# Encoding the stock names
le = LabelEncoder()
df_all_pred["Stock"] = le.fit_transform(df_all_pred["Stock_Name"])



In [4]:
df_all

Unnamed: 0_level_0,Stock,wr,Buy,Low,Open,Volume,High
Buy_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1994-09-07,7,0,-0.752361,-0.758173,-0.755265,1.202233,-0.733711
1994-09-27,7,0,-0.733371,-0.746831,-0.746833,0.809197,-0.714979
1994-09-30,7,0,-0.722520,-0.755337,-0.729968,-0.396468,-0.723007
1994-09-30,0,0,0.649960,0.685527,0.672256,-0.329433,0.656679
1994-10-04,0,0,0.658738,0.676506,0.663335,-0.470362,0.635251
...,...,...,...,...,...,...,...
2021-03-04,6,1,1.351179,1.376893,1.428215,0.936792,1.389249
2021-03-09,0,1,0.711406,0.617868,0.636572,5.593638,0.686678
2021-03-10,7,1,3.447116,3.503234,3.725253,4.331648,3.561174
2021-03-12,0,1,0.689461,0.712591,0.712400,1.643264,0.682392


In [15]:
#latest = df_all_pred[date2:]

In [46]:
# Taking today and yesterday data only
latest = df_all_pred[date2:]

# Record prediction result
post = {}

In [47]:
latest

Unnamed: 0_level_0,Stock_Name,Buy OS,Buy,Low,Open,Volume,High,Stock
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-04-22,SKPRES,False,2.534027,2.557744,2.610592,0.267694,2.548944,9
2021-04-22,DUFU,False,4.43991,4.344354,4.236226,0.044498,4.367683,2
2021-04-22,VITROX,False,3.631051,3.616769,3.557823,-0.213976,3.656715,12
2021-04-22,GREATEC,False,1.886862,1.942204,1.908022,-0.312145,1.839043,6
2021-04-22,ATAIMS,False,1.088378,1.133344,1.082334,1.049593,1.048451,0
2021-04-22,PENTA,False,4.238131,4.278826,4.191498,-0.11111,4.175552,8
2021-04-22,FRONTKN,False,4.277435,4.351472,4.371273,-0.382555,4.276952,4
2021-04-22,MPI,True,4.114099,4.085231,3.999184,0.27374,4.078972,7
2021-04-22,FPI,False,3.925347,4.023641,4.009435,-0.014231,3.90744,3
2021-04-22,GENETEC,False,4.02867,4.153684,4.039546,-0.188061,3.989179,5


In [48]:
for m in range(len(latest)):
    # Assigning the dictionary for each stocks
    dct_1 = {}
    if latest["Buy OS"].iloc[m] == True:

        # predicting the test set
        inputs = latest.drop(['Stock_Name','Buy OS'] ,axis=1).iloc[m].values
        inputs = inputs.reshape(1,-1)
        
        # Test the model
        y_pred =upsampled.predict(inputs)
        pred1 = [round(value) for value in y_pred]
        print(pred1)
        
        # Saving the dct format
        dct_1["Signal"] = "1"
        dct_1["Predicted_Class"] =  pred1[0]
        dct_1["Date_predicted"] = dt.datetime.today()
        print("{} shows buy signal... Producing the prediction".format(latest["Stock_Name"].iloc[m]))
        # Append dict in post
        post["{}".format(latest["Stock_Name"].iloc[m])] = dct_1   
    else:
        
        dct_1["Signal"] = "0"
        dct_1["Predicted_Class"] = '0'
        dct_1["Date_predicted"] = dt.datetime.today()
        
        
        print("{} shows No buy signal.\n No prediction".format(latest["Stock_Name"].iloc[m]))
        # Append dict in post
        post["{}".format(latest["Stock_Name"].iloc[m])] = dct_1
        pass

SKPRES shows No buy signal.
 No prediction
DUFU shows No buy signal.
 No prediction
VITROX shows No buy signal.
 No prediction
GREATEC shows No buy signal.
 No prediction
ATAIMS shows No buy signal.
 No prediction
PENTA shows No buy signal.
 No prediction
FRONTKN shows No buy signal.
 No prediction
[0]
MPI shows buy signal... Producing the prediction
FPI shows No buy signal.
 No prediction
GENETEC shows No buy signal.
 No prediction
VS shows No buy signal.
 No prediction
D&O shows No buy signal.
 No prediction
UWC shows No buy signal.
 No prediction
UNISEM shows No buy signal.
 No prediction
DUFU shows No buy signal.
 No prediction
PENTA shows No buy signal.
 No prediction
VS shows No buy signal.
 No prediction
FPI shows No buy signal.
 No prediction
GREATEC shows No buy signal.
 No prediction
ATAIMS shows No buy signal.
 No prediction
SKPRES shows No buy signal.
 No prediction
FRONTKN shows No buy signal.
 No prediction
UNISEM shows No buy signal.
 No prediction
GENETEC shows No buy s

In [49]:
post

{'SKPRES': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 23, 17, 35, 6, 25283)},
 'DUFU': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 23, 17, 35, 6, 24282)},
 'VITROX': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 23, 17, 35, 6, 26284)},
 'GREATEC': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 23, 17, 35, 6, 24282)},
 'ATAIMS': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 23, 17, 35, 6, 24282)},
 'PENTA': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 23, 17, 35, 6, 24282)},
 'FRONTKN': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 23, 17, 35, 6, 25283)},
 'MPI': {'Signal': '0',
  'Predicted_Class': '0',
  'Date_predicted': datetime.datetime(2021, 4, 23, 17, 35, 6, 26284)},
 'FPI': {'Si

In [50]:
# Obtaining variables from environment variables
user = credentials.login['username_db']
pwd = credentials.login['password_db']
ip = credentials.login['ip_db']
port = credentials.login['port_db']

In [51]:
#1. connect to cluster -> master_client = ......
# Access to cluster
master_client = MongoClient("mongodb://{}:{}@{}:{}".format(user,pwd,ip,port))
print("list of db", master_client.list_database_names())

list of db ['MA5_efficiency', 'News_Keywords', 'News_Project', 'News_Project_01', 'News_Update', 'Stochastic_strategy', 'The_Star_Logs', 'The_Star_Webscrape', 'Trading_Prediction', 'admin', 'config', 'fbp_predictions2', 'local', 'logs', 'syamil_test', 'test', 'test_db', 'xgb_pred']


In [52]:
# Accessing mongodb database
db = master_client['Trading_Prediction']

In [53]:
# Reviewing existed collections
print("No. of Collections before: ", len(db.list_collection_names()))
print("----------------------------------------------------------------")

No. of Collections before:  2
----------------------------------------------------------------


In [54]:
# Create new collections for each predicted values
col = db['Stochastic_strategy']

In [55]:
col.insert_one(post)

<pymongo.results.InsertOneResult at 0x2a3ed9ab800>

In [56]:
#counting document after
post_count = col.count_documents({})
print('document after ',post_count)

document after  3
