## Imports

In [None]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import math
import pickle
%matplotlib inline

import datetime
from scipy.stats import linregress
from IPython.display import Image
from statistics import mean
import random

# Plot Imports
import matplotlib.pyplot as plt 
import matplotlib.dates as mpl_dates
import plotly.graph_objects as go
import datetime

# Notebook display setting: passing None for both options removes the row and
# column limits, so DataFrames are shown in full instead of being truncated.
pd.set_option("display.max_rows", None, "display.max_columns", None)

## Functions

### Closing

In [None]:
def Closing(df):
    '''Flag every candle as closing up or down relative to its open.

    INPUTS:

     - df : DataFrame with 'Open' and 'Close' columns. Modified in place.

    OUTPUTS:

     - ['Closing'] ==> 1 when Open <= Close (Close_Up), -1 otherwise (Close_Dw).
                       A row where either price is NaN gets -1, because the
                       comparison evaluates to False.

    Returns None; the result is written to df.
    '''
    # Vectorised comparison instead of a row-wise apply: no Python call per
    # row, and it also works on an empty frame (where apply(axis=1) does not
    # return a single column that could be assigned).
    df['Closing'] = np.where(df['Open'] <= df['Close'], 1, -1)

### Market Structure

In [None]:
def Market_Structure(df,clean_sweep,swing_max,perc_95):
    
    '''Function that provides the Market Structure on the df adding the following cols outputs:

    The frame is walked row by row and modified in place; nothing is returned.

    INPUTS:

     - df          : DataFrame with 'High', 'Low' and 'Close' columns. Rows are addressed
                     as df.index[0] + position, so the index is expected to hold
                     consecutive integer labels.
     - clean_sweep : margin added above MS_H / below MS_L that a Close must clear to count
                     as a break; multiplied by 0.0001 in the code.
     - swing_max   : number of preceding bars a Low/High has to beat to qualify as the swing
                     point; also sets the seed length (the first 10*swing_max rows).
     - perc_95     : cap for the MS range (MS_H - MS_L), compared against 10000 * range.

    OUTPUTS:

     - MS_H      ==> output:  value of Market structure High for each data point
     - MS_L      ==> output:  value of Market structure Low  for each data point
     - MS_Sit    ==> outputs are categorical vbles: 'MS', 'Up_Break', 'Dw_Break',
                     'Settling_Up', 'Settling_Dw'
    
    perc_95 used for H1 is: 126.6 pips
    perc_95 used for H4 is: 259.1 pips
    perc_95 used for D1 is: 577.2 pips    
    '''
    
    # Seed: the first 10*swing_max rows take their own Low/High as the structure
    # and are labelled 'MS'.
    # NOTE(review): raises IndexError when df has fewer than 10*swing_max rows.

    for i in range (0,10*swing_max):
        
        df.loc[df.index[i],'MS_L'] = df.loc[df.index[i],'Low']   
        df.loc[df.index[i],'MS_H'] = df.loc[df.index[i],'High']
        df.loc[df.index[i],'MS_Sit'] = 'MS'
 
    for i in range(10*swing_max,len(df)):
        
        # Turn the position into a label; from here on i, i-1, i-j are labels.
        i = i + df.index[0]
    
        # From 'MS'/'Dw_break' => 'Up_break' situation: the previous row is not
        # already an Up_Break and the Close clears the previous MS_H by the margin.
        # NOTE(review): ('Up_Break') is a plain string, not a tuple, so `not in`
        # is a substring test. It gives the same answer for the five state names
        # written by this function.
        if (df.loc[i-1,'MS_Sit'] not in ('Up_Break')) & (df.loc[i,'Close'] > df.loc[i-1,'MS_H'] + 0.0001*clean_sweep):
            
            df.loc[i,'MS_Sit'] = 'Up_Break'
            df.loc[i,'MS_H']   = df.loc[i,'High']
            #df.loc[i,'MS_N']   = df.loc[i-1,'MS_N'] + 1

            # Id 1st Low swing before the break: walk backwards from the break
            # bar; skip bars whose Low is not below the bar before them, and take
            # the first Low that is below every Low of the swing_max bars before it.
            # NOTE(review): i-j-1 can fall before the first label (KeyError), and if
            # no swing is found within 1000 steps MS_L is never written for row i.
            for j in range (0,1000):
                
                if (df.loc[(i-j),'Low'] >= df.loc[(i-j) -1 ,'Low']):
                    continue
                else:
                    if df.loc[(i-j),'Low'] < df.loc[i-j-swing_max:i-j-1,'Low'].min():
                        df.loc[i,'MS_L'] = df.loc[(i-j),'Low']
                        
                        # Test that MS_range is under limit perc_95; if not, pull
                        # MS_L up so that the range equals perc_95.
                        if (10000 * (df.loc[i,'MS_H'] - df.loc[i,'MS_L'])) >= perc_95:
                            df.loc[i,'MS_L'] = df.loc[i,'MS_H'] - 0.0001 * perc_95
                        else:
                            pass
                        break
                    
                    else:
                        continue
        
        # From 'MS'/'Up_break' => 'Dw_break' situation: mirror image of the
        # branch above (same substring caveat for ('Dw_Break')).
        elif (df.loc[i-1,'MS_Sit'] not in ('Dw_Break')) & (df.loc[i,'Close'] < df.loc[i-1,'MS_L']- 0.0001*clean_sweep):
            
            df.loc[i,'MS_Sit'] = 'Dw_Break'
            df.loc[i,'MS_L']   = df.loc[i,'Low']

            # Id 1st High swing before the break: first High (walking backwards)
            # that is above every High of the swing_max bars before it.
            # NOTE(review): same look-back caveats as in the Up_Break branch.
            for j in range (0,1000):
                if (df.loc[(i-j),'High'] <= df.loc[(i-j) -1 ,'High']):
                    continue
                else:
                    if df.loc[(i-j),'High'] > df.loc[i-j-swing_max:i-j-1,'High'].max():
                        df.loc[i,'MS_H'] = df.loc[(i-j),'High']
                        
                        # Test that MS_range is under limit perc_95; if not, pull
                        # MS_H down so that the range equals perc_95.
                        if (10000 * (df.loc[i,'MS_H'] - df.loc[i,'MS_L'])) >= perc_95:
                            df.loc[i,'MS_H'] = df.loc[i,'MS_L'] + 0.0001 * perc_95
                        else:
                            pass
                        break
                    else:
                        continue

        # From Up_Break / Settling_Up => Settling_Up: a higher High than the
        # previous bar extends MS_H.
        elif (df.loc[i-1,'MS_Sit'] in ('Up_Break','Settling_Up')) & (df.loc[i,'High'] > df.loc[i-1,'High']):
            
            df.loc[i,'MS_Sit'] = 'Settling_Up'
            df.loc[i,'MS_H']   = df.loc[i,'High']
            
            # Test that MS_range is under limit perc_95: carry the previous MS_L
            # if the range allows it, otherwise cap the range at perc_95.
            if (10000 * (df.loc[i,'MS_H'] - df.loc[i-1,'MS_L'])) <= perc_95:
                df.loc[i,'MS_L']   = df.loc[i-1,'MS_L']   
            else:
                df.loc[i,'MS_L'] = df.loc[i,'MS_H'] - 0.0001 * perc_95

        # From Dw_Break / Settling_Dw => Settling_Dw: a lower Low than the
        # previous bar extends MS_L.
        elif (df.loc[i-1,'MS_Sit'] in ('Dw_Break','Settling_Dw')) & (df.loc[i,'Low'] < df.loc[i-1,'Low']):
            
            df.loc[i,'MS_Sit'] = 'Settling_Dw'
            df.loc[i,'MS_L']   = df.loc[i,'Low']
            
            # Test that MS_range is under limit perc_95: carry the previous MS_H
            # if the range allows it, otherwise cap the range at perc_95.
            if (10000 * (df.loc[i-1,'MS_H'] - df.loc[i,'MS_L'])) <= perc_95:
                df.loc[i,'MS_H']   = df.loc[i-1,'MS_H']   
            else:
                df.loc[i,'MS_H'] = df.loc[i,'MS_L'] + 0.0001 * perc_95

        # Anything else => 'MS': both structure levels are carried over unchanged.
        else:
            
            df.loc[i,'MS_Sit'] = 'MS'
            df.loc[i,'MS_H']   = df.loc[i-1,'MS_H']
            df.loc[i,'MS_L']   = df.loc[i-1,'MS_L']

### Trend

In [None]:
def Trend(df):

    '''Function that calculates the trend based on the previous MS break:

    INPUTS:

     - df : DataFrame with an 'MS_Sit' column. Modified in place.

    OUTPUTS:

     - Trend  ==> 1 from an 'Up_Break' row onwards, -1 from a 'Dw_Break' row onwards,
                  and the string 'none' for every row before the first break.

    Returns None; the column is always rebuilt from 'MS_Sit'.
    '''

    situation = df['MS_Sit']

    # Break rows define the trend; every other row starts out undefined.
    at_breaks = np.select([situation == 'Up_Break', situation == 'Dw_Break'],
                          [1.0, -1.0], default=np.nan)

    # Each non-break row inherits the trend of the row before it. A forward
    # fill does this positionally, so any index works, and the 'none' marker
    # is applied exactly once (including for a one-row frame).
    carried = pd.Series(at_breaks, index=df.index).ffill()

    df['Trend'] = carried.astype(object).where(carried.notna(), 'none')

### N_breaks

In [None]:
def N_breaks(df):

    '''Function that calculates the number of previous breaks in the same direction:

    INPUTS:

     - df : DataFrame with 'MS_Sit' and 'Trend' columns. Modified in place.

    OUTPUTS:

     - MS_Sit   ==> corrected: a 'Dw_Break' directly after 'Settling_Dw' is relabelled
                    'Settling_Dw', and an 'Up_Break' directly after 'Settling_Up' is
                    relabelled 'Settling_Up'.
     - N_Breaks ==> number of consecutive breaks in the current direction (always >= 0).
                    The first row is always 0.

    Rows are processed by position, so the index labels do not matter.
    Returns None.
    '''

    situation = df['MS_Sit'].tolist()
    trend = df['Trend'].tolist()
    n_rows = len(situation)

    # Correction of ('Dw_Break','Up_Break'): the pass is sequential on purpose,
    # because a relabelled row is what the next row sees as its predecessor.
    for pos in range(1, n_rows):
        if situation[pos] == 'Dw_Break' and situation[pos - 1] == 'Settling_Dw':
            situation[pos] = 'Settling_Dw'
        elif situation[pos] == 'Up_Break' and situation[pos - 1] == 'Settling_Up':
            situation[pos] = 'Settling_Up'

    # Signed running count: positive while breaking up, negative while breaking
    # down. A break against the previous row's trend restarts the count at +/-1.
    signed = [0] * n_rows
    for pos in range(1, n_rows):
        if situation[pos] == 'Up_Break':
            signed[pos] = 1 if trend[pos - 1] == -1 else signed[pos - 1] + 1
        elif situation[pos] == 'Dw_Break':
            signed[pos] = -1 if trend[pos - 1] == 1 else signed[pos - 1] - 1
        else:
            signed[pos] = signed[pos - 1]

    df['MS_Sit'] = situation
    df['N_Breaks'] = pd.Series(signed, index=df.index, dtype='int64').abs()

### MS_periods

In [None]:
def MS_periods(df):
    
    '''Function that calculates the number of periods the price remains in MS:

    INPUTS:

     - df : DataFrame with an 'MS_Sit' column. Modified in place.

    OUTPUTS:

     - MS_Pds ==> integer count of consecutive 'MS' rows; 0 on every non-'MS' row.
                  The very first row is always 0, so a run that starts on the first row
                  counts 0, 1, 2, ... while every later run counts 1, 2, 3, ...

    Rows are processed by position, so the index labels do not matter.
    Returns None.
    '''

    in_ms = (df['MS_Sit'] == 'MS').tolist()
    periods = [0] * len(in_ms)

    for pos in range(1, len(in_ms)):
        if in_ms[pos]:
            # Continue the running count, or start a new run at 1.
            periods[pos] = periods[pos - 1] + 1 if in_ms[pos - 1] else 1

    df['MS_Pds'] = pd.Series(periods, index=df.index, dtype='int64')

### MS_range

In [None]:
def MS_range(df):

    '''Function that stores the distance between MS_H and MS_L, scaled by 10000:

    INPUTS:

     - df : DataFrame with 'MS_H' and 'MS_L' columns. Modified in place.

    OUTPUTS:

     - MS_range ==> (MS_H - MS_L) * 10000 for every row

    '''

    structure_high = df['MS_H']
    structure_low = df['MS_L']

    df['MS_range'] = (structure_high - structure_low) * 10000

### MS_N

In [None]:
def MS_N(df):

    '''Function that numbers the market structures by counting the breaks seen so far:

    INPUTS:

     - df : DataFrame with an 'MS_Sit' column. Modified in place.

    OUTPUTS:

     - MS_N ==> running count (float) of 'Up_Break' / 'Dw_Break' rows up to and including
                the current row. The first row is always 0, even if it is a break.

    Rows are processed by position, so the index labels do not matter.
    Returns None.
    '''

    is_break = df['MS_Sit'].isin(['Dw_Break', 'Up_Break']).to_numpy().copy()

    # The count starts at 0 on the first row regardless of its state.
    if is_break.size:
        is_break[0] = False

    # float64 keeps the dtype this column has always had downstream.
    df['MS_N'] = pd.Series(is_break.cumsum(), index=df.index, dtype='float64')

### Indicators

In [None]:
def Indicators(df):

    '''Function that adds RSI, MACD and Bollinger-band columns computed from 'Close':

    INPUTS:

     - df : DataFrame with a 'Close' column. Modified in place.

    OUTPUTS (in the order the columns are added):

     - Close_im1, U_Move, D_Move     ==> previous close and the up / down move of each row
                                         (NaN when the row did not move in that direction)
     - 14_Avg_U, 14_Avg_D            ==> 14-row rolling mean of the moves (NaNs are skipped)
     - RSI_14_SMA                    ==> 100 - 100 / (1 + Avg_U / Avg_D)
     - 14_Avg_U_EMA, 14_Avg_D_EMA    ==> EMA (span 14) of the two rolling means
     - RSI_14_EMA                    ==> same formula on the EMA averages
     - EMA_12_macd, EMA_26_macd      ==> EMAs of Close
     - MACD_12_26                    ==> 10000 * (EMA_12 - EMA_26)
     - EMA_MACD_12_26_9              ==> EMA (span 9) of the MACD
     - Hist_MACD_12_26_9             ==> MACD - EMA_MACD
     - MACD_signal                   ==> histogram times the previous row's histogram
                                         (negative when the histogram changes sign)
     - Boll_SMA_20                   ==> 20-row SMA of Close
     - Boll_SMA_20_Var_-2 / _Var_+2  ==> SMA -/+ 2 rolling standard deviations
     - Dist_BollB_SMA_20_Var_+2 / -2 ==> 10000 * distance from Close to each band
     - BollB_Wideness                ==> 10000 * distance between the two bands

    The intermediate columns are kept on purpose; clean_data() drops their
    timeframe-suffixed versions after the frames are merged.

    Returns None.
    '''

    RSI_SMA_1 = 14
    RSI_EMA_1 = 14
    EMA_1_macd = 12
    EMA_2_macd = 26
    MACD_EMA = 9
    Boll_SMA = 20
    Boll_Var = 2

    close = df['Close']

    # ----------------------------------- RSI (SMA)
    df['Close_im1'] = close.shift()
    prev_close = df['Close_im1']

    # A move is recorded only on rows that moved in that direction. The other
    # rows stay NaN (np.nan; the np.NaN alias no longer exists in NumPy 2),
    # so the rolling means below average over the moves that occurred rather
    # than over all 14 rows.
    df['U_Move'] = (close - prev_close).where(close > prev_close)
    df['D_Move'] = (prev_close - close).where(close < prev_close)

    avg_up = '{}_Avg_U'.format(RSI_SMA_1)
    avg_down = '{}_Avg_D'.format(RSI_SMA_1)
    df[avg_up] = df['U_Move'].rolling(min_periods=1, window=RSI_SMA_1).mean()
    df[avg_down] = df['D_Move'].rolling(min_periods=1, window=RSI_SMA_1).mean()

    df['RSI_{}_SMA'.format(RSI_SMA_1)] = 100 - (100 / (1 + (df[avg_up] / df[avg_down])))

    # ----------------------------------- RSI (EMA): smooths the SMA averages
    ema_up = '{}_Avg_U_EMA'.format(RSI_EMA_1)
    ema_down = '{}_Avg_D_EMA'.format(RSI_EMA_1)
    df[ema_up] = df[avg_up].ewm(span=RSI_EMA_1).mean()
    df[ema_down] = df[avg_down].ewm(span=RSI_EMA_1).mean()

    df['RSI_{}_EMA'.format(RSI_EMA_1)] = 100 - (100 / (1 + (df[ema_up] / df[ema_down])))

    # ----------------------------------- MACD
    fast = 'EMA_{}_macd'.format(EMA_1_macd)
    slow = 'EMA_{}_macd'.format(EMA_2_macd)
    macd = 'MACD_{}_{}'.format(EMA_1_macd, EMA_2_macd)
    macd_ema = 'EMA_MACD_{}_{}_{}'.format(EMA_1_macd, EMA_2_macd, MACD_EMA)
    macd_hist = 'Hist_MACD_{}_{}_{}'.format(EMA_1_macd, EMA_2_macd, MACD_EMA)

    df[fast] = close.ewm(span=EMA_1_macd).mean()
    df[slow] = close.ewm(span=EMA_2_macd).mean()
    df[macd] = 10000 * (df[fast] - df[slow])
    df[macd_ema] = df[macd].ewm(span=MACD_EMA).mean()
    df[macd_hist] = df[macd] - df[macd_ema]
    df['MACD_signal'] = df[macd_hist] * df[macd_hist].shift()

    # ----------------------------------- Bollinger Bands
    sma = 'Boll_SMA_{}'.format(Boll_SMA)
    lower = '{}_Var_-{}'.format(sma, Boll_Var)
    upper = '{}_Var_+{}'.format(sma, Boll_Var)

    df[sma] = close.rolling(min_periods=Boll_SMA, window=Boll_SMA).mean()

    # NOTE(review): the deviation is taken from the SMA series, not from Close
    # as in the textbook Bollinger definition. Kept as is so that existing
    # feature values do not change; confirm whether this is intended.
    band_std = df[sma].rolling(min_periods=Boll_SMA, window=Boll_SMA).std()

    df[lower] = df[sma] - Boll_Var * band_std
    df[upper] = df[sma] + Boll_Var * band_std

    df['Dist_BollB_SMA_{}_Var_+{}'.format(Boll_SMA, Boll_Var)] = 10000 * (close - df[upper])
    df['Dist_BollB_SMA_{}_Var_-{}'.format(Boll_SMA, Boll_Var)] = 10000 * (-close + df[lower])

    df['BollB_Wideness'] = 10000 * 2 * Boll_Var * band_std


### Read_prep_df

In [None]:
def Read_prep_df(File, n, sd, ed, clean_sweep, swing_max, perc_95):

    '''Function that reads a price file and adds the market-structure and indicator columns:

    INPUTS:

     - File        : path (or buffer) passed to pd.read_excel
     - n           : chunk size in rows; 0 processes the whole frame in one pass
     - sd, ed      : first and last 'Date' value to keep (inclusive); sd=None keeps everything
     - clean_sweep, swing_max, perc_95 : forwarded to Market_Structure

    OUTPUTS:

     - df with the columns added by Closing, Market_Structure, Trend, N_breaks, MS_periods,
       MS_range, MS_N and Indicators, on a 0..len-1 integer index.

    Chunked mode (n > 0): chunk i covers rows i*n .. (i+1)*n + 499. The first chunk is kept
    whole; from every later chunk the first 500 rows are discarded as warm-up (they are
    already covered by the previous chunk), so the kept pieces tile the frame without overlap.

    Raises IndexError when sd or ed is not found in the 'Date' column.
    '''

    warmup = 500

    def add_structure(frame):
        # Per-chunk pipeline; the order matters because each step reads
        # columns written by the previous ones.
        Closing(frame)
        Market_Structure(frame, clean_sweep, swing_max, perc_95)
        Trend(frame)
        N_breaks(frame)
        MS_periods(frame)
        MS_range(frame)
        #MS_retracement(frame)

    df = pd.read_excel(File)
    df = df.dropna()

    if sd is not None:
        df = df.loc[df[df['Date'] == sd].index[0]:df[df['Date'] == ed].index[0]]

    # Always renumber: dropna() can leave gaps in the labels, while the chunk
    # slicing below and Market_Structure address rows by consecutive integers.
    df = df.reset_index(drop=True)

    print(len(df))

    if n == 0:
        add_structure(df)
        MS_N(df)
        Indicators(df)
        return df

    pieces = []
    for i in range(0, 1 + int(len(df) / n)):

        first = i * n
        last = first + n + warmup - 1
        keep_from = first if i == 0 else first + warmup

        # A trailing chunk whose kept part would be empty adds nothing, and
        # it may be too short (or empty) for Market_Structure to seed.
        if i > 0 and keep_from >= len(df):
            break

        dfn = df.loc[first:last, :].copy()
        add_structure(dfn)

        if i > 0:
            dfn = dfn.loc[keep_from:last, :].copy()

        pieces.append(dfn)
        print(i)

    df = pd.concat(pieces)
    MS_N(df)
    Indicators(df)

    # NOTE(review): the date is normalised to a 'YYYY-MM-DD HH:MM:SS' string only
    # on this chunked path; the n == 0 path returns 'Date' as read. Kept as is.
    df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%y %H:%M:%S').dt.strftime('%Y-%m-%d %H:%M:%S')

    return df

### Rename

In [None]:
def Rename_df(df,suffix):
    
    '''Function that returns a copy of df whose column names all end in '_' + suffix

    INPUTS:

     - df     : Dataframe (left untouched)
     - suffix : timeframe tag appended to every column name, e.g. 'D1', 'H4', 'H1', 'M15'

    OUTPUTS:

     - New df with renamed columns

    '''

    renamed = {}
    for column in df.columns:
        renamed[column] = column + '_' + suffix

    return df.rename(columns=renamed)

### Merges D1 - H4 - H1

In [None]:
def Merge_shift_df_H4_D1(df1,df2):
    
    '''Function that merges the 4H & 1D dataframes (df1 & df2)

    Rows are matched on the moment their candle ends (first column + 4 hours for df1,
    first column + 1 day for df2), and the result is forward-filled, so each 4H row
    carries the values of the last daily candle that had ended by then.

    INPUTS:

     - df1    : df4H, first column holds the candle date
     - df2    : df1D, first column holds the candle date

    Side effect: the first column of both inputs is converted to datetime
    (parsed with dayfirst=True).

    OUTPUTS:

     - New merged df (one row per df1 row, df1 columns followed by df2 columns)

    '''

    date_col_1 = df1.columns[0]
    date_col_2 = df2.columns[0]

    # Replace the column by name: since pandas 2.0, assigning through
    # .iloc[:, 0] writes in place and would leave an object-dtype column.
    df1[date_col_1] = pd.to_datetime(df1[date_col_1], dayfirst=True)
    df2[date_col_2] = pd.to_datetime(df2[date_col_2], dayfirst=True)

    # The temporary merge key lives on copies, so the inputs never carry it.
    left = df1.assign(Date_del=df1[date_col_1] + dt.timedelta(hours=4))
    right = df2.assign(Date_del=df2[date_col_2] + dt.timedelta(days=1))

    merged = pd.merge(left, right, how='left', on='Date_del')
    merged = merged.drop(columns=['Date_del'])

    return merged.ffill(axis=0)

In [None]:
def Merge_shift_df_H1_H4(df1,df2):
    
    '''Function that merges the 1H & 4H dataframes (df1 & df2)

    Rows are matched on the moment their candle ends (first column + 1 hour for df1,
    first column + 4 hours for df2), and the result is forward-filled, so each 1H row
    carries the values of the last 4H candle that had ended by then.

    INPUTS:

     - df1    : df1H, first column holds the candle date
     - df2    : df4H (or the merged 4H/1D frame), first column holds the 4H candle date

    Side effect: the first column of both inputs is converted to datetime
    (parsed with dayfirst=True).

    OUTPUTS:

     - New merged df (one row per df1 row, df1 columns followed by df2 columns)

    '''

    date_col_1 = df1.columns[0]
    date_col_2 = df2.columns[0]

    # Replace the column by name: since pandas 2.0, assigning through
    # .iloc[:, 0] writes in place and would leave an object-dtype column.
    df1[date_col_1] = pd.to_datetime(df1[date_col_1], dayfirst=True)
    df2[date_col_2] = pd.to_datetime(df2[date_col_2], dayfirst=True)

    # The temporary merge key lives on copies, so the inputs never carry it.
    left = df1.assign(Date_del=df1[date_col_1] + dt.timedelta(hours=1))
    right = df2.assign(Date_del=df2[date_col_2] + dt.timedelta(hours=4))

    merged = pd.merge(left, right, how='left', on='Date_del')
    merged = merged.drop(columns=['Date_del'])

    # One frame-wide forward fill covers every column, including the 4H date,
    # so no column needs to be filled separately beforehand.
    return merged.ffill(axis=0)

In [None]:
def Merge_shift_df_M15_H1(df1,df2):
    
    '''Function that merges the 15M & 1H dataframes (df1 & df2)

    Rows are matched on the moment their candle ends (first column + 15 minutes for df1,
    first column + 1 hour for df2), and the result is forward-filled, so each 15M row
    carries the values of the last 1H candle that had ended by then.

    INPUTS:

     - df1    : dfM15, first column holds the candle date
     - df2    : dfH1 (or the merged 1H/4H/1D frame), first column holds the 1H candle date

    Side effect: the first column of both inputs is converted to datetime
    (parsed with dayfirst=True).

    OUTPUTS:

     - New merged df (one row per df1 row, df1 columns followed by df2 columns)

    '''

    date_col_1 = df1.columns[0]
    date_col_2 = df2.columns[0]

    # Replace the column by name: since pandas 2.0, assigning through
    # .iloc[:, 0] writes in place and would leave an object-dtype column.
    df1[date_col_1] = pd.to_datetime(df1[date_col_1], dayfirst=True)
    df2[date_col_2] = pd.to_datetime(df2[date_col_2], dayfirst=True)

    # The temporary merge key lives on copies, so the inputs never carry it.
    left = df1.assign(Date_del=df1[date_col_1] + dt.timedelta(minutes=15))
    right = df2.assign(Date_del=df2[date_col_2] + dt.timedelta(hours=1))

    merged = pd.merge(left, right, how='left', on='Date_del')
    merged = merged.drop(columns=['Date_del'])

    # One frame-wide forward fill covers every column, including the 1H date,
    # so no column needs to be filled separately beforehand.
    return merged.ffill(axis=0)

### MS_retracement

In [None]:
def MS_retracement(df):

    '''Function that calculates where Close_M15 sits inside each timeframe's MS range, as a
    percentage measured against the trend of that timeframe:

    INPUTS:

     - df : merged DataFrame with 'Close_M15' and, for each of D1, H4, H1 and M15, the columns
            'MS_L_<tf>', 'MS_H_<tf>' and 'Trend_<tf>'. Modified in place.

    OUTPUTS:

     - MS_retracement_<tf> ==> rounded to 1 decimal:
            Trend == -1 : 100 * (Close_M15 - MS_L) / (MS_H - MS_L)
            otherwise   : 100 * (MS_H - Close_M15) / (MS_H - MS_L)
       (a Trend of 1 and a Trend of 'none' both take the second formula)

    A zero-width range (MS_H == MS_L) gives NaN or +/-inf instead of raising.
    Returns None.
    '''

    close = df['Close_M15']

    for tf in ('D1', 'H4', 'H1', 'M15'):
        low = df['MS_L_' + tf]
        high = df['MS_H_' + tf]
        width = high - low

        from_low = 100 * (close - low) / width
        from_high = 100 * (high - close) / width

        # Down-trend rows measure up from the low; every other row measures
        # down from the high.
        downtrend = df['Trend_' + tf] == -1
        df['MS_retracement_' + tf] = from_low.where(downtrend, from_high).round(1)

    #df['MS_retracement'] = df.apply(lambda x: 100 * (x['Close'] - x['MS_L']) /(x['MS_H'] - x['MS_L']) if x['Trend'] == -1 else (100 * (x['MS_H'] - x['Close'])/(x['MS_H'] - x['MS_L'])), axis=1)
    #df['MS_retracement'] = df['MS_retracement'].round(1)

### Clean_data

In [None]:
def clean_data(df):

    '''Function that prepares the merged feature frame for modelling:

    Steps, in order:

     1. drops the intermediate indicator columns and the win/lose index columns
     2. drops every row up to AND INCLUDING the last row that contains the string 'none'
        (rows that still have no trend); skipped when no such row exists
     3. drops rows containing NaN
     4. drops rows where any indicator column is 3 or more standard deviations from its mean

    INPUTS:

     - df : merged DataFrame. Steps 1-3 modify it in place.

    OUTPUTS:

     - New df with the surviving rows; the previous index is kept as an 'index' column.

    Raises KeyError when one of the columns to drop is missing.
    '''

    from scipy import stats

    timeframes = ('M15', 'H1', 'H4', 'D1')

    # Drop non required columns: the helper columns of Indicators (one per
    # timeframe) and the win/lose indexes written by the pricing functions.
    # Price and MS columns are deliberately kept.
    helper_stems = ('U_Move', 'D_Move', 'Close_im1',
                    '14_Avg_U', '14_Avg_D', '14_Avg_U_EMA', '14_Avg_D_EMA')
    non_req_cols = [stem + '_' + tf for stem in helper_stems for tf in timeframes]
    non_req_cols += [side + '_Idx_M15_' + tf
                     for tf in ('H4', 'H1')
                     for side in ('B_Win', 'B_Lose', 'S_Win', 'S_Lose')]

    print('No. columns BEFORE dropping:',df.shape[1])
    df.drop(non_req_cols,inplace = True, axis = 1)
    print('No. columns AFTER dropping:',df.shape[1])

    # Drop None's: locate the last 'none' row by POSITION and drop everything
    # up to and including it. The slice end is exclusive, hence the + 1, so
    # the 'none' row itself goes too.
    print('Length BEFORE removing None´s:',len(df))
    has_none = df.isin(['none']).any(axis=1).to_numpy()
    if has_none.any():
        last_none_pos = int(np.flatnonzero(has_none)[-1])
        print('Last row containing a None:',df.index[last_none_pos])
        df.drop(df.index[:last_none_pos + 1],inplace=True)
    print('Length AFTER removing None´s:',len(df))

    # Drop NaN's
    print('Length BEFORE removing NaNs:',len(df))
    print('Number rows containing NaNs:',df.isnull().any(axis=1).sum())
    df.dropna(inplace=True)
    print('Length AFTER removing NaNs:',len(df))

    # Remove outliers, z = 3 (3 standard deviations), over every indicator column.
    # NOTE(review): a constant column has a NaN z-score, which fails the < 3
    # test and therefore removes every row.
    indicator_stems = ('RSI_14_SMA', 'RSI_14_EMA', 'EMA_12_macd', 'EMA_26_macd',
                       'MACD_12_26', 'EMA_MACD_12_26_9', 'Hist_MACD_12_26_9', 'MACD_signal',
                       'Boll_SMA_20', 'Boll_SMA_20_Var_-2', 'Boll_SMA_20_Var_+2',
                       'Dist_BollB_SMA_20_Var_+2', 'Dist_BollB_SMA_20_Var_-2',
                       'BollB_Wideness')
    gen_cols_outliars = [stem + '_' + tf for stem in indicator_stems for tf in timeframes]

    print('Length BEFORE removing Ouliars:',len(df))
    df = df[(np.abs(stats.zscore(df[gen_cols_outliars])) < 3).all(axis=1)]
    print('Length AFTER removing Ouliars:',len(df))

    df = df.reset_index()

    return df

### Plots

#### Plots df_X

In [None]:
def plot_MS(df, date, rng):

    '''Function that draws a candlestick chart of a single-timeframe df with its MS levels:

    INPUTS:

     - df   : DataFrame with 'Date', 'Open', 'High', 'Low', 'Close', 'MS_H' and 'MS_L'
     - date : 'Date' value of the first candle to plot
     - rng  : number of rows to plot after that candle

    OUTPUTS:

     - Prints the start date and shows the figure. MS_H is drawn in green and MS_L in blue,
       one horizontal segment per candle, running to the next candle's date.

    NOTE: rows start .. start + rng must exist as integer labels in df.
    '''

    start = df[df['Date'] == date].index[0]
    stop = start + rng

    candles = df.loc[start:stop, ['Date', 'Open', 'High', 'Low', 'Close']]

    candlestick = go.Candlestick(x=candles['Date'],
                                 open=candles['Open'],
                                 high=candles['High'],
                                 low=candles['Low'],
                                 close=candles['Close'])
    fig = go.Figure(data=[candlestick])

    fig.update_layout(width=1000, height=1000, margin=dict(l=0, r=20, b=100, t=20, pad=4))

    # (column, colour) of every level, in drawing order.
    levels = (('MS_H', "Green"), ('MS_L', "Blue"))

    for k in range(start, stop):

        segment_from = df.loc[k, 'Date']
        segment_to = df.loc[k + 1, 'Date']

        for column, colour in levels:
            level = df.loc[k, column]
            fig.add_shape(type="line",
                          x0=segment_from,
                          y0=level,
                          x1=segment_to,
                          y1=level,
                          line=dict(color=colour, width=4, dash="dashdot"))

    print(df.loc[start, 'Date'])
    fig.show()

#### Plots df (merged)

In [None]:
def plot_MS_all(df, date, rng):

    '''Function that draws the H1 candlesticks of the merged df with the MS levels of H1, H4 and D1:

    INPUTS:

     - df   : merged DataFrame with 'Date_H1', the H1 OHLC columns and 'MS_H_<tf>' / 'MS_L_<tf>'
              for H1, H4 and D1
     - date : 'Date_H1' value of the candle in the centre of the plot
     - rng  : number of rows to plot on each side of that candle

    OUTPUTS:

     - Prints the centre date and shows the figure. H1 levels are blue, H4 levels red and
       D1 levels yellow, one horizontal segment per candle, running to the next candle's date.

    NOTE: rows centre - rng .. centre + rng must exist as integer labels in df.
    '''

    centre = df[df['Date_H1'] == date].index[0]
    start = centre - rng
    stop = centre + rng

    candles = df.loc[start:stop, ['Date_H1', 'Open_H1', 'High_H1', 'Low_H1', 'Close_H1']]

    candlestick = go.Candlestick(x=candles['Date_H1'],
                                 open=candles['Open_H1'],
                                 high=candles['High_H1'],
                                 low=candles['Low_H1'],
                                 close=candles['Close_H1'])
    fig = go.Figure(data=[candlestick])

    fig.update_layout(width=1000, height=2000, margin=dict(l=0, r=20, b=100, t=20, pad=4))

    # (column, colour) of every level, in drawing order.
    levels = (('MS_H_H1', "Blue"), ('MS_L_H1', "Blue"),
              ('MS_H_H4', "Red"), ('MS_L_H4', "Red"),
              ('MS_H_D1', "Yellow"), ('MS_L_D1', "Yellow"))

    for k in range(start, stop):

        segment_from = df.loc[k, 'Date_H1']
        segment_to = df.loc[k + 1, 'Date_H1']

        for column, colour in levels:
            level = df.loc[k, column]
            fig.add_shape(type="line",
                          x0=segment_from,
                          y0=level,
                          x1=segment_to,
                          y1=level,
                          line=dict(color=colour, width=4, dash="dashdot"))

    print(df.loc[centre, 'Date_H1'])
    fig.show()

#### Price_M15_H1

In [None]:
def Price_M15_H1(df,k,ratio,pip_min,pip_max,pip_over):
    
    '''Function that prices all M15 candles at their [Close_M15] and calcs. their stop loss &
                limits based on the H1 market structure, then labels each candle by
                whether the limit or the stop is reached first in the following candles.
    INPUTS:

     - df       : merged Dataframe with 'Close_M15', 'Low_M15', 'High_M15', 'MS_L_H1', 'MS_H_H1'.
                  Modified in place and also returned. Rows are handled by position.
     - k        : look-ahead window in rows. Row i inspects rows i+1 .. i+k; the last k rows
                  only look up to the third row from the end (their window may be empty).
     - ratio    : limit:stop_loss ratio (normally 3)
     - pip_min  : lower bound for a stop loss (pips)
     - pip_max  : upper bound for a stop loss (pips); also used when the stop loss is NaN
     - pip_over : pips subtracted from the raw stop-loss distance

    OUTPUTS:

     - Price_M15          ==> output: copy of [Close_M15]
     - B_Stop_Loss_M15_H1 ==> output: # pips between ([Price_M15] - [MS_L_H1]) - pip_over, clamped
     - B_Limit_M15_H1     ==> output: # pips between (ratio x [B_Stop_Loss_M15_H1])
     - S_Stop_Loss_M15_H1 ==> output: # pips between ([MS_H_H1] - [Price_M15]) - pip_over, clamped
     - S_Limit_M15_H1     ==> output: # pips between (ratio x [S_Stop_Loss_M15_H1])
     - B_Lose_Idx / B_Win_Idx / S_Lose_Idx / S_Win_Idx (_M15_H1)
                          ==> output: offset inside the window of the first candle that hits the
                              stop / limit; 1000000 when none does
     - Labelb_M15_H1      ==> output: 1 or 0 whether the action of buying  was succesful or not
     - Labels_M15_H1      ==> output: 1 or 0 whether the action of selling was succesful or not
                              (a tie, limit and stop hit on the same candle, counts as 0)

    NOTE(review): earlier documentation described Price_M15 as [Close_M15].shift(); the code
    has never applied a shift. Confirm which one is intended.
    '''

    no_hit = 1000000

    def clamp(pips):
        # Cap at pip_max first, then floor at pip_min. NaN fails the first
        # comparison and therefore becomes pip_max.
        capped = pips.where(pips < pip_max, pip_max)
        return capped.where(capped > pip_min, pip_min)

    def first_hit(hits):
        # Offset of the first True inside the window, or the no-hit marker.
        found = np.flatnonzero(hits)
        return found[0] if found.size else no_hit

# Price
    df['Price_M15']          = df['Close_M15']
    df['B_Stop_Loss_M15_H1'] = clamp((10000 * (df['Price_M15'] - df['MS_L_H1'])) - pip_over)
    df['B_Limit_M15_H1']     = ratio * df['B_Stop_Loss_M15_H1']
    df['S_Stop_Loss_M15_H1'] = clamp(10000 * (df['MS_H_H1'] - df['Price_M15']) - pip_over)
    df['S_Limit_M15_H1']     = ratio * df['S_Stop_Loss_M15_H1']

    # Work on plain arrays: every window is then a cheap slice and each
    # condition is evaluated once.
    n_rows = len(df)
    lows = df['Low_M15'].to_numpy()
    highs = df['High_M15'].to_numpy()
    price = df['Price_M15'].to_numpy()
    b_stop = df['B_Stop_Loss_M15_H1'].to_numpy()
    b_limit = df['B_Limit_M15_H1'].to_numpy()
    s_stop = df['S_Stop_Loss_M15_H1'].to_numpy()
    s_limit = df['S_Limit_M15_H1'].to_numpy()

    b_lose = np.empty(n_rows, dtype='float64')
    b_win = np.empty(n_rows, dtype='float64')
    s_lose = np.empty(n_rows, dtype='float64')
    s_win = np.empty(n_rows, dtype='float64')

    for i in range(n_rows):

        if i < n_rows - k:
            m = k
        else:
            m = n_rows - i - 3

        if (i > 0) and (i % 20000 == 0):
            print('Price_M15_H1 - ',int(100*((i/20000)*20000/len(df))),'%')

        # Rows i+1 .. i+m inclusive; empty when m <= 0.
        window = slice(i + 1, max(i + m + 1, i + 1))
        window_lows = lows[window]
        window_highs = highs[window]

# BUY - Losing Index / Winning Index
        b_lose[i] = first_hit(window_lows <= price[i] - (0.0001 * b_stop[i]))
        b_win[i] = first_hit(window_highs >= price[i] + (0.0001 * b_limit[i]))

# SELL - Losing Index / Winning Index
        s_lose[i] = first_hit(window_highs >= price[i] + (0.0001 * s_stop[i]))
        s_win[i] = first_hit(window_lows <= price[i] - (0.0001 * s_limit[i]))

    df['B_Lose_Idx_M15_H1'] = b_lose
    df['B_Win_Idx_M15_H1'] = b_win
    df['S_Lose_Idx_M15_H1'] = s_lose
    df['S_Win_Idx_M15_H1'] = s_win

# Label
# 1 if B_Win Index < B_Lose Index
    df['Labelb_M15_H1'] = (df['B_Win_Idx_M15_H1'] < df['B_Lose_Idx_M15_H1']).astype(int)

# 1 if S_Win Index < S_Lose Index
    df['Labels_M15_H1'] = (df['S_Win_Idx_M15_H1'] < df['S_Lose_Idx_M15_H1']).astype(int)

# Drops all index
    #df.drop(['B_Lose_Idx_M15_H1','B_Win_Idx_M15_H1','S_Lose_Idx_M15_H1','S_Win_Idx_M15_H1'],axis = 1, inplace =True)

    return df

#### Price_M15_H4

In [None]:
def Price_M15_H4(df,k,ratio,pip_min,pip_max,pip_over):
    
    '''Function that prices all M15 candles at [Close_M15] and derives the stop loss & limit of a
                buy and of a sell from the H4 market structure ([MS_L_H4] / [MS_H_H4]).

    INPUTS:

     - df       : Dataframe with Close_M15, Low_M15, High_M15, MS_L_H4 and MS_H_H4. Following rows are
                  addressed by label as i+1 ... i+m, so the index is expected to be 0..len(df)-1.
     - k        : number of following candles scanned for a stop loss / limit hit. Rows closer than k
                  to the end of df are scanned up to label len(df)-3 only.
     - ratio    : limit:stop_loss ratio (normally 3)
     - pip_min  : smallest stop loss allowed (pips)
     - pip_max  : largest stop loss allowed (pips)
     - pip_over : pips subtracted from the raw distance to the market structure level

    OUTPUTS (columns written on df; df itself is returned):

     - Price_M15          ==> output: value of [Close_M15]
     - B_Stop_Loss_M15_H4 ==> output: # pips between ([Price_M15] - [MS_L_H4]) - pip_over, kept in [pip_min, pip_max]
     - B_Limit_M15_H4     ==> output: # pips between (ratio x [B_Stop_Loss_M15_H4])
     - S_Stop_Loss_M15_H4 ==> output: # pips between ([MS_H_H4] - [Price_M15]) - pip_over, kept in [pip_min, pip_max]
     - S_Limit_M15_H4     ==> output: # pips between (ratio x [S_Stop_Loss_M15_H4])
     - B_Lose_Idx_M15_H4, B_Win_Idx_M15_H4, S_Lose_Idx_M15_H4, S_Win_Idx_M15_H4
                          ==> output: offset inside the scanned window (0 = next candle) of the first
                                      candle touching the stop loss / limit; 1000000 if none does
     - Labelb_M15_H4      ==> output: 1 if the buy  limit is touched before the buy  stop loss, else 0
     - Labels_M15_H4      ==> output: 1 if the sell limit is touched before the sell stop loss, else 0
    '''

    NO_HIT = 1000000    # offset stored when the level is never touched inside the window

    def _clamp(pips):
        # Plain comparisons instead of Series.clip: a NaN distance fails "< pip_max" and therefore
        # becomes pip_max, exactly as the former row-wise lambdas did.
        capped = np.where(pips < pip_max, pips, pip_max)
        return np.where(capped > pip_min, capped, pip_min)

    def _first_hit(touched):
        # Position of the first True in the window mask, NO_HIT when the mask has none.
        offsets = np.where(touched)[0]
        return offsets[0] if len(offsets) else NO_HIT

    # Price, stop losses and limits
    df['Price_M15']          = df['Close_M15']
    df['B_Stop_Loss_M15_H4'] = _clamp((10000 * (df['Price_M15'] - df['MS_L_H4'])) - pip_over)
    df['B_Limit_M15_H4']     = ratio * df['B_Stop_Loss_M15_H4']
    df['S_Stop_Loss_M15_H4'] = _clamp(10000 * (df['MS_H_H4'] - df['Price_M15']) - pip_over)
    df['S_Limit_M15_H4']     = ratio * df['S_Stop_Loss_M15_H4']

    lows, highs = df['Low_M15'], df['High_M15']
    total = len(df)
    b_lose, b_win, s_lose, s_win = [], [], [], []

    levels = zip(df.index, df['Price_M15'],
                 df['B_Stop_Loss_M15_H4'], df['B_Limit_M15_H4'],
                 df['S_Stop_Loss_M15_H4'], df['S_Limit_M15_H4'])

    for i, price, b_stop, b_limit, s_stop, s_limit in levels:

        # Window length: k candles, shortened near the end of the frame
        m = k if i < total - k else total - i - 3

        if i > 0 and i % 10000 == 0:
            print('Price_M15_H4 - ',int(100*((i/10000)*10000/total)),'%')

        # Each window is sliced once and shared by the two levels that read it
        window_low  = lows.loc[i+1:i+m].to_numpy()
        window_high = highs.loc[i+1:i+m].to_numpy()

        b_lose.append(_first_hit(window_low  <= price - (0.0001*b_stop)))    # BUY  - Losing Index
        b_win.append( _first_hit(window_high >= price + (0.0001*b_limit)))   # BUY  - Winning Index
        s_lose.append(_first_hit(window_high >= price + (0.0001*s_stop)))    # SELL - Losing Index
        s_win.append( _first_hit(window_low  <= price - (0.0001*s_limit)))   # SELL - Winning Index

    # One column assignment per index instead of one cell write per row; float keeps the hit
    # offsets and the NO_HIT sentinel in a single numeric dtype.
    df['B_Lose_Idx_M15_H4'] = np.asarray(b_lose, dtype=float)
    df['B_Win_Idx_M15_H4']  = np.asarray(b_win,  dtype=float)
    df['S_Lose_Idx_M15_H4'] = np.asarray(s_lose, dtype=float)
    df['S_Win_Idx_M15_H4']  = np.asarray(s_win,  dtype=float)

    # Labels: 1 when the winning level is reached strictly before the losing one
    df['Labelb_M15_H4'] = (df['B_Win_Idx_M15_H4'] < df['B_Lose_Idx_M15_H4']).astype(int)
    df['Labels_M15_H4'] = (df['S_Win_Idx_M15_H4'] < df['S_Lose_Idx_M15_H4']).astype(int)

    # The four *_Idx_* columns are deliberately left on df.
    return df

#### Price *** Not Used

In [None]:
def Price_M15_H4_H1(df,k4,k1,ratio,pip_min,pip_max,pip_over):
    
    '''Function that prices all M15 candles at [Close_M15] and derives, in one pass over df, the stop
                loss & limit of a buy and of a sell from both the H4 and the H1 market structure.

    INPUTS:

     - df       : Dataframe with Close_M15, Low_M15, High_M15, MS_L_H4, MS_H_H4, MS_L_H1 and MS_H_H1.
                  Following rows are addressed by label as i+1 ... i+m, so the index is expected
                  to be 0..len(df)-1.
     - k4       : number of following candles scanned for the H4 based levels
     - k1       : number of following candles scanned for the H1 based levels
                  (rows closer than k4 / k1 to the end of df are scanned up to label len(df)-3 only)
     - ratio    : limit:stop_loss ratio (normally 3)
     - pip_min  : smallest stop loss allowed (pips), shared by H4 and H1
     - pip_max  : largest stop loss allowed (pips), shared by H4 and H1
     - pip_over : pips subtracted from the raw distance to the market structure level

    OUTPUTS (columns written on df; df itself is returned), with XX = H4 and XX = H1:

     - Price_M15          ==> output: value of [Close_M15]
     - B_Stop_Loss_M15_XX ==> output: # pips between ([Price_M15] - [MS_L_XX]) - pip_over, kept in [pip_min, pip_max]
     - B_Limit_M15_XX     ==> output: # pips between (ratio x [B_Stop_Loss_M15_XX])
     - S_Stop_Loss_M15_XX ==> output: # pips between ([MS_H_XX] - [Price_M15]) - pip_over, kept in [pip_min, pip_max]
     - S_Limit_M15_XX     ==> output: # pips between (ratio x [S_Stop_Loss_M15_XX])
     - B_Lose_Idx_M15_XX, B_Win_Idx_M15_XX, S_Lose_Idx_M15_XX, S_Win_Idx_M15_XX
                          ==> output: offset inside the scanned window (0 = next candle) of the first
                                      candle touching the stop loss / limit; 1000000 if none does
     - Labelb_M15_XX      ==> output: 1 if the buy  limit is touched before the buy  stop loss, else 0
     - Labels_M15_XX      ==> output: 1 if the sell limit is touched before the sell stop loss, else 0
    '''

    NO_HIT   = 1000000                      # offset stored when the level is never touched
    horizons = (('H4', k4), ('H1', k1))     # market structure timeframe -> window length
    sides    = ('B_Lose', 'B_Win', 'S_Lose', 'S_Win')
    distances = ('B_Stop_Loss', 'B_Limit', 'S_Stop_Loss', 'S_Limit')

    def _clamp(pips):
        # Plain comparisons instead of Series.clip: a NaN distance fails "< pip_max" and therefore
        # becomes pip_max, exactly as the former row-wise lambdas did.
        capped = np.where(pips < pip_max, pips, pip_max)
        return np.where(capped > pip_min, capped, pip_min)

    def _first_hit(touched):
        # Position of the first True in the window mask, NO_HIT when the mask has none.
        offsets = np.where(touched)[0]
        return offsets[0] if len(offsets) else NO_HIT

    # Price, then stop losses and limits for H4 followed by H1
    df['Price_M15'] = df['Close_M15']

    for ms, _ in horizons:
        df['B_Stop_Loss_M15_'+ms] = _clamp((10000 * (df['Price_M15'] - df['MS_L_'+ms])) - pip_over)
        df['B_Limit_M15_'+ms]     = ratio * df['B_Stop_Loss_M15_'+ms]
        df['S_Stop_Loss_M15_'+ms] = _clamp(10000 * (df['MS_H_'+ms] - df['Price_M15']) - pip_over)
        df['S_Limit_M15_'+ms]     = ratio * df['S_Stop_Loss_M15_'+ms]

    lows, highs = df['Low_M15'], df['High_M15']
    total  = len(df)
    prices = df['Price_M15'].tolist()
    pips   = {ms: {name: df[name+'_M15_'+ms].tolist() for name in distances} for ms, _ in horizons}
    hits   = {ms: {side: [] for side in sides} for ms, _ in horizons}

    for pos, i in enumerate(df.index):

        if i > 0 and i % 20000 == 0:
            print('Price - ',int(100*((i/20000)*20000/total)),'%')

        price = prices[pos]

        for ms, k in horizons:

            # Window length: k candles, shortened near the end of the frame
            m = k if i < total - k else total - i - 3

            # Each window is sliced once and shared by the two levels that read it
            window_low  = lows.loc[i+1:i+m].to_numpy()
            window_high = highs.loc[i+1:i+m].to_numpy()

            level, found = pips[ms], hits[ms]
            found['B_Lose'].append(_first_hit(window_low  <= price - (0.0001*level['B_Stop_Loss'][pos])))
            found['B_Win'].append( _first_hit(window_high >= price + (0.0001*level['B_Limit'][pos])))
            found['S_Lose'].append(_first_hit(window_high >= price + (0.0001*level['S_Stop_Loss'][pos])))
            found['S_Win'].append( _first_hit(window_low  <= price - (0.0001*level['S_Limit'][pos])))

    # One column assignment per index instead of one cell write per row; float keeps the hit
    # offsets and the NO_HIT sentinel in a single numeric dtype.
    for ms, _ in horizons:
        for side in sides:
            df[side+'_Idx_M15_'+ms] = np.asarray(hits[ms][side], dtype=float)

    # Labels: 1 when the winning level is reached strictly before the losing one
    for ms, _ in horizons:
        df['Labelb_M15_'+ms] = (df['B_Win_Idx_M15_'+ms] < df['B_Lose_Idx_M15_'+ms]).astype(int)
        df['Labels_M15_'+ms] = (df['S_Win_Idx_M15_'+ms] < df['S_Lose_Idx_M15_'+ms]).astype(int)

    return df

## Data Preparation
### Read Files and apply functions

In [None]:
#sd = '2007-01-02 00:00:00'
#ed = '2021-06-01 00:00:00'

#dff_D1  = Read_prep_df( File = '../0. Data/EURUSD_D1.xlsx' , n = 0,     sd = sd, ed = ed, clean_sweep = 1, swing_max = 3, perc_95 = 10000) # per_75 = 577.2; per_95 = 990
#dff_H4  = Read_prep_df( File = '../0. Data/EURUSD_H4.xlsx' , n = 0,     sd = sd, ed = ed, clean_sweep = 1, swing_max = 3, perc_95 = 480  ) # per_75 = 259.1; per_95 = 480
#dff_H1  = Read_prep_df( File = '../0. Data/EURUSD_H1.xlsx' , n = 15000, sd = sd, ed = ed, clean_sweep = 1, swing_max = 3, perc_95 = 220  ) # per_75 = 126.6! per_95 = 220
#dff_M15 = Read_prep_df( File = '../0. Data/EURUSD_M15.xlsx', n = 15000, sd = sd, ed = ed, clean_sweep = 1, swing_max = 3, perc_95 = 120  ) # per_75 = 81.8! per_95 = 120

### Merge dataframes - df

In [None]:
#df_D1_r   = Rename_df(dff_D1,'D1')
#df_H4_r   = Rename_df(dff_H4,'H4')
#df_H1_r   = Rename_df(dff_H1,'H1')
#df_M15_r  = Rename_df(dff_M15,'M15')

#df_H4_D1  = Merge_shift_df_H4_D1(df_H4_r,df_D1_r)
#df_H1_H4  = Merge_shift_df_H1_H4(df_H1_r,df_H4_D1)
#dff_M15_H1 = Merge_shift_df_M15_H1(df_M15_r,df_H1_H4)

### MS_retracement- df

In [None]:
#MS_retracement(dff_M15_H1)

### Price df

In [None]:
#dffv = Price_M15_H4(dff_M15_H1 , k = 5000, ratio = 3, pip_min = 15, pip_max = 125, pip_over = -3)
#dffv = Price_M15_H1(dffv       , k = 1500, ratio = 3, pip_min = 15, pip_max = 60 , pip_over = -3)

### Save Dataframes

In [None]:
#df_D1    .to_pickle('df_D1_EURUSD.pkl')
#df_H4    .to_pickle('df_H4_EURUSD.pkl')
#df_H1    .to_pickle('df_H1_EURUSD.pkl')
#df_M15   .to_pickle('df_M15_EURUSD.pkl')
#df_M15_H1.to_pickle('df_M15_H1.pkl')
#dfv      .to_pickle('dfv_EURUSD.pkl')

#dff_D1    .to_pickle('dff_D1_EURUSD.pkl')
#dff_H4    .to_pickle('dff_H4_EURUSD.pkl')
#dff_H1    .to_pickle('dff_H1_EURUSD.pkl')
#dff_M15   .to_pickle('dff_M15_EURUSD.pkl')
#dff_M15_H1.to_pickle('dff_M15_H1.pkl')
#dffv      .to_pickle('dffv_EURUSD.pkl')

## Load Raw Dataframes

In [None]:
# Loads the pickled dataframes into the notebook globals used by the cells below.
# Note that the df_* names are bound to the 'dff_*' pickles and dfv to 'dffv_EURUSD.pkl'.
# pd.read_pickle unpickles the file content, so only load files produced by this notebook.
df_D1     = pd.read_pickle('dff_D1_EURUSD.pkl')
df_H4     = pd.read_pickle('dff_H4_EURUSD.pkl')
df_H1     = pd.read_pickle('dff_H1_EURUSD.pkl')
df_M15    = pd.read_pickle('dff_M15_EURUSD.pkl')
#df_M15_H1 = pd.read_pickle('df_M15_H1.pkl')
dfv       = pd.read_pickle('dffv_EURUSD.pkl')

# Alternative set of pickles ('df_*' / 'dfv_*' file names), currently disabled
#df_D1     = pd.read_pickle('df_D1_EURUSD.pkl')
#df_H4     = pd.read_pickle('df_H4_EURUSD.pkl')
#df_H1     = pd.read_pickle('df_H1_EURUSD.pkl')
#df_M15    = pd.read_pickle('df_M15_EURUSD.pkl')
#df_M15_H1 = pd.read_pickle('df_M15_H1.pkl')
#dfv       = pd.read_pickle('dfv_EURUSD.pkl')

## Test Raw Data

In [None]:
# Test merge function: shows the dates and closes of all timeframes for a random window of rows.
# randrange keeps the start row inside dfv; randint(0, len(dfv)) could also return len(dfv).
r, rge = random.randrange(len(dfv)), 100
dfv.loc[r:r+rge,['Date_M15','Date_H1','Date_H4','Date_D1','Close_M15','Close_H1','Close_H4','Close_D1']]

In [None]:
# Test Closing Function: shows open, close and Closing flag of all timeframes for a random window.
# randrange keeps the start row inside dfv; randint(0, len(dfv)) could also return len(dfv).
r, rge = random.randrange(len(dfv)), 20
dfv.loc[r:r+rge,['Open_M15','Close_M15','Closing_M15','Open_H1','Close_H1','Closing_H1','Open_H4','Close_H4','Closing_H4','Open_D1','Close_D1','Closing_D1']]

In [None]:
# Test Market Structure: shows the candles and their market structure columns for a random window.
# randrange keeps the start row inside df_M15; randint(0, len(df_M15)) could also return len(df_M15).
r, rge = random.randrange(len(df_M15)), 20
df_M15.loc[r:r+rge,['Open','High','Low','Close','MS_Sit','MS_H','MS_L']]

In [None]:
# Test Trend & N_Breaks & MS_N: only rows whose MS_Sit is not 'MS' are shown, for a random window.
# randrange keeps the start label inside df_M15; randint(0, len(df_M15)) could also return len(df_M15).
r, rge = random.randrange(len(df_M15)), 200
df_M15[df_M15['MS_Sit'] != 'MS'].loc[r:r+rge,['MS_Sit','Trend','N_Breaks','MS_N']]

In [None]:
# Test MS_periods: shows MS_Sit next to MS_Pds for a random window of rows.
# randrange keeps the start row inside df_M15; randint(0, len(df_M15)) could also return len(df_M15).
r, rge = random.randrange(len(df_M15)), 20
df_M15.loc[r:r+rge,['MS_Sit','MS_Pds']]

In [None]:
# Test MS_range: shows the market structure high/low next to MS_range for a random window of rows.
# randrange keeps the start row inside df_M15; randint(0, len(df_M15)) could also return len(df_M15).
r, rge = random.randrange(len(df_M15)), 20
df_M15.loc[r:r+rge,['MS_H','MS_L','MS_Sit','MS_range']]

In [None]:
# Test MS_retracement: shows the M15 market structure next to MS_retracement_M15 for a random window.
# randrange keeps the start row inside dfv; randint(0, len(dfv)) could also return len(dfv).
r, rge = random.randrange(len(dfv)), 20
dfv.loc[r:r+rge,['Date_M15','MS_L_M15','MS_H_M15','Close_M15','MS_Sit_M15','Trend_M15','MS_retracement_M15']]

In [None]:
# Test Price: rebuilds the sell stop-loss and limit price levels (M15_H1) so they can be compared
# by eye with the stored hit offsets and label, for a random window of rows.
# randrange keeps the start row inside dfv; randint(0, len(dfv)) could also return len(dfv).
r, rge = random.randrange(len(dfv)), 50

# Helper columns added to dfv: price at which the sell loses / wins
dfv['Price_Lose'] = dfv['Price_M15'] + 0.0001 * dfv['S_Stop_Loss_M15_H1']
dfv['Price_Win'] = dfv['Price_M15'] - 0.0001 * dfv['S_Limit_M15_H1']
dfv.loc[r:r+rge,['High_M15','Low_M15','Close_M15','Price_M15','S_Stop_Loss_M15_H1','Price_Lose','S_Lose_Idx_M15_H1','S_Limit_M15_H1','Price_Win','S_Win_Idx_M15_H1','Labels_M15_H1']]

#dfv.drop(['Price_Lose','Price_Win'],axis = 1, inplace = True)

In [None]:
# Indicator - SMA: shows the inputs and the result of RSI_14_SMA for a random window of rows.
# randrange keeps the start row inside df_M15; randint(0, len(df_M15)) could also return len(df_M15).
r, rge = random.randrange(len(df_M15)), 20
df_M15.loc[r:r+rge,['Close','Close_im1','U_Move','D_Move','14_Avg_U','14_Avg_D','RSI_14_SMA']]

In [None]:
# Indicator - EMA: shows the inputs and the result of RSI_14_EMA for a random window of rows.
# randrange keeps the start row inside df_M15; randint(0, len(df_M15)) could also return len(df_M15).
r, rge = random.randrange(len(df_M15)), 20
df_M15.loc[r:r+rge,['Close','Close_im1','U_Move','D_Move','14_Avg_U_EMA','14_Avg_D_EMA','RSI_14_EMA']]

In [None]:
# Indicator - MACD: shows the MACD columns for a random window of rows.
# randrange keeps the start row inside df_M15; randint(0, len(df_M15)) could also return len(df_M15).
r, rge = random.randrange(len(df_M15)), 20
df_M15.loc[r:r+rge,['EMA_12_macd','EMA_26_macd','MACD_12_26','EMA_MACD_12_26_9','Hist_MACD_12_26_9','MACD_signal']]

In [None]:
# Indicator - Bollinger Bands: shows the Bollinger columns for a random window of rows.
# randrange keeps the start row inside df_M15; randint(0, len(df_M15)) could also return len(df_M15).
# NOTE(review): 'Boll_SMA_20_Var_+2' is listed twice below; the second entry was presumably meant
# to be the lower band ('Boll_SMA_20_Var_-2') - confirm the column name before changing it.
r, rge = random.randrange(len(df_M15)), 20
df_M15.loc[r:r+rge,['Close','Boll_SMA_20','Boll_SMA_20_Var_+2','Boll_SMA_20_Var_+2','Dist_BollB_SMA_20_Var_-2','Dist_BollB_SMA_20_Var_+2']]

## Clean Data

In [None]:
# Reloads the priced dataframe from disk and binds its cleaned version to df.
# clean_data is not defined in this part of the notebook; see its definition for what it removes.
dfv = pd.read_pickle('dffv_EURUSD.pkl')
df  = clean_data(dfv)

## Explore Clean Data

### Functions

In [None]:
def Auto_Analysis(df, itr, M15_H1, prt_ret, a1b, a2b, b1b, b2b, c1b, c2b, d1b, d2b, r1, r2, s1, s2, t1, t2, u1, u2, n1, n2, o1, o2, p1, p2, q1, q2): 

    '''Function that filters df by trend, retracement and market structure range on the D1, H4, H1
                and M15 timeframes, for a buy set-up and for the mirrored sell set-up.

    INPUTS:

     - df       : Dataframe with Trend_XX, MS_retracement_XX and MS_range_XX for XX in D1, H4, H1, M15
                  (plus Labelb_<M15_H1> / Labels_<M15_H1> when prt_ret is 'print')
     - itr      : number of resampling rounds (only used when prt_ret is 'print')
     - M15_H1   : suffix of the label columns that are scored: 'Labelb_'+M15_H1 and 'Labels_'+M15_H1
     - prt_ret  : 'print' prints the resampled scores and returns nothing; any other value returns
                  the two filtered dataframes
     - a1b, a2b : the two accepted values of [Trend_D1]  for a buy
     - b1b, b2b : the two accepted values of [Trend_H4]  for a buy
     - c1b, c2b : the two accepted values of [Trend_H1]  for a buy
     - d1b, d2b : the two accepted values of [Trend_M15] for a buy
                  (the sell filter uses the negated values)
     - r1, r2 / s1, s2 / t1, t2 / u1, u2 : inclusive bounds of [MS_retracement_D1 / _H4 / _H1 / _M15]
     - n1, n2 / o1, o2 / p1, p2 / q1, q2 : inclusive bounds of [MS_range_D1 / _H4 / _H1 / _M15]

    OUTPUTS:

     - prt_ret == 'print' ==> prints, for buy and for sell, the mean label and the row count of six
                              samples (a spaced and a random sample of each third of df) averaged
                              over itr rounds, plus a count-weighted total. Needs split3_dfs,
                              Space_dfa and Space_dfb.
     - otherwise          ==> returns (x, y): rows of df passing the buy filter, rows passing the
                              sell filter

    The Closing_XX and N_Breaks_XX filters are not applied.
    '''

    a1s, a2s, b1s, b2s, c1s, c2s, d1s, d2s = -a1b, -a2b, -b1b, -b2b, -c1b, -c2b, -d1b, -d2b

    timeframes  = ('D1', 'H4', 'H1', 'M15')
    buy_trends  = {'D1': (a1b, a2b), 'H4': (b1b, b2b), 'H1': (c1b, c2b), 'M15': (d1b, d2b)}
    sell_trends = {'D1': (a1s, a2s), 'H4': (b1s, b2s), 'H1': (c1s, c2s), 'M15': (d1s, d2s)}
    retracement = {'D1': (r1, r2),   'H4': (s1, s2),   'H1': (t1, t2),   'M15': (u1, u2)}
    ms_range    = {'D1': (n1, n2),   'H4': (o1, o2),   'H1': (p1, p2),   'M15': (q1, q2)}

    def _select(frame, trends):
        # Single definition of the filter shared by the buy side, the sell side and both outputs.
        # The +-1000000 bound on MS_retracement_M15 also drops rows where that column is NaN.
        keep = frame['MS_retracement_M15'].between(-1000000, 1000000)
        for tf in timeframes:
            first, second = trends[tf]
            keep = keep & ((frame['Trend_'+tf] == first) | (frame['Trend_'+tf] == second))
            keep = keep & frame['MS_retracement_'+tf].between(*retracement[tf])
            keep = keep & frame['MS_range_'+tf].between(*ms_range[tf])
        return frame[keep]

    if prt_ret != 'print':
        return _select(df, buy_trends), _select(df, sell_trends)

    # The frame is only split when the resampled report is requested.
    # NOTE: split3_dfs returns the thirds in row order, so with this unpacking df3 is the first
    # third of df and df1 the last one.
    df3, df2, df1 = split3_dfs(df)
    thirds = (('1', df1), ('2', df2), ('3', df3))
    pools  = {tag: part.index.to_list() for tag, part in thirds}

    names = [tag+kind for tag, _ in thirds for kind in ('a', 'b')]     # 1a, 1b, 2a, 2b, 3a, 3b
    buy   = {name: {'mean': [], 'count': []} for name in names}
    sell  = {name: {'mean': [], 'count': []} for name in names}

    def _record(book, sample, trends, column):
        # describe() starts with count and mean; positional access through iloc avoids the
        # deprecated integer lookup on a label-indexed Series.
        stats = _select(sample, trends)[column].describe()
        book['count'].append(stats.iloc[0])
        book['mean'].append(stats.iloc[1])

    for _ in range(itr):

        # 'a' = rows picked by Space_dfa, 'b' = random tenth of the rows picked by Space_dfb
        draws = {}
        for tag, part in thirds:
            draws[tag+'a'] = part.loc[Space_dfa(part,10),:]
            draws[tag+'b'] = part.loc[Space_dfb(pools[tag],int(len(part)/10)),:]

        for name in names:
            _record(buy, draws[name], buy_trends, 'Labelb_'+str(M15_H1))
        for name in names:
            _record(sell, draws[name], sell_trends, 'Labels_'+str(M15_H1))

    def _totals(book):
        # Average count over the six samples and count-weighted mean label.
        # NOTE(review): a filter that matches no row gives a zero count, and the weighted mean then
        # divides by zero - confirm how that case should be reported.
        order = ('1a', '2a', '3a', '1b', '2b', '3b')
        count_sum = None
        weighted  = None
        for name in order:
            c, p = mean(book[name]['count']), mean(book[name]['mean'])
            count_sum = c if count_sum is None else count_sum + c
            weighted  = p*c if weighted is None else weighted + p*c
        count = round(count_sum/6,2)
        return round(weighted/(6*count),2), count

    avg_b, count_b = _totals(buy)
    avg_s, count_s = _totals(sell)

    print('TOTAL SCORE BUY  --- MEAN:',avg_b,' --- COUNT:',count_b)
    print('TOTAL SCORE SELL --- MEAN:',avg_s,' --- COUNT:',count_s)
    print('BUY VBLES   :','(',a1b,a2b,')(',b1b,b2b,')(',c1b,c2b,')(',d1b,d2b,')')
    print('SELL VBLES  :','(',a1s,a2s,')(',b1s,b2s,')(',c1s,c2s,')(',d1s,d2s,')')
    print('COMMON VBLES:','(',r1,r2,')(',s1,s2,')(',t1,t2,')(',u1,u2,')n(',n1,n2,')o(',o1,o2,')p(',p1,p2,')q(',q1,q2,')')

    for title, book, avg, count in (('BUY DFRAMES:', buy, avg_b, count_b), ('SELL DFRAMES:', sell, avg_s, count_s)):
        print(title)
        for tag, _ in thirds:
            spaced, sampled = book[tag+'a'], book[tag+'b']
            print('df'+tag+'a',round(mean(spaced['mean']),2),round(mean(spaced['count']),2),' - df'+tag+'b',round(mean(sampled['mean']),2),round(mean(sampled['count']),2))
        print('Tot score:')
        print('mean:',avg,'   count:',count)

In [None]:
def split3_dfs(df):
    
    '''Function that cuts df into three consecutive pieces (by index label) and returns them as
                independent copies, each renumbered from 0.

    The label slices are inclusive: the pieces are 0..len/3, len/3+1..2*len/3 and 2*len/3+1..end.
    '''
    
    first_cut  = int(len(df)/3)
    second_cut = int(2*len(df)/3)
    
    bounds = ((0, first_cut), (first_cut + 1, second_cut), (second_cut + 1, None))
    
    head, middle, tail = (
        df.loc[start:stop,:].copy().reset_index(drop=True) for start, stop in bounds
    )
    
    return head, middle, tail

In [None]:
# Space_dfa - Function that spaces dataframe rows by a distance k

import random

def Space_dfa(df,k):
    
    '''Function that returns row positions of df spaced exactly k apart, starting at a random offset.

    INPUTS:

     - df : Dataframe (only its length is used)
     - k  : distance between two consecutive picked rows

    OUTPUTS:

     - list [r, r+k, r+2k, ...] with r drawn by random.randint(0, k); every value is < len(df),
       so the list is empty when r falls outside the dataframe.
    '''
    
    r = random.randint(0, k)
    
    # Stepping from r itself keeps every gap equal to k (the former loop started at r+1, so the
    # first two picks were adjacent rows) and never yields a row beyond the end of df.
    return list(range(r, len(df), k))

In [None]:
# Space_dfb - Function that picks 'size' number of random rows from dataframe

import random

def Space_dfb(l1,size):

    '''Function that draws `size` distinct items at random from the list l1 and returns them in
                ascending order. l1 itself is left untouched.
    '''

    return sorted(random.sample(l1, size))

In [None]:
# Identify when these points happen

def N_months(df):
    
    '''Function that counts the distinct calendar months ('%Y-%m') present in [Date_M15] of df.'''
    
    stamps = pd.to_datetime(pd.Series(df['Date_M15']), format = '%Y%m%d')
    months = {stamp.strftime('%Y-%m') for stamp in stamps}
    
    return len(months)
    
def diff_month(df):
    
    '''Function that returns the number of calendar months between the first and the last
                [Date_M15] of df (year difference x 12 + month difference).
    '''
    
    dates = df.reset_index()['Date_M15']
    first, last = dates.loc[0], dates.loc[len(dates) - 1]
    
    return ((last.year - first.year) * 12) + (last.month - first.month)
    
def N_months_concat(df1, df2):
    
    '''Function that counts the distinct calendar months ('%Y-%m') present in [Date_M15] of df1
                and df2 taken together.
    '''
    
    months = [
        pd.to_datetime(pd.Series(frame['Date_M15']), format = '%Y%m%d').apply(lambda x: x.strftime('%Y-%m'))
        for frame in (df1, df2)
    ]
    
    # pd.concat instead of Series.append, which pandas 2.0 removed
    return pd.concat(months).nunique()

def N_months_concat4(df1, df2, df3, df4):
    
    '''Function that counts the distinct calendar months ('%Y-%m') present in [Date_M15] of the
                four dataframes taken together.
    '''
    
    months = [
        pd.to_datetime(pd.Series(frame['Date_M15']), format = '%Y%m%d').apply(lambda x: x.strftime('%Y-%m'))
        for frame in (df1, df2, df3, df4)
    ]
    
    # pd.concat instead of chained Series.append, which pandas 2.0 removed
    return pd.concat(months).nunique()

In [None]:
'''PARAMETERS USED:

Market_Structure(df,clean_sweep,x,perc_95)

    D1  --> clean_sweep = 1; x irrelevant; p_100 = 100000
    H4  --> clean_sweep = 1; x irrelevant; p_95  = 480
    H1  --> clean_sweep = 1; x irrelevant; p_95  = 220
    M15 --> clean_sweep = 1; x irrelevant; p_x   = 220

Price_M15_H4(df_M15_H1, k = 5000, ratio = 3, pip_min = 15, pip_max = 125, pip_over = -3)
Price_M15_H1(df_M15_H1, k = 1500, ratio = 3, pip_min = 15, pip_max = 90 , pip_over = -3)
'''

def _ms_break_index(df, column):
    
    '''Function that returns the row positions i (1 <= i <= len(df)-2) where [column] is 'MS' and
                the next row is 'Up_Break' or 'Dw_Break', i.e. the last row of each market
                structure (each market structure is reported once, without repetitions).

    Row 0 is never reported. The positions are meant to be used as labels with df.loc, so df is
    expected to carry a 0..len(df)-1 index.
    '''
    
    situation   = df[column].reset_index(drop=True)
    closes_ms   = situation.eq('MS')
    # shift(-1) lines every row up with its successor; the last row has none and never matches
    breaks_next = situation.shift(-1).isin(['Up_Break', 'Dw_Break'])
    
    positions = np.where((closes_ms & breaks_next).to_numpy())[0]
    
    return positions[positions >= 1].tolist()

def MS_index_M15(df):
    
    '''Positions of the last row of every M15 market structure (see _ms_break_index).'''
    
    return _ms_break_index(df, 'MS_Sit_M15')

def MS_index_H1(df):
    
    '''Positions of the last row of every H1 market structure (see _ms_break_index).'''
    
    return _ms_break_index(df, 'MS_Sit_H1')

def MS_index_H4(df):
    
    '''Positions of the last row of every H4 market structure (see _ms_break_index).'''
    
    return _ms_break_index(df, 'MS_Sit_H4')

def MS_index_D1(df):
    
    '''Positions of the last row of every D1 market structure (see _ms_break_index).'''
    
    return _ms_break_index(df, 'MS_Sit_D1')

### Plot Data

In [None]:
# Explores df_D1, df_H4, df_H1 or df_M15 data
# The start date is read with a scalar .loc, so the row must exist: randrange never returns
# len(df_M15), whereas randint(0, len(df_M15)) could and would then raise a KeyError.

r, rge = str(df_M15.loc[random.randrange(len(df_M15)),'Date']), 100
plot_MS(df_M15, r, rge)

In [None]:
# Explores df data -- NEEDS CHANGING

#r, rge = str(df.loc[random.randint(0, len(df)),'Date_H1']), 25
#plot_MS_all(df, r, rge)

### MS analysis

In [None]:
# Read, clean and split the df in 3 for H1_b, H1_s, H1_H4_b, H1_H4_s

# Reloads the priced dataframe from disk and binds its cleaned version to df.
# clean_data is not defined in this part of the notebook; see its definition for what it removes.
dfv = pd.read_pickle('dffv_EURUSD.pkl')
df = clean_data(dfv)

In [None]:
# Calculations
# Last row of every market structure, per timeframe
df_idx_M15 = MS_index_M15(df)
df_idx_H1  = MS_index_H1 (df)
df_idx_H4  = MS_index_H4 (df)
df_idx_D1  = MS_index_D1 (df)

# Number of market structures trending up (1) / down (-1)
N_df_M15_T_Up = sum(df.loc[df_idx_M15,'Trend_M15'] ==  1)
N_df_H1_T_Up  = sum(df.loc[df_idx_H1 ,'Trend_H1' ] ==  1)
N_df_H4_T_Up  = sum(df.loc[df_idx_H4 ,'Trend_H4' ] ==  1)
N_df_D1_T_Up  = sum(df.loc[df_idx_D1 ,'Trend_D1' ] ==  1)
N_df_M15_T_Dw = sum(df.loc[df_idx_M15,'Trend_M15'] == -1)
N_df_H1_T_Dw  = sum(df.loc[df_idx_H1 ,'Trend_H1' ] == -1)
N_df_H4_T_Dw  = sum(df.loc[df_idx_H4 ,'Trend_H4' ] == -1)
N_df_D1_T_Dw  = sum(df.loc[df_idx_D1 ,'Trend_D1' ] == -1)

# Number of rows labelled 1 (successful buy / sell)
N_df_B_pts_M15_H4 = df[df['Labelb_M15_H4'] == 1]['Date_M15'].count()
N_df_S_pts_M15_H4 = df[df['Labels_M15_H4'] == 1]['Date_M15'].count()
N_df_B_pts_M15_H1 = df[df['Labelb_M15_H1'] == 1]['Date_M15'].count()
N_df_S_pts_M15_H1 = df[df['Labels_M15_H1'] == 1]['Date_M15'].count()

# describe() of each MS_range column, rounded: [count, mean, std, min, 25%, 50%, 75%, max]
l0 = df['MS_range_M15'].describe().to_list()
l0 = [round(x,1) for x in l0]
l1 = df['MS_range_H1'].describe().to_list()
l1 = [round(x,1) for x in l1]
l2 = df['MS_range_H4'].describe().to_list()
l2 = [round(x,1) for x in l2]
l3 = df['MS_range_D1'].describe().to_list()
l3 = [round(x,1) for x in l3]

# Prints
# Percentages are rounded after scaling to 100 so that no float noise (e.g. 28.299999999999997) is printed
print('---- NUMBER OF BUYING AND SELLING POINTS for M15_H4 -----')
print('df : No. Buying  points (No. 1´s in [Labelb_M15_H4])',N_df_B_pts_M15_H4,'(',round(100*N_df_B_pts_M15_H4/len(df),1),'%)')
print('   : No. Selling points (No. 1´s in [Labels_M15_H4])',N_df_S_pts_M15_H4,'(',round(100*N_df_S_pts_M15_H4/len(df),1),'%)')

print('---- NUMBER OF BUYING AND SELLING POINTS for M15_H1 -----')
print('df : No. Buying  points (No. 1´s in [Labelb_M15_H1])',N_df_B_pts_M15_H1,'(',round(100*N_df_B_pts_M15_H1/len(df),1),'%)')
print('   : No. Selling points (No. 1´s in [Labels_M15_H1])',N_df_S_pts_M15_H1,'(',round(100*N_df_S_pts_M15_H1/len(df),1),'%)')

print('---- NUMBER OF Up & Dw TRENDS in MS STRUCTURES -----')
# The M15 line used to be labelled 'H1 MS´s'
print('dfs: No. M15 MS´s trending (Up,Dw): (',N_df_M15_T_Up,N_df_M15_T_Dw,') of',len(df_idx_M15),'M15 MS´s')
print('dfs: No. H1  MS´s trending (Up,Dw): (',N_df_H1_T_Up,N_df_H1_T_Dw  ,') of',len(df_idx_H1),'H1 MS´s')
print('dfs: No. H4  MS´s trending (Up,Dw): (',N_df_H4_T_Up,N_df_H4_T_Dw  ,') of',len(df_idx_H4),'H4 MS´s')
print('dfs: No. D1  MS´s trending (Up,Dw): (',N_df_D1_T_Up,N_df_D1_T_Dw  ,') of',len(df_idx_D1),'D1 MS´s')

print('---- NUMBER OF MS STRUCTURES AFFECTED BY CORRECTIONS -----')
# A market structure counts as affected when its range sits at the maximum of the column.
# The M15 count is taken over the M15 market structures (it used to be taken over df_idx_H1).
print(sum(df.loc[df_idx_M15,'MS_range_M15'] >= df['MS_range_M15'].max()),'MS´s of',len(df_idx_M15),'are affected from the MS function in M15')
print(sum(df.loc[df_idx_H1,'MS_range_H1']  >= df['MS_range_H1'].max()) ,'MS´s of',len(df_idx_H1) ,'are affected from the MS function in H1')
print(sum(df.loc[df_idx_H4,'MS_range_H4']  >= df['MS_range_H4'].max()) ,'MS´s of',len(df_idx_H4) ,'are affected from the MS function in H4')
print(sum(df.loc[df_idx_D1,'MS_range_D1']  >= df['MS_range_D1'].max()) ,'MS´s of',len(df_idx_D1) ,'are affected from the MS function in D1')

# Rows whose stop loss sits at the minimum / maximum of its column
print('----- BUY - STOP LOSS LIMITATIONS M15_H4 -----')
print('df :   ',df  [df  ['B_Stop_Loss_M15_H4'] <= df['B_Stop_Loss_M15_H4'].min()].count()['Date_M15'],'are limited to',df['B_Stop_Loss_M15_H4'].min(),'pips out of',len(df))
print('df :   ',df  [df  ['B_Stop_Loss_M15_H4'] >= df['B_Stop_Loss_M15_H4'].max()].count()['Date_M15'],'are limited to',df['B_Stop_Loss_M15_H4'].max(),'pips out of',len(df))

print('----- SELL - STOP LOSS LIMITATIONS M15_H4 -----')
print('df :   ',df  [df  ['S_Stop_Loss_M15_H4'] <= df['S_Stop_Loss_M15_H4'].min()].count()['Date_M15'],'are limited to',df['S_Stop_Loss_M15_H4'].min(),'pips out of',len(df))
print('df :   ',df  [df  ['S_Stop_Loss_M15_H4'] >= df['S_Stop_Loss_M15_H4'].max()].count()['Date_M15'],'are limited to',df['S_Stop_Loss_M15_H4'].max(),'pips out of',len(df))

print('----- BUY - STOP LOSS LIMITATIONS M15_H1 -----')
print('df :   ',df  [df  ['B_Stop_Loss_M15_H1'] <= df['B_Stop_Loss_M15_H1'].min()].count()['Date_M15'],'are limited to',df['B_Stop_Loss_M15_H1'].min(),'pips out of',len(df))
print('df :   ',df  [df  ['B_Stop_Loss_M15_H1'] >= df['B_Stop_Loss_M15_H1'].max()].count()['Date_M15'],'are limited to',df['B_Stop_Loss_M15_H1'].max(),'pips out of',len(df))

print('----- SELL - STOP LOSS LIMITATIONS M15_H1 -----')
print('df :   ',df  [df  ['S_Stop_Loss_M15_H1'] <= df['S_Stop_Loss_M15_H1'].min()].count()['Date_M15'],'are limited to',df['S_Stop_Loss_M15_H1'].min(),'pips out of',len(df))
print('df :   ',df  [df  ['S_Stop_Loss_M15_H1'] >= df['S_Stop_Loss_M15_H1'].max()].count()['Date_M15'],'are limited to',df['S_Stop_Loss_M15_H1'].max(),'pips out of',len(df),'\n')

# Mean followed by [min, 25%, 50%, 75%, max]
print('----- MS Range Stats -----')
print('MS_range_M15 mean:',l0[1],l0[3:])
print('MS_range_H1  mean:',l1[1],l1[3:])
print('MS_range_H4  mean:',l2[1],l2[3:])
print('MS_range_D1  mean:',l3[1],l3[3:])

### Label M15_H4
#### Full Manual Filtering Analysis

In [None]:
# df Exploration

#                          min   25%    50%    75%    max
#MS_range_D1  mean: 472.4 [39.4, 283.9, 407.9, 551.3, 2143.5]
#MS_range_H4  mean: 205.7 [19.1, 120.9, 179.5, 260.7, 480.0]
#MS_range_H1  mean: 111.9 [10.1, 68.8,  100.9, 146.3, 220.0]
#MS_range_M15 mean: 59.3  [3.1, 35.5, 54.0, 79.4, 120.0]

#Auto_Analysis(df, 100, 'H1_H4', 'print',        1, 1, 1, 1,-1, 1,           23.6, 78.6, 23.6, 78.6, 0, 99,           0,   700, 270, 10000, 0, 10000)
#Auto_Analysis(df, 100, 'H1_H4', 'print', 1, -1, 1, 1, 1, 1,-1, 1, 0, 10000, 23.6, 78.6, 23.6, 78.6, 0, 99, 0, 10000, 0,   400, 150, 10000, 0, 10000)

#dfr = pd.read_pickle('dfp_M15_H1_EURUSD.pkl')

# Resampling search for the M15_H4 labels.
# Every iteration draws two row selections (a / b) from each of the three
# frames returned by split3_dfs, filters each selection with the current
# parameter set and prints the parameter set when the label statistics of
# all twelve selections (6 buy + 6 sell) clear the thresholds below.

df3, df2, df1 = split3_dfs(df)

Fib_listm1 = [0, 23.6, 38.2, 50, 61.8, 78.6, 90]
Fib_list =   [0, 23.6, 38.2, 50, 61.8, 78.6, 90, 99]
list_idx_1 = df1.index.to_list()
list_idx_2 = df2.index.to_list()
list_idx_3 = df3.index.to_list()

# Buy Vbles

a1b, a2b = 1, 1
b1b, b2b = 1, 1
c1b, c2b =     -1, 1
d1b, d2b =     -1, 1

# Sell Vbles

a1s, a2s, b1s, b2s, c1s, c2s, d1s, d2s = -a1b, -a2b, -b1b, -b2b, -c1b, -c2b, -d1b, -d2b

# Common Vbles

r1, r2 = 23.6, 78.6 
s1, s2 = 23.6, 78.6 
t1, t2 = 0, 99
u1, u2 = 0, 99

n1, n2 = 0, 700
o1, o2 = 270, 10000
p1, p2 = 0, 10000 #80, 190
q1, q2 = 0, 10000

# Acceptance thresholds (loop-invariant, so set once):
par_a = 0.3    # minimum pooled label mean
par_b = 40     # minimum average row count over the selections
par_c = 0.25   # minimum label mean of every single selection
par_d = 20     # minimum row count of every single selection


def _filtered_label_stats(frame, label_col, trend_d1, trend_h4, trend_h1, trend_m15):
    '''Filter frame with the current parameter set and summarise label_col.

    INPUTS:

     - frame      ==> DataFrame holding the Trend_*, MS_retracement_*, MS_range_* and label cols
     - label_col  ==> name of the label col to summarise
     - trend_*    ==> pair of accepted values for the matching Trend_* col

    OUTPUTS:

     - First two entries of describe() on the surviving label values
       (count and mean for a numeric col).

    The retracement / range limits (r, s, t, u, n, o, p, q) are read from the
    notebook globals at call time, so re-assigning them between calls changes
    the filter. The Closing_* and N_Breaks_* filters that were tried earlier
    stay disabled. The former +-1000000 anchor on MS_retracement_M15 is
    dropped: the (u1, u2) window below is a subset of it for every value used here.
    '''
    keep = (
        frame['Trend_D1'].isin(list(trend_d1)) &
        frame['Trend_H4'].isin(list(trend_h4)) &
        frame['Trend_H1'].isin(list(trend_h1)) &
        frame['Trend_M15'].isin(list(trend_m15)) &
        frame['MS_retracement_D1'].between(r1, r2) &
        frame['MS_retracement_H4'].between(s1, s2) &
        frame['MS_retracement_H1'].between(t1, t2) &
        frame['MS_retracement_M15'].between(u1, u2) &
        frame['MS_range_D1'].between(n1, n2) &
        frame['MS_range_H4'].between(o1, o2) &
        frame['MS_range_H1'].between(p1, p2) &
        frame['MS_range_M15'].between(q1, q2)
    )
    return frame.loc[keep, label_col].describe().iloc[:2]


def analysis_b(df):
    '''Buy side: stats of Labelb_M15_H4 on the rows passing the buy filter.'''
    return _filtered_label_stats(df, 'Labelb_M15_H4', (a1b, a2b), (b1b, b2b), (c1b, c2b), (d1b, d2b))


def analysis_s(df):
    '''Sell side: stats of Labels_M15_H4 on the rows passing the sell filter.'''
    return _filtered_label_stats(df, 'Labels_M15_H4', (a1s, a2s), (b1s, b2s), (c1s, c2s), (d1s, d2s))


# Selection sizes handed to Space_dfb; the frames do not change inside the loop.
size_1 = int(len(df1)/10)
size_2 = int(len(df2)/10)
size_3 = int(len(df3)/10)

subset_tags = ('1a', '1b', '2a', '2b', '3a', '3b')

for i in range(0,100000):
    
    df1a = df1.loc[Space_dfa(df1,10),:]
    df1b = df1.loc[Space_dfb(list_idx_1,size_1),:]
    df2a = df2.loc[Space_dfa(df2,10),:]
    df2b = df2.loc[Space_dfb(list_idx_2,size_2),:]
    df3a = df3.loc[Space_dfa(df3,10),:]
    df3b = df3.loc[Space_dfb(list_idx_3,size_3),:]

# Random vbles assignments

#    a1b = random.choice([-1, 1,-1, 1, 1, 1])
#    a1s = -a1b
#    a2b = random.choice([-1, 1,-1, 1,-1, 1])
#    a2s = -a2b
    
#    b1b = random.choice([-1, 1,-1, 1, 1, 1])
#    b1s = -b1b
#    b2b = random.choice([-1, 1,-1, 1,-1, 1])
#    b2s = -b2b
    
#    c1b = random.choice([-1, 1,-1, 1, 1, 1])
#    c1s = -c1b
#    c2b = random.choice([-1, 1,-1, 1,-1, 1])
#    c2s = -c2b
    
#    d1b = random.choice([-1, 1,-1, 1, 1, 1])
#    d1s = -d1b
#    d2b = random.choice([-1, 1,-1, 1,-1, 1])
#    d2s = -d2b

#    r1 = random.choice([38.2])
#    r2 = random.choice([78.6, 90])#[i for i in Fib_list if i > r1])

#    s1 = random.choice([38.2, 50]) #[-10000, 23.6])
#    s2 = random.choice([61.8, 78.6, 90, 99])
    
#    t1 = random.choice([38.2, 50])
#    t2 = random.choice([61.8, 78.6, 90])#[i for i in Fib_list if i > t1]) #[78.6, 90, 99, 10000])
    
#    u1 = random.choice([23.6, 38.2])
#    u2 = random.choice([78.6, 90, 99])#[i for i in Fib_list if i > t1]) #[78.6, 90, 99, 10000])
    
#    n1 = random.randrange(1000,1700,300)
#    n2 = random.randrange(700,1700,300)
    
#    o1 = random.choice([0,50, 100])
#    o2 =  random.choice([300,350, 400, 450])
    
#    p1 = random.randrange(0,101,25)
#    p2 = random.choice([219,220])

#    q1 = random.randrange(0,101,25)
#    q2 = random.choice([219,220])
    
    if (i % 1000) == 0: print('-->',i)
    
    subsets = (df1a, df1b, df2a, df2b, df3a, df3b)
    buy_stats  = [analysis_b(subset) for subset in subsets]
    sell_stats = [analysis_s(subset) for subset in subsets]
    
    # .iloc keeps the access positional without relying on the deprecated
    # integer-key fallback of Series.__getitem__ on a label index.
    buy_counts  = [stats.iloc[0] for stats in buy_stats]
    buy_means   = [stats.iloc[1] for stats in buy_stats]
    sell_counts = [stats.iloc[0] for stats in sell_stats]
    sell_means  = [stats.iloc[1] for stats in sell_stats]
    
    # A selection that is too small can never be accepted; skipping here also
    # avoids the 0/0 in the pooled means when a filter leaves no rows.
    if min(buy_counts + sell_counts) <= par_d:
        continue
    
    count_b = sum(buy_counts)/6
    #count_b = (xb1[0]+xb2[0]+xb3[0])/3
    mean_b  = sum(c*m for c, m in zip(buy_counts, buy_means))/(6*count_b)
    #mean_b  = (xb1[0]*xb1[1]+xb2[0]*xb2[1]+xb3[0]*xb3[1])/(3*count_b)
    
    count_s = sum(sell_counts)/6
    #count_s = (yb1[0]+yb2[0]+yb3[0])/3
    mean_s  = sum(c*m for c, m in zip(sell_counts, sell_means))/(6*count_s)
    #mean_s  = (yb1[0]*yb1[1]+yb2[0]*yb2[1]+yb3[0]*yb3[1])/(3*count_s)
    
    accepted = (mean_b > par_a and mean_s > par_a and
                count_b > par_b and count_s > par_b and
                all(m > par_c for m in buy_means + sell_means))
    
    if accepted:
        
        print(i)
        print('TOTAL SCORE BUY  --- MEAN:',mean_b,' --- COUNT:',count_b)
        print('TOTAL SCORE SELL --- MEAN:',mean_s,' --- COUNT:',count_s)
        print('BUY VBLES   :','(',a1b,a2b,')(',b1b,b2b,')(',c1b,c2b,')(',d1b,d2b,')')
        print('SELL VBLES  :','(',a1s,a2s,')(',b1s,b2s,')(',c1s,c2s,')(',d1s,d2s,')')
        print('COMMON VBLES:','(',r1,r2,')(',s1,s2,')(',t1,t2,')(',u1,u2,')n(',n1,n2,')o(',o1,o2,')p(',p1,p2,')q(',q1,q2,')')
        print('BUY DFRAMES:')
        for tag, stats in zip(subset_tags, buy_stats):
            print(tag,i, stats.iloc[0],stats.iloc[1])
        print('SELL DFRAMES:')
        for tag, stats in zip(subset_tags, sell_stats):
            print(tag,i, stats.iloc[0],stats.iloc[1])

#### Solutions M15_H4

In [None]:
# Candidate M15_H4 filter. The 24 filter values are bound once so the printed
# report and the returned frames are guaranteed to use the same filter.
filter_args = (1, 1, 1, 1, -1, 1, -1, 1, 0, 78.6, 23.6, 78.6, 0, 90, 0, 99, 0, 1200, 280, 10000, 0, 10000, 0, 10000)
Auto_Analysis(df, 30, 'M15_H4', 'print', *filter_args)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H4', 'ret', *filter_args)
# Month coverage of the returned buy/sell frames; each helper is evaluated once.
total_months = diff_month(df)
months_with_points = N_months_concat(df_b, df_s)
print('Total months:',total_months,',months w/ points:',months_with_points,'(',round(100*(months_with_points/total_months),1),'%)')

In [None]:
# Candidate M15_H4 filter. The 24 filter values are bound once so the printed
# report and the returned frames are guaranteed to use the same filter.
filter_args = (1, 1, 1, 1, -1, 1, -1, 1, 23.6, 78.6, 23.6, 78.6, 0, 99, 0, 99, 0, 700, 270, 10000, 0, 10000, 0, 10000)
Auto_Analysis(df, 30, 'M15_H4', 'print', *filter_args)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H4', 'ret', *filter_args)
# Month coverage of the returned buy/sell frames; each helper is evaluated once.
total_months = diff_month(df)
months_with_points = N_months_concat(df_b, df_s)
print('Total months:',total_months,',months w/ points:',months_with_points,'(',round(100*(months_with_points/total_months),1),'%)')

In [None]:
# Candidate M15_H4 filter. The 24 filter values are bound once so the printed
# report and the returned frames are guaranteed to use the same filter.
filter_args = (1, 1, 1, 1, -1, 1, -1, 1, 23.6, 90, 23.6, 78.6, 0, 90, 0, 99, 0, 700, 270, 10000, 0, 10000, 0, 10000)
Auto_Analysis(df, 30, 'M15_H4', 'print', *filter_args)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H4', 'ret', *filter_args)
# Month coverage of the returned buy/sell frames; each helper is evaluated once.
total_months = diff_month(df)
months_with_points = N_months_concat(df_b, df_s)
print('Total months:',total_months,',months w/ points:',months_with_points,'(',round(100*(months_with_points/total_months),1),'%)')

In [None]:
# Candidate M15_H4 filter. The 24 filter values are bound once so the printed
# report and the returned frames are guaranteed to use the same filter.
filter_args = (1, 1, 1, 1, -1, 1, -1, 1, 23.6, 90, 23.6, 90, 23.6, 99, 0, 99, 0, 700, 270, 10000, 0, 10000, 0, 10000)
Auto_Analysis(df, 30, 'M15_H4', 'print', *filter_args)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H4', 'ret', *filter_args)
# Month coverage of the returned buy/sell frames; each helper is evaluated once.
total_months = diff_month(df)
months_with_points = N_months_concat(df_b, df_s)
print('Total months:',total_months,',months w/ points:',months_with_points,'(',round(100*(months_with_points/total_months),1),'%)')

In [None]:
# Candidate M15_H4 filter. The 24 filter values are bound once so the printed
# report and the returned frames are guaranteed to use the same filter.
filter_args = (1, 1, 1, 1, -1, 1, -1, 1, 23.6, 78.6, 0, 78.6, 0, 99, 0, 99, 0, 1400, 280, 10000, 0, 10000, 0, 10000)
Auto_Analysis(df, 30, 'M15_H4', 'print', *filter_args)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H4', 'ret', *filter_args)
# Month coverage of the returned buy/sell frames; each helper is evaluated once.
total_months = diff_month(df)
months_with_points = N_months_concat(df_b, df_s)
print('Total months:',total_months,',months w/ points:',months_with_points,'(',round(100*(months_with_points/total_months),1),'%)')

In [None]:
# Candidate M15_H4 filter. The 24 filter values are bound once so the printed
# report and the returned frames are guaranteed to use the same filter.
filter_args = (1, 1, 1, 1, -1, 1, -1, 1, 23.6, 90, 0, 78.6, 0, 90, 0, 99, 0, 800, 280, 10000, 0, 10000, 0, 10000)
Auto_Analysis(df, 30, 'M15_H4', 'print', *filter_args)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H4', 'ret', *filter_args)
# Month coverage of the returned buy/sell frames; each helper is evaluated once.
total_months = diff_month(df)
months_with_points = N_months_concat(df_b, df_s)
print('Total months:',total_months,',months w/ points:',months_with_points,'(',round(100*(months_with_points/total_months),1),'%)')

### Label_M15_H1
#### Full Manual Filtering Analysis

In [None]:
# Reload the EURUSD frame from its pickle and pass it through clean_data
# (defined elsewhere in this notebook) before the M15_H1 search; this rebinds df.
# NOTE: pd.read_pickle deserialises arbitrary Python objects - only load
# pickle files that come from a trusted source.
dfv = pd.read_pickle('dffv_EURUSD.pkl')
df = clean_data(dfv)

In [None]:
# df Exploration

#                          min   25%    50%    75%    max
#MS_range_D1  mean: 472.4 [39.4, 283.9, 407.9, 551.3, 2143.5]
#MS_range_H4  mean: 205.7 [19.1, 120.9, 179.5, 260.7, 480.0]
#MS_range_H1  mean: 111.9 [10.1, 68.8,  100.9, 146.3, 220.0]
#MS_range_M15 mean: 59.3  [3.1, 35.5, 54.0, 79.4, 120.0]

#Auto_Analysis(df, 100, 'H1_H4', 'print',        1, 1, 1, 1,-1, 1,           23.6, 78.6, 23.6, 78.6, 0, 99,           0,   700, 270, 10000, 0, 10000)
#Auto_Analysis(df, 100, 'H1_H4', 'print', 1, -1, 1, 1, 1, 1,-1, 1, 0, 10000, 23.6, 78.6, 23.6, 78.6, 0, 99, 0, 10000, 0,   400, 150, 10000, 0, 10000)

#dfr = pd.read_pickle('dfp_M15_H1_EURUSD.pkl')

# Randomised search for the M15_H1 labels.
# Every iteration draws two row selections (a / b) from each of the three
# frames returned by split3_dfs, picks random retracement windows, filters
# each selection and prints the parameter set when the label statistics of
# all twelve selections (6 buy + 6 sell) clear the thresholds below.

df3, df2, df1 = split3_dfs(df)

Fib_listm1 = [0, 23.6, 38.2, 50, 61.8]
Fib_list =   [0, 23.6, 38.2, 50, 61.8, 78.6, 90, 99]
list_idx_1 = df1.index.to_list()
list_idx_2 = df2.index.to_list()
list_idx_3 = df3.index.to_list()

# Buy Vbles

a1b, a2b =     -1, 1
b1b, b2b =     -1, 1
c1b, c2b =     1, 1
d1b, d2b =     -1, 1

# Sell Vbles

a1s, a2s, b1s, b2s, c1s, c2s, d1s, d2s = -a1b, -a2b, -b1b, -b2b, -c1b, -c2b, -d1b, -d2b

# Common Vbles

r1, r2 = 0, 99
s1, s2 = 0, 99
t1, t2 = 0, 99
u1, u2 = 0, 99

n1, n2 = 0, 10000#700
o1, o2 = 0, 10000
p1, p2 = 0, 10000
q1, q2 = 0, 10000

# Acceptance thresholds (loop-invariant, so set once):
par_a = 0.30   # minimum pooled label mean
par_b = 1      # minimum average row count of the 'b' selections
par_c = 0.27   # minimum label mean of every single selection
par_d = 1      # minimum row count of every single selection


def _filtered_label_stats(frame, label_col, trend_d1, trend_h4, trend_h1, trend_m15):
    '''Filter frame with the current parameter set and summarise label_col.

    INPUTS:

     - frame      ==> DataFrame holding the Trend_*, MS_retracement_*, MS_range_* and label cols
     - label_col  ==> name of the label col to summarise
     - trend_*    ==> pair of accepted values for the matching Trend_* col

    OUTPUTS:

     - First two entries of describe() on the surviving label values
       (count and mean for a numeric col).

    The retracement / range limits (r, s, t, u, n, o, p, q) are read from the
    notebook globals at call time, so the random re-assignments made inside
    the loop take effect. The Closing_* filters that were tried earlier stay
    disabled. The former +-1000000 anchor on MS_retracement_M15 is dropped:
    the (u1, u2) window below is a subset of it for every value used here.
    '''
    keep = (
        frame['Trend_D1'].isin(list(trend_d1)) &
        frame['Trend_H4'].isin(list(trend_h4)) &
        frame['Trend_H1'].isin(list(trend_h1)) &
        frame['Trend_M15'].isin(list(trend_m15)) &
        frame['MS_retracement_D1'].between(r1, r2) &
        frame['MS_retracement_H4'].between(s1, s2) &
        frame['MS_retracement_H1'].between(t1, t2) &
        frame['MS_retracement_M15'].between(u1, u2) &
        frame['MS_range_D1'].between(n1, n2) &
        frame['MS_range_H4'].between(o1, o2) &
        frame['MS_range_H1'].between(p1, p2) &
        frame['MS_range_M15'].between(q1, q2)
    )
    return frame.loc[keep, label_col].describe().iloc[:2]


def analysis_b(df):
    '''Buy side: stats of Labelb_M15_H1 on the rows passing the buy filter.'''
    return _filtered_label_stats(df, 'Labelb_M15_H1', (a1b, a2b), (b1b, b2b), (c1b, c2b), (d1b, d2b))


def analysis_s(df):
    '''Sell side: stats of Labels_M15_H1 on the rows passing the sell filter.'''
    return _filtered_label_stats(df, 'Labels_M15_H1', (a1s, a2s), (b1s, b2s), (c1s, c2s), (d1s, d2s))


# Selection sizes handed to Space_dfb; the frames do not change inside the loop.
size_1 = int(len(df1)/10)
size_2 = int(len(df2)/10)
size_3 = int(len(df3)/10)

subset_tags = ('1a', '1b', '2a', '2b', '3a', '3b')

for i in range(0,1000000):
    
    df1a = df1.loc[Space_dfa(df1,10),:]
    df1b = df1.loc[Space_dfb(list_idx_1,size_1),:]
    df2a = df2.loc[Space_dfa(df2,10),:]
    df2b = df2.loc[Space_dfb(list_idx_2,size_2),:]
    df3a = df3.loc[Space_dfa(df3,10),:]
    df3b = df3.loc[Space_dfb(list_idx_3,size_3),:]

# Random vbles assignments
#-1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)
#    a1b = random.choice([-1, 1,-1, 1, 1, 1])
#    a1s = -a1b
#    a2b = random.choice([-1, 1,-1, 1,-1, 1])
#    a2s = -a2b
    
#    b1b = random.choice([-1, 1,-1, 1, 1, 1])
#    b1s = -b1b
#    b2b = random.choice([-1, 1,-1, 1,-1, 1])
#    b2s = -b2b
    
#    c1b = random.choice([-1, 1,-1, 1, 1, 1])
#    c1s = -c1b
#    c2b = random.choice([-1, 1,-1, 1,-1, 1])
#    c2s = -c2b
    
#    d1b = random.choice([-1, 1,-1, 1, 1, 1])
#    d1s = -d1b
#    d2b = random.choice([-1, 1,-1, 1,-1, 1])
#    d2s = -d2b

    # NOTE(review): 38.6 is not in Fib_list (which has 38.2) - possibly a typo; confirm.
    r1 = random.choice([23.6, 38.6])
    r2 = random.choice([50, 61.8])

    s1 = random.choice([23.6, 38.6])
    s2 = random.choice([50, 61.8])#([50, 61.8, 78.6, 90, 99])
    
    t1 = random.choice([23.6, 38.6])
    t2 = random.choice([50, 61.8])
    
    u1 = random.choice([23.6, 38.6])#([0, 23.6, 38.6, 50])
    u2 = random.choice([50, 61.8])

#    -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)
#    n1 = random.randrange(700,3000,300)
#    n2 = random.randrange(1000,3000,250)
    
#    o1 = random.randrange(0,51,15)
#    o2 = random.randrange(350,501,25)
    
#    p1 = random.randrange(0,21,5)
#    p2 = random.randrange(170,221,10)

#    q1 = random.randrange(0,21,5)
#    q2 = random.randrange(70,110,5)
    
    if (i % 1000) == 0: print('-->',i)
    
    subsets = (df1a, df1b, df2a, df2b, df3a, df3b)
    buy_stats  = [analysis_b(subset) for subset in subsets]
    sell_stats = [analysis_s(subset) for subset in subsets]
    
    # .iloc keeps the access positional without relying on the deprecated
    # integer-key fallback of Series.__getitem__ on a label index.
    buy_counts  = [stats.iloc[0] for stats in buy_stats]
    buy_means   = [stats.iloc[1] for stats in buy_stats]
    sell_counts = [stats.iloc[0] for stats in sell_stats]
    sell_means  = [stats.iloc[1] for stats in sell_stats]
    
    # A selection that is too small can never be accepted; skipping here also
    # avoids the 0/0 in the pooled means when a filter leaves no rows.
    if min(buy_counts + sell_counts) <= par_d:
        continue
    
    # Pooled scores use the three 'b' selections only (positions 1, 3, 5).
    # The six-selection variants that used to be computed first were
    # overwritten immediately and have been removed.
    b_buy_counts,  b_buy_means  = buy_counts[1::2],  buy_means[1::2]
    b_sell_counts, b_sell_means = sell_counts[1::2], sell_means[1::2]
    
    count_b = sum(b_buy_counts)/3
    mean_b  = sum(c*m for c, m in zip(b_buy_counts, b_buy_means))/(3*count_b)
    
    count_s = sum(b_sell_counts)/3
    mean_s  = sum(c*m for c, m in zip(b_sell_counts, b_sell_means))/(3*count_s)
    
    # Per-selection thresholds are still applied to all twelve selections.
    accepted = (mean_b > par_a and mean_s > par_a and
                count_b > par_b and count_s > par_b and
                all(m > par_c for m in buy_means + sell_means))
    
    if accepted:
        
        print(i)
        print('TOTAL SCORE BUY  --- MEAN:',mean_b,' --- COUNT:',count_b)
        print('TOTAL SCORE SELL --- MEAN:',mean_s,' --- COUNT:',count_s)
        # h1b / h2b are no longer printed: they belong to the disabled
        # Closing_M15 filter and are never assigned in this cell.
        print('BUY VBLES   :','(',a1b,a2b,')(',b1b,b2b,')(',c1b,c2b,')(',d1b,d2b,')')
        print('SELL VBLES  :','(',a1s,a2s,')(',b1s,b2s,')(',c1s,c2s,')(',d1s,d2s,')')
        print('COMMON VBLES:','(',r1,r2,')(',s1,s2,')(',t1,t2,')(',u1,u2,')n(',n1,n2,')o(',o1,o2,')p(',p1,p2,')q(',q1,q2,')')
        print('BUY DFRAMES:')
        for tag, stats in zip(subset_tags, buy_stats):
            print(tag,i, stats.iloc[0],stats.iloc[1])
        print('SELL DFRAMES:')
        for tag, stats in zip(subset_tags, sell_stats):
            print(tag,i, stats.iloc[0],stats.iloc[1])

In [None]:
# BRUTAL - DOES NOT WORK
Auto_Analysis(df, 50, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)
# NOTE(review): the disabled 'ret' call below carries different filter values
# than the active 'print' call above - re-sync them before re-enabling.
#df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# BRUTAL 2 - DOES NOT WORK
Auto_Analysis(df, 50, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 61.8,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)
# NOTE(review): the disabled 'ret' call below carries different filter values
# than the active 'print' call above - re-sync them before re-enabling.
#df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# BRUTAL 3 - RCF WORKS ON BUY - NOT ON SELL df2
Auto_Analysis(df, 50, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)
# NOTE(review): the disabled 'ret' call below carries different filter values
# than the active 'print' call above - re-sync them before re-enabling.
#df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# RCF WORKS ON BUY - ALMOST ON SELL df2
# Same values as the 'BRUTAL 3' call except the 9th value pair: (0, 1900) instead of (0, 10000).
Auto_Analysis(df, 50, 'M15_H1', 'print', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)

In [None]:
# Variant of the 'RCF WORKS ON BUY - ALMOST ON SELL df2' call: 9th value pair is (0, 2200) instead of (0, 1900).
Auto_Analysis(df, 50, 'M15_H1', 'print', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 2200, 0, 400, 0, 180, 0, 10000)

In [None]:
# Variant of the 'RCF WORKS ON BUY - ALMOST ON SELL df2' call: 9th value pair is (0, 1700) instead of (0, 1900).
Auto_Analysis(df, 50, 'M15_H1', 'print', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 1700, 0, 400, 0, 180, 0, 10000)

In [None]:
# Variant of the 'RCF WORKS ON BUY - ALMOST ON SELL df2' call: 10th value pair is (0, 380) instead of (0, 400).
Auto_Analysis(df, 50, 'M15_H1', 'print', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 1900, 0, 380, 0, 180, 0, 10000)

In [None]:
# Variant of the 'RCF WORKS ON BUY - ALMOST ON SELL df2' call: 10th value pair is (0, 420) instead of (0, 400).
Auto_Analysis(df, 50, 'M15_H1', 'print', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 1900, 0, 420, 0, 180, 0, 10000)

In [None]:
# RCF NOT WORKING
# Variant of the 'RCF WORKS ON BUY - ALMOST ON SELL df2' call: 11th value pair is (0, 160) instead of (0, 180).
Auto_Analysis(df, 50, 'M15_H1', 'print', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 1900, 0, 400, 0, 160, 0, 10000)

In [None]:




# Variant of the 'RCF WORKS ON BUY - ALMOST ON SELL df2' call: 11th value pair is (0, 200) instead of (0, 180).
Auto_Analysis(df, 50, 'M15_H1', 'print', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 1900, 0, 400, 0, 200, 0, 10000)

In [None]:
# Variant of the 'RCF WORKS ON BUY - ALMOST ON SELL df2' call: 11th value pair is (10, 180) instead of (0, 180).
Auto_Analysis(df, 50, 'M15_H1', 'print', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 1900, 0, 400, 10, 180, 0, 10000)

In [None]:
# Variant of the 'RCF WORKS ON BUY - ALMOST ON SELL df2' call: 10th value pair is (20, 400) instead of (0, 400).
Auto_Analysis(df, 50, 'M15_H1', 'print', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 1900, 20, 400, 0, 180, 0, 10000)

In [None]:
# Variant of the 'RCF WORKS ON BUY - ALMOST ON SELL df2' call: 6th value pair is (23.6, 78.6) instead of (23.6, 61.8).
Auto_Analysis(df, 50, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 78.6, 0, 78.6,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)

In [None]:
# DOES NOT WORK
# Variant of the 'RCF WORKS ON BUY - ALMOST ON SELL df2' call: 7th value pair is (0, 90) instead of (0, 78.6).
Auto_Analysis(df, 50, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 90,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)

In [None]:
# DOES NOT WORK
# Variant of the 'RCF WORKS ON BUY - ALMOST ON SELL df2' call: 8th value pair is (23.6, 61.8) instead of (38.6, 61.8).
Auto_Analysis(df, 50, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  23.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)

In [None]:
# DOES NOT WORK
# Variant of the 'RCF WORKS ON BUY - ALMOST ON SELL df2' call: 8th value pair is (38.6, 78.6) instead of (38.6, 61.8).
Auto_Analysis(df, 50, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 78.6, 0, 1900, 0, 400, 0, 180, 0, 10000)

In [None]:
# DOES NOT WORK
# NOTE(review): this active call is identical to the one in the next cell,
# which is labelled '# Brutal' - the two verdicts contradict each other; confirm which holds.
Auto_Analysis(df, 50, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 70, 0, 78.6,  38.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)
# NOTE(review): the disabled 'ret' call below carries different filter values
# than the active 'print' call above - re-sync them before re-enabling.
#df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Brutal
# NOTE(review): this active call is identical to the one in the previous cell,
# which is labelled '# DOES NOT WORK' - the two verdicts contradict each other; confirm which holds.
Auto_Analysis(df, 50, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 70, 0, 78.6,  38.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)
# NOTE(review): the disabled 'ret' call below carries different filter values
# than the active 'print' call above - re-sync them before re-enabling.
#df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Brutal
# Same values as the 'BRUTAL 3' call except the 6th value pair: (23.6, 78.6) instead of (23.6, 61.8).
Auto_Analysis(df, 50, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 78.6, 0, 78.6,  38.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)
# NOTE(review): the disabled 'ret' call below carries different filter values
# than the active 'print' call above - re-sync them before re-enabling.
#df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# BRUTAL 4 # DOES NOT WORK
# Same values as the 'BRUTAL 3' call except the 7th value pair: (0, 90) instead of (0, 78.6).
Auto_Analysis(df, 50, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 90,  38.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)
# NOTE(review): the disabled 'ret' call below carries different filter values
# than the active 'print' call above - re-sync them before re-enabling.
#df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# DOES NOT WORK
# Same values as the 'BRUTAL 3' call except the 8th value pair: (23.6, 61.8) instead of (38.6, 61.8).
Auto_Analysis(df, 50, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  23.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)
# NOTE(review): the disabled 'ret' call below carries different filter values
# than the active 'print' call above - re-sync them before re-enabling.
#df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Notes: argument tails of the Auto_Analysis(df, 50, 'M15_H1', 'print', ...) calls tried above.
# Kept as comments because the bare lists (with their unmatched ')') are not valid Python.
# -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)

# -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 78.6, 0, 78.6,  38.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)
# -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 90,  38.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)
# -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  23.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)

In [None]:
# Combines two changes relative to the 'BRUTAL 3' call: 6th value pair (23.6, 78.6) and 7th value pair (0, 90).
Auto_Analysis(df, 50, 'M15_H1', 'print', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 78.6, 0, 90,  38.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)

In [None]:
#RFC DIDN T WORK
# Combines two changes relative to the 'BRUTAL 3' call: 7th value pair (0, 90) and 8th value pair (23.6, 61.8).
Auto_Analysis(df, 50, 'M15_H1', 'print', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 90,  23.6, 61.8, 0, 10000, 0, 400, 0, 180, 0, 10000)

In [None]:
# Report for the filter with all four range pairs left open at (0, 10000).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
# NOTE(review): the 'ret' call uses (0, 1900), (0, 400), (0, 180) for the 9th-11th value pairs
# instead of the (0, 10000) printed above, so the month coverage below describes a
# different filter than the report - confirm which set is intended.
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Pasted output of one accepted parameter set from the search loop (kept as comments; it is not code):
# TOTAL SCORE BUY  --- MEAN: 0.34763948497854075  --- COUNT: 77.66666666666667
# TOTAL SCORE SELL --- MEAN: 0.3562231759656652  --- COUNT: 77.66666666666667
# BUY VBLES   : ( -1 1 )( -1 1 )( -1 1 )( -1 1 )( -1 1 )
# SELL VBLES  : ( 1 -1 )( 1 -1 )( 1 -1 )( 1 -1 )
# COMMON VBLES: ( 38.6 50 )( 23.6 61.8 )( 23.6 61.8 )( 38.6 50 )n( 0 10000 )o( 0 10000 )p( 0 10000 )q( 0 10000 )

In [None]:
# Report for the filter with all four range pairs left open at (0, 10000).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
# NOTE(review): the 'ret' call uses (0, 1900), (0, 400), (0, 180) for the 9th-11th value pairs
# instead of the (0, 10000) printed above, so the month coverage below describes a
# different filter than the report - confirm which set is intended.
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Report with the 4th value pair set to (1, 1) and all range pairs open at (0, 10000).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, -1, 1, -1, 1, 1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
# NOTE(review): the 'ret' call keeps the 4th value pair at (-1, 1) and uses (0, 1900), (0, 400),
# (0, 180) for the 9th-11th pairs, so the month coverage below describes a different
# filter than the report above - confirm which set is intended.
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Report with the 3rd value pair set to (1, 1) and all range pairs open at (0, 10000).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
# NOTE(review): the 'ret' call keeps the 3rd value pair at (-1, 1) and uses (0, 1900), (0, 400),
# (0, 180) for the 9th-11th pairs, so the month coverage below describes a different
# filter than the report above - confirm which set is intended.
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Report with the 2nd value pair set to (1, 1) and all range pairs open at (0, 10000).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
# NOTE(review): the 'ret' call keeps the 2nd value pair at (-1, 1) and uses (0, 1900), (0, 400),
# (0, 180) for the 9th-11th pairs, so the month coverage below describes a different
# filter than the report above - confirm which set is intended.
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Report with the 1st value pair set to (1, 1) and all range pairs open at (0, 10000).
Auto_Analysis(df, 30, 'M15_H1', 'print',            1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
# NOTE(review): the 'ret' call keeps the 1st value pair at (-1, 1) and uses (0, 1900), (0, 400),
# (0, 180) for the 9th-11th pairs, so the month coverage below describes a different
# filter than the report above - confirm which set is intended.
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# M15_H1 filter with bounded ranges. The 24 filter values are bound once so the
# printed report and the returned frames are guaranteed to use the same filter.
filter_args = (-1, 1, -1, 1, -1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 23.6, 61.8,  38.6, 50, 0, 1900, 0, 400, 0, 180, 0, 10000)
Auto_Analysis(df, 30, 'M15_H1', 'print', *filter_args)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', *filter_args)
# Month coverage of the returned buy/sell frames; each helper is evaluated once.
total_months = diff_month(df)
months_with_points = N_months_concat(df_b, df_s)
print('Total months:',total_months,',months w/ points:',months_with_points,'(',round(100*(months_with_points/total_months),1),'%)')

In [None]:
#######################

#### Solutions M15_H1

In [None]:
# Candidate M15_H1 filter. The 24 filter values are bound once so the printed
# report and the returned frames are guaranteed to use the same filter.
filter_args = (-1, 1, 1, 1, 1, 1, 1, 1, 23.6, 90, 0, 99, 38.6, 61.8,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)
Auto_Analysis(df, 30, 'M15_H1', 'print', *filter_args)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', *filter_args)
# Month coverage of the returned buy/sell frames; each helper is evaluated once.
total_months = diff_month(df)
months_with_points = N_months_concat(df_b, df_s)
print('Total months:',total_months,',months w/ points:',months_with_points,'(',round(100*(months_with_points/total_months),1),'%)')

In [None]:
# Candidate M15_H1 filter. The 24 filter values are bound once so the printed
# report and the returned frames are guaranteed to use the same filter.
filter_args = (-1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 50, 38.6, 61.8,  23.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
Auto_Analysis(df, 30, 'M15_H1', 'print', *filter_args)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', *filter_args)
# Month coverage of the returned buy/sell frames; each helper is evaluated once.
total_months = diff_month(df)
months_with_points = N_months_concat(df_b, df_s)
print('Total months:',total_months,',months w/ points:',months_with_points,'(',round(100*(months_with_points/total_months),1),'%)')

In [None]:
# GOOOOD
# Candidate M15_H1 filter. The 24 filter values are bound once so the printed
# report and the returned frames are guaranteed to use the same filter.
filter_args = (-1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 50, 38.6, 61.8,  38.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
Auto_Analysis(df, 30, 'M15_H1', 'print', *filter_args)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', *filter_args)
# Month coverage of the returned buy/sell frames; each helper is evaluated once.
total_months = diff_month(df)
months_with_points = N_months_concat(df_b, df_s)
print('Total months:',total_months,',months w/ points:',months_with_points,'(',round(100*(months_with_points/total_months),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 50).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  0, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  0, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 100).
Auto_Analysis(df, 30, 'M15_H1', 'print',             -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  23.6, 50, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
df_b, df_s = Auto_Analysis(df, 100, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  23.6, 50, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 100).
Auto_Analysis(df, 30, 'M15_H1', 'print',             -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  23.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
df_b, df_s = Auto_Analysis(df, 100, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  23.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 50).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. Both calls must use the same flag/filter arguments so the month coverage
# describes the configuration that was printed (the 'ret' call previously kept 1 instead of -1 in the
# second flag pair). The second argument (30 vs 50) is left as it was.
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, -1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, -1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. Both calls must use the same flag/filter arguments so the month coverage
# describes the configuration that was printed (the 'ret' call previously kept 1 instead of -1 in the
# third flag pair). The second argument (30 vs 50) is left as it was.
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, -1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, -1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. Both calls must use the same flag/filter arguments so the month coverage
# describes the configuration that was printed (the 'ret' call previously kept 1 instead of -1 in the
# fourth flag pair). The second argument (30 vs 50) is left as it was.
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, 1, 1, -1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, -1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  38.6, 61.8, 0, 1900, 0, 400, 0, 180, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 100).
Auto_Analysis(df, 30, 'M15_H1', 'print',             -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  38.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
df_b, df_s = Auto_Analysis(df, 100, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 61.8, 38.6, 61.8,  38.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 100).
Auto_Analysis(df, 30, 'M15_H1', 'print',             -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 78.6, 38.6, 61.8,  0, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
df_b, df_s = Auto_Analysis(df, 100, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 78.6, 38.6, 61.8,  0, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# GOOOOD ++++ # DOES NOT WORK
# Exploration run (author's verdict above): 'print' mode call, then 'ret' mode call for the buy/sell
# frames, then the share of months that contain points (second argument 30 vs 50).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 78.6, 38.6, 61.8,  23.6, 61.8, 0, 2100, 0, 460, 0, 10000, 0, 10000)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 78.6, 38.6, 61.8,  23.6, 61.8, 0, 2100, 0, 460, 0, 10000, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: only the 'print' mode call is active; the 'ret' call and the month-coverage print are disabled.
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, -1, 1, 1, 1, 38.6, 61.8, 0, 78.6, 50, 61.8,  23.6, 61.8, 0, 2100, 0, 460, 0, 10000, 0, 10000)
#df_b, df_s = Auto_Analysis(df, 30, 'M15_H1', 'ret', -1, 1, 1, 1, -1, 1, 1, 1, 38.6, 61.8, 0, 78.6, 50, 61.8,  23.6, 61.8, 0, 2100, 0, 460, 0, 10000, 0, 10000)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: only the 'print' mode call is active; the 'ret' call and the month-coverage print are disabled.
Auto_Analysis(df, 30, 'M15_H1', 'print',             -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 78.6, 38.6, 61.8,  23.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
#df_b, df_s = Auto_Analysis(df, 100, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 78.6, 38.6, 61.8,  23.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: only the 'print' mode call is active; the 'ret' call and the month-coverage print are disabled.
Auto_Analysis(df, 30, 'M15_H1', 'print',             -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 78.6, 38.6, 61.8,  38.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
#df_b, df_s = Auto_Analysis(df, 100, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 78.6, 38.6, 61.8,  38.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: only the 'print' mode call is active; the 'ret' call and the month-coverage print are disabled.
Auto_Analysis(df, 30, 'M15_H1', 'print',             -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 90, 38.6, 61.8,  38.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
#df_b, df_s = Auto_Analysis(df, 100, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 90, 38.6, 61.8,  38.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# GOOD
# Exploration run flagged as good by the author: only the 'print' mode call is active; the 'ret' call
# and the month-coverage print are disabled.
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 99, 38.6, 61.8,  23.6, 61.8, 0, 1900, 0, 450, 0, 10000, 0, 10000)
#df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 99, 38.6, 61.8,  23.6, 61.8, 0, 1900, 0, 450, 0, 10000, 0, 10000)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: only the 'print' mode call is active; the 'ret' call and the month-coverage print are disabled.
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 99, 38.6, 61.8,  0, 61.8, 0, 2500, 50, 350, 25, 220, 15, 119)
#df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 99, 38.6, 61.8,  0, 61.8, 0, 2500, 50, 350, 25, 220, 15, 119)
#print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 50).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 99, 38.6, 61.8,  0, 78.6, 0, 2800, 0, 350, 50, 220, 20, 120)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 99, 38.6, 61.8,  0, 78.6, 0, 2800, 0, 350, 50, 220, 20, 120)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 50).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 99, 38.6, 61.8,  23.6, 99, 0, 1600, 50, 450, 0, 210, 20, 119)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 99, 38.6, 61.8,  23.6, 99, 0, 1600, 50, 450, 0, 210, 20, 119)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 50).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 99, 38.6, 61.8,  38.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.6, 61.8, 0, 99, 38.6, 61.8,  38.6, 61.8, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (100 vs 50).
# NOTE(review): this is the only cell where the 'print' call uses 100 - confirm it is intended.
Auto_Analysis(df, 100, 'M15_H1', 'print',            -1, 1, 1, 1, -1, 1, 1, 1, 38.2, 90, 23.6, 78.6, 50, 61.8, 23.6, 78.6, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, -1, 1, 1, 1, 38.2, 90, 23.6, 78.6, 50, 61.8, 23.6, 78.6, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 50).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, -1, 1, 1, 1, 38.2, 90, 23.6, 78.6, 50, 61.8, 38.2, 99, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, -1, 1, 1, 1, 38.2, 90, 23.6, 78.6, 50, 61.8, 38.2, 99, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 50).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, -1, 1, 1, 1, 38.2, 90, 23.6, 78.6, 50, 78.6, 38.2, 90, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, -1, 1, 1, 1, 38.2, 90, 23.6, 78.6, 50, 78.6, 38.2, 90, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 50).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, -1, 1, 1, 1, 38.2, 90, 23.6, 90, 50, 61.8, 38.2, 99, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, -1, 1, 1, 1, 38.2, 90, 23.6, 90, 50, 61.8, 38.2, 99, 0, 10000, 0, 10000, 0, 10000, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 50).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, -1, 1, 1, 1, 38.2, 90, 23.6, 99, 50, 61.8, 38.2, 78.6, 0, 1300, 0, 10000, 25, 10000, 0, 10000)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, -1, 1, 1, 1, 38.2, 90, 23.6, 99, 50, 61.8, 38.2, 78.6, 0, 1300, 0, 10000, 25, 10000, 0, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

In [None]:
# Exploration run: 'print' mode call, then 'ret' mode call for the buy/sell frames, then the share of
# months that contain points. The two calls differ only in the second argument (30 vs 50).
Auto_Analysis(df, 30, 'M15_H1', 'print',            -1, 1, 1, 1, 1, 1, 1, 1, 38.2, 90, 23.6, 99, 50, 61.8, 38.2, 78.6, 0, 1000, 0, 10000, 0, 10000, 25, 10000)
df_b, df_s = Auto_Analysis(df, 50, 'M15_H1', 'ret', -1, 1, 1, 1, 1, 1, 1, 1, 38.2, 90, 23.6, 99, 50, 61.8, 38.2, 78.6, 0, 1000, 0, 10000, 0, 10000, 25, 10000)
print('Total months:',diff_month(df),',months w/ points:',N_months_concat(df_b, df_s),'(',round(100*(N_months_concat(df_b, df_s)/diff_month(df)),1),'%)')

## 2nd Part of the Strategy - AI
### Functions

In [None]:
def get_redundant_pairs(df):
    '''Collect the column-label pairs on or below the diagonal of df's correlation matrix.

    INPUTS:

     - df

    OUTPUTS:

     - set of (col_i, col_j) tuples with j <= i (positions in df.columns), i.e. the
       diagonal plus the lower triangle.
    '''
    names = df.columns
    return {
        (names[row], names[col])
        for row in range(df.shape[1])
        for col in range(row + 1)
    }

def get_top_abs_correlations(df, n):
    '''Return the n largest absolute pairwise correlations between the columns of df.

    INPUTS:

     - df
     - n  ==> number of pairs to keep

    OUTPUTS:

     - Series indexed by (col_a, col_b), sorted descending, with the diagonal and
       lower-triangle pairs removed so each pair appears once.
    '''
    abs_corr_pairs = df.corr().abs().unstack()
    redundant = get_redundant_pairs(df)
    ranked = abs_corr_pairs.drop(labels=redundant).sort_values(ascending=False)
    return ranked.iloc[:n]

In [None]:
def roc(model, X_test, y_test):
    '''Print the ROC AUC of model against a constant "no skill" baseline and plot both ROC curves.

    INPUTS:

     - model   ==> fitted classifier exposing predict_proba
     - X_test  ==> features to score
     - y_test  ==> true labels for X_test

    OUTPUTS:

     - None (prints two AUC lines and shows a matplotlib figure)
    '''
    from sklearn.metrics import roc_curve, roc_auc_score
    from matplotlib import pyplot

    # Baseline: the same score (0) for every sample
    baseline_scores = [0] * len(y_test)
    # Model: probability of the positive class (second predict_proba column)
    model_scores = model.predict_proba(X_test)[:, 1]

    baseline_auc = roc_auc_score(y_test, baseline_scores)
    model_auc = roc_auc_score(y_test, model_scores)
    print('No Skill   : ROC AUC=%.3f' % (baseline_auc))
    print('Rand Forest: ROC AUC=%.3f' % (model_auc))

    # One curve per scorer, baseline first so the legend order is unchanged
    curves = (
        (baseline_scores, {'linestyle': '--', 'label': 'No Skill'}),
        (model_scores, {'marker': '.', 'label': 'Rand Forest'}),
    )
    for scores, line_style in curves:
        fpr, tpr, _ = roc_curve(y_test, scores)
        pyplot.plot(fpr, tpr, **line_style)

    pyplot.xlabel('False Positive Rate')
    pyplot.ylabel('True Positive Rate')
    pyplot.legend()
    pyplot.show()

In [None]:
def split(df1,df2,df3,Label,test_size=0.99,random_state=101):
    '''Build a train/test partition from df1+df2 and a separate hold-out set from df3.

    INPUTS:

     - df1, df2      ==> frames that are concatenated and then split into train/test
     - df3           ==> frame kept aside; only separated into features and label
     - Label         ==> name of the target column in all three frames
     - test_size     ==> fraction passed to train_test_split. The default (0.99) keeps the
                         original behaviour, i.e. only 1% of df1+df2 ends up in X_train.
                         NOTE(review): confirm that such a small training share is intended.
     - random_state  ==> seed for the shuffled split (default 101, as before)

    OUTPUTS:

     - X_train, X_test, y_train, y_test ==> shuffled split of df1+df2
     - X2, y2                           ==> features / label of df3
    '''
    from sklearn.model_selection import train_test_split

    frames = pd.concat([df1, df2])

    X = frames.drop([Label],axis=1)
    y = frames[Label]

    X2 = df3.drop([Label],axis=1)
    y2 = df3[Label]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size = test_size, shuffle = True, random_state = random_state)

    return X_train, X_test, y_train, y_test, X2, y2

In [None]:
def model_rfc_random(X_train,y_train):
    '''Tune a RandomForestClassifier with a randomized hyper-parameter search and return the best one.

    INPUTS:

     - X_train, y_train ==> training features and labels

    OUTPUTS:

     - best_estimator_ of the fitted RandomizedSearchCV (100 sampled candidates,
       3-fold cross validation, random_state=42, all cores).
    '''
    from sklearn.model_selection import RandomizedSearchCV
    from sklearn.ensemble import RandomForestClassifier

    # Candidate forest sizes and tree depths (None = no depth limit)
    tree_counts = list(map(int, np.linspace(start = 200, stop = 2000, num = 4)))
    depth_options = list(map(int, np.linspace(10, 110, num = 4))) + [None]

    # NOTE(review): 'auto' is reported as deprecated/removed for max_features in recent
    # scikit-learn releases - confirm against the installed version. Kept unchanged here.
    random_grid = {'n_estimators': tree_counts,
                   'max_features': ['auto', 'sqrt'],
                   'max_depth': depth_options,
                   'min_samples_split': [2, 5, 10],
                   'min_samples_leaf': [1, 2, 4],
                   'bootstrap': [True, False]}

    print(random_grid)

    search = RandomizedSearchCV(estimator = RandomForestClassifier(),
                                param_distributions = random_grid,
                                n_iter = 100, cv = 3, verbose=2,
                                random_state=42, n_jobs = -1)
    search.fit(X_train,y_train)

    print('Best Parameters:', search.best_params_)

    return search.best_estimator_

In [None]:
def predictions(model, X_test, y_test, thres):
    '''Print the classification report and confusion matrix of model at a probability cut-off.

    INPUTS:

     - model   ==> fitted classifier exposing predict_proba
     - X_test  ==> features to score
     - y_test  ==> true labels for X_test
     - thres   ==> samples whose positive-class probability is >= thres are predicted as 1

    OUTPUTS:

     - None (prints only)
    '''
    from sklearn.metrics import classification_report,confusion_matrix

    print('Predictions - Threshold >=',thres)

    positive_proba = model.predict_proba(X_test)[:, 1]
    y_pred = (positive_proba >= thres).astype('int')

    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))

In [None]:
def top_cols(model, X_train, threshold):
    '''List the features whose importance in model exceeds threshold, most important first.

    INPUTS:

     - model      ==> fitted estimator exposing feature_importances_
     - X_train    ==> frame whose columns name the features, in the order the model saw them
     - threshold  ==> strict lower bound on the importance

    OUTPUTS:

     - list of column names (as strings) sorted by descending importance
    '''
    importance_by_feature = pd.Series(
        model.feature_importances_,
        index=list(map(str, X_train.columns)),
    )
    selected = importance_by_feature[importance_by_feature > threshold]
    ranked = selected.sort_values(ascending = False)
    return ranked.index.tolist()

In [None]:
def remove_corr(df, ind_cols, th):
    '''Pick the columns to drop because they correlate too strongly with another column.

    INPUTS:

     - df
     - ind_cols ==> candidate column names
     - th       ==> absolute-correlation threshold (strictly greater than)

    OUTPUTS:

     - list of columns (in ind_cols order) that appear as the second member of any of the
       top 150 absolute-correlation pairs whose value exceeds th.
    '''
    top_pairs = get_top_abs_correlations(df[ind_cols], 150)
    strong_pairs = top_pairs[top_pairs > th].index

    # Only the second label of each strongly-correlated pair is marked for removal
    second_members = {pair[1] for pair in strong_pairs}

    return [col for col in ind_cols if col in second_members]

In [None]:
def Pre_rec_curve(model, X_test, y_test, thres):
    '''Plot the precision-recall curve of model and title it with the precision reached at thres.

    INPUTS:

     - model   ==> fitted classifier exposing predict_proba
     - X_test  ==> features to score
     - y_test  ==> true binary labels for X_test
     - thres   ==> samples whose positive-class probability is >= thres are predicted as 1

    OUTPUTS:

     - None (draws the curve on a matplotlib axes)
    '''
    from sklearn.metrics import precision_score

    try:
        # Legacy plotting helper; not available in recent scikit-learn releases.
        from sklearn.metrics import plot_precision_recall_curve
    except ImportError:
        plot_precision_recall_curve = None

    positive_proba = model.predict_proba(X_test)[:, 1]
    predicted = (positive_proba >= thres).astype('int')
    precision = precision_score(y_test, predicted)

    if plot_precision_recall_curve is not None:
        disp = plot_precision_recall_curve(model, X_test, y_test)
    else:
        from sklearn.metrics import PrecisionRecallDisplay
        disp = PrecisionRecallDisplay.from_estimator(model, X_test, y_test)

    # The value shown is the precision at the chosen threshold, not the average precision,
    # so it is labelled as such.
    disp.ax_.set_title('2-class Precision-Recall curve: '
                       'Precision={0:0.2f} at threshold >= {1}'.format(precision, thres))

In [None]:
def _first_threshold_above(model, X, y, pr):
    '''Scan cut-offs 0.00, 0.02, ... and stop at the first one whose precision exceeds pr.

    Returns (threshold, precision at that threshold). When no cut-off exceeds pr, the last
    scanned cut-off and its precision are returned.
    '''
    from sklearn.metrics import precision_score

    # The probabilities do not depend on the cut-off, so compute them once
    positive_proba = model.predict_proba(X)[:, 1]

    for cutoff in np.arange(0,1,0.02):
        precision = precision_score(y, (positive_proba >= cutoff).astype('int'))
        if precision > pr:
            break

    return cutoff, precision


def opt_thres(model, X_test1, y_test1, X_test2, y_test2, X_test3, y_test3, pr):
    '''Find a probability cut-off at which model reaches precision > pr on three datasets.

    INPUTS:

     - model                  ==> fitted classifier exposing predict_proba
     - X_test1..3, y_test1..3 ==> three feature / label sets to evaluate
     - pr                     ==> precision that must be exceeded

    OUTPUTS:

     - the largest of the three per-dataset cut-offs. For each dataset the cut-off found and
       the precision at that cut-off are printed.
    '''
    found = []

    for X, y in ((X_test1, y_test1), (X_test2, y_test2), (X_test3, y_test3)):
        cutoff, precision = _first_threshold_above(model, X, y, pr)
        print(cutoff, precision)
        found.append(cutoff)

    t = max(found)
    print ('Threshold:',t)

    return t

### Prepare the data

In [None]:
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor

# Read the pickled frame and clean it (the split into 3 frames happens in a later cell)
# NOTE(review): pd.read_pickle unpickles the file - only load files from a trusted source.

dfv = pd.read_pickle('dffv_EURUSD.pkl')
df = clean_data(dfv)

### Explore and drop correlated data

In [None]:
# Indicator columns checked for mutual correlation: the same 14 indicators for the
# H1, H4 and D1 suffixes (42 columns in total).
ind_cols =  ['RSI_14_SMA_H1', 'RSI_14_EMA_H1', 'EMA_12_macd_H1', 'EMA_26_macd_H1',
             'MACD_12_26_H1', 'EMA_MACD_12_26_9_H1', 'Hist_MACD_12_26_9_H1', 'MACD_signal_H1', 
             'Boll_SMA_20_H1', 'Boll_SMA_20_Var_-2_H1', 'Boll_SMA_20_Var_+2_H1', 'Dist_BollB_SMA_20_Var_+2_H1', 
             'Dist_BollB_SMA_20_Var_-2_H1', 'BollB_Wideness_H1',
             'RSI_14_SMA_H4', 'RSI_14_EMA_H4', 'EMA_12_macd_H4', 'EMA_26_macd_H4',
             'MACD_12_26_H4', 'EMA_MACD_12_26_9_H4', 'Hist_MACD_12_26_9_H4', 'MACD_signal_H4', 
             'Boll_SMA_20_H4', 'Boll_SMA_20_Var_-2_H4', 'Boll_SMA_20_Var_+2_H4', 'Dist_BollB_SMA_20_Var_+2_H4', 
             'Dist_BollB_SMA_20_Var_-2_H4', 'BollB_Wideness_H4',
             'RSI_14_SMA_D1', 'RSI_14_EMA_D1', 'EMA_12_macd_D1', 'EMA_26_macd_D1',
             'MACD_12_26_D1', 'EMA_MACD_12_26_9_D1', 'Hist_MACD_12_26_9_D1', 'MACD_signal_D1', 
             'Boll_SMA_20_D1', 'Boll_SMA_20_Var_-2_D1', 'Boll_SMA_20_Var_+2_D1', 'Dist_BollB_SMA_20_Var_+2_D1', 
             'Dist_BollB_SMA_20_Var_-2_D1', 'BollB_Wideness_D1']

# Show the 150 strongest absolute correlations among those columns
print("Top Absolute Correlations")
print(get_top_abs_correlations(df[ind_cols], 150))

In [None]:
# Correlated columns that need dropping (second member of every pair with |corr| > 0.98)

corr_cols = remove_corr(df, ind_cols, 0.98)

# Re-check the correlations among the columns that remain.
# NOTE: the set difference below does not preserve the original column order.
print("Top Absolute Correlations")
print(get_top_abs_correlations(df[list(set(ind_cols)-set(corr_cols))], 300))

In [None]:
# Plot correlations between columns

import seaborn as sns
import matplotlib.pyplot as plt

# Remove the highly-correlated columns while keeping the original column order, so the
# heatmap axes are laid out identically on every run (a set difference has no stable order).
ind_cols = [col for col in ind_cols if col not in corr_cols]

df_corr = df[ind_cols].corr().round(2)

# Negative correlations are masked out (become NaN) and therefore stay blank in the heatmap
kot = df_corr[df_corr>=.0]
fig, ax = plt.subplots(figsize=(12,12))
ax = sns.heatmap(kot, vmin=-1, vmax=1, square=1)

In [None]:
# Drop correlated columns

#df.drop(corr_cols, axis = 1, inplace = True)

### Filtration process

In [None]:
# Split the Data in 3 dfs
# NOTE(review): the results are unpacked in reverse order (df3, df2, df1) - presumably
# split3_dfs returns the frames in that order; verify against its definition.

df3, df2, df1 = split3_dfs(df)

In [None]:
# Create filtered dataframes for M15_H1 (same Auto_Analysis arguments for each of the 3 frames)
df1_filt_b_M15_H1, df1_filt_s_M15_H1 = Auto_Analysis(df1, 50, 'M15_H1', 'ret', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 1900, 0, 400, 0, 200, 0, 10000)
df2_filt_b_M15_H1, df2_filt_s_M15_H1 = Auto_Analysis(df2, 50, 'M15_H1', 'ret', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 1900, 0, 400, 0, 200, 0, 10000)
df3_filt_b_M15_H1, df3_filt_s_M15_H1 = Auto_Analysis(df3, 50, 'M15_H1', 'ret', -1, 1, -1, 1, 1, 1, -1, 1, 38.6, 50, 23.6, 61.8, 0, 78.6,  38.6, 61.8, 0, 1900, 0, 400, 0, 200, 0, 10000)

# Totals: number of filtered points, and share of them whose label equals 1.
# BUY frames are scored on 'Labelb_M15_H1', SELL frames on 'Labels_M15_H1'
# (the df1 SELL term previously read the BUY label by mistake).
b_tot  = df1_filt_b_M15_H1.shape[0] + df2_filt_b_M15_H1.shape[0] + df3_filt_b_M15_H1.shape[0]
s_tot  = df1_filt_s_M15_H1.shape[0] + df2_filt_s_M15_H1.shape[0] + df3_filt_s_M15_H1.shape[0]
b_perc = (df1_filt_b_M15_H1[df1_filt_b_M15_H1['Labelb_M15_H1'] == 1].count()[0] + df2_filt_b_M15_H1[df2_filt_b_M15_H1['Labelb_M15_H1'] == 1].count()[0] + df3_filt_b_M15_H1[df3_filt_b_M15_H1['Labelb_M15_H1'] == 1].count()[0])/b_tot
s_perc = (df1_filt_s_M15_H1[df1_filt_s_M15_H1['Labels_M15_H1'] == 1].count()[0] + df2_filt_s_M15_H1[df2_filt_s_M15_H1['Labels_M15_H1'] == 1].count()[0] + df3_filt_s_M15_H1[df3_filt_s_M15_H1['Labels_M15_H1'] == 1].count()[0])/s_tot

print('M15_H1 BUY   Shapes df1, df2, df3: ',df1_filt_b_M15_H1.shape,df2_filt_b_M15_H1.shape,df3_filt_b_M15_H1.shape)
print('M15_H1 BUY   Total:',b_tot,'points')
print('M15_H1 BUY   Overall performance:',round(100 * b_perc,2),'%\n')
print('M15_H1 SELL  Shapes df1, df2, df3: ',df1_filt_s_M15_H1.shape,df2_filt_s_M15_H1.shape,df3_filt_s_M15_H1.shape)
print('M15_H1 SELL  Total:',s_tot,'points')
print('M15_H1 SELL  Overall performance:',round(100 * s_perc,2),'%\n')
print('TOTAL POINTS:',b_tot + s_tot,'of',len(df),'(',round(100*(b_tot + s_tot)/len(df),2),'%)')
# Point-weighted average of the BUY and SELL success shares
print('TOTAL PERFORMANCE:',round((100 * (((b_perc*b_tot) + (s_perc*s_tot))/(b_tot+s_tot))),2),'%\n\n')

# Create filtered dataframes for M15_H4 (same Auto_Analysis arguments for each of the 3 frames)
df1_filt_b_M15_H4, df1_filt_s_M15_H4 = Auto_Analysis(df1, 50, 'M15_H4', 'ret', 1, 1, 1, 1, -1, 1, -1, 1,0, 78.6, 23.6, 78.6, 0, 90, 0, 99, 0, 1200, 280, 10000, 0, 10000, 0, 10000)
df2_filt_b_M15_H4, df2_filt_s_M15_H4 = Auto_Analysis(df2, 50, 'M15_H4', 'ret', 1, 1, 1, 1, -1, 1, -1, 1,0, 78.6, 23.6, 78.6, 0, 90, 0, 99, 0, 1200, 280, 10000, 0, 10000, 0, 10000)
df3_filt_b_M15_H4, df3_filt_s_M15_H4 = Auto_Analysis(df3, 50, 'M15_H4', 'ret', 1, 1, 1, 1, -1, 1, -1, 1,0, 78.6, 23.6, 78.6, 0, 90, 0, 99, 0, 1200, 280, 10000, 0, 10000, 0, 10000)

# Totals: number of filtered points, and share of them whose label equals 1
# (BUY frames use 'Labelb_M15_H4', SELL frames use 'Labels_M15_H4').
# Note: b_tot, s_tot, b_perc and s_perc are reused from the M15_H1 section and overwritten here.
b_tot  = df1_filt_b_M15_H4.shape[0] + df2_filt_b_M15_H4.shape[0] + df3_filt_b_M15_H4.shape[0]
s_tot  = df1_filt_s_M15_H4.shape[0] + df2_filt_s_M15_H4.shape[0] + df3_filt_s_M15_H4.shape[0]
b_perc = (df1_filt_b_M15_H4[df1_filt_b_M15_H4['Labelb_M15_H4'] == 1].count()[0] + df2_filt_b_M15_H4[df2_filt_b_M15_H4['Labelb_M15_H4'] == 1].count()[0] + df3_filt_b_M15_H4[df3_filt_b_M15_H4['Labelb_M15_H4'] == 1].count()[0])/b_tot
s_perc = (df1_filt_s_M15_H4[df1_filt_s_M15_H4['Labels_M15_H4'] == 1].count()[0] + df2_filt_s_M15_H4[df2_filt_s_M15_H4['Labels_M15_H4'] == 1].count()[0] + df3_filt_s_M15_H4[df3_filt_s_M15_H4['Labels_M15_H4'] == 1].count()[0])/s_tot

print('M15_H4 BUY   Shapes df1, df2, df3: ',df1_filt_b_M15_H4.shape,df2_filt_b_M15_H4.shape,df3_filt_b_M15_H4.shape)
print('M15_H4 BUY   Total:',b_tot,'points')
print('M15_H4 BUY   Overall performance:',round(100 * b_perc,2),'%\n')
print('M15_H4 SELL  Shapes df1, df2, df3: ',df1_filt_s_M15_H4.shape,df2_filt_s_M15_H4.shape,df3_filt_s_M15_H4.shape)
print('M15_H4 SELL  Total:',s_tot,'points')
print('M15_H4 SELL  Overall performance:',round(100 * s_perc,2),'%\n')
print('TOTAL POINTS:',b_tot + s_tot,'of',len(df),'(',round(100*(b_tot + s_tot)/len(df),2),'%)')
# Point-weighted average of the BUY and SELL success shares
print('TOTAL PERFORMANCE:',round((100 * (((b_perc*b_tot) + (s_perc*s_tot))/(b_tot+s_tot))),2),'%')

In [None]:
# Month coverage per filtered frame: N_months(filtered frame) against diff_month(source frame)
print('Buy  df1_M15_H1',N_months(df1_filt_b_M15_H1),'months out of', diff_month(df1))
print('Buy  df2_M15_H1',N_months(df2_filt_b_M15_H1),'months out of', diff_month(df2))
print('Buy  df3_M15_H1',N_months(df3_filt_b_M15_H1),'months out of', diff_month(df3),'\n')

print('Sell df1_M15_H1',N_months(df1_filt_s_M15_H1),'months out of', diff_month(df1))
print('Sell df2_M15_H1',N_months(df2_filt_s_M15_H1),'months out of', diff_month(df2))
print('Sell df3_M15_H1',N_months(df3_filt_s_M15_H1),'months out of', diff_month(df3),'\n')

print('Buy  df1_M15_H4',N_months(df1_filt_b_M15_H4),'months out of', diff_month(df1))
print('Buy  df2_M15_H4',N_months(df2_filt_b_M15_H4),'months out of', diff_month(df2))
print('Buy  df3_M15_H4',N_months(df3_filt_b_M15_H4),'months out of', diff_month(df3),'\n')

print('Sell df1_M15_H4',N_months(df1_filt_s_M15_H4),'months out of', diff_month(df1))
print('Sell df2_M15_H4',N_months(df2_filt_s_M15_H4),'months out of', diff_month(df2))
print('Sell df3_M15_H4',N_months(df3_filt_s_M15_H4),'months out of', diff_month(df3),'\n')

# Combined coverage of the four filtered frames (buy/sell x H1/H4) per source frame
print('Total df1:',N_months_concat4(df1_filt_b_M15_H1, df1_filt_s_M15_H1, df1_filt_b_M15_H4, df1_filt_s_M15_H4),'months out of',diff_month(df1))
print('Total df2:',N_months_concat4(df2_filt_b_M15_H1, df2_filt_s_M15_H1, df2_filt_b_M15_H4, df2_filt_s_M15_H4),'months out of',diff_month(df2))
print('Total df3:',N_months_concat4(df3_filt_b_M15_H1, df3_filt_s_M15_H1, df3_filt_b_M15_H4, df3_filt_s_M15_H4),'months out of',diff_month(df3))

In [None]:
# Reset index M15_H1 (drop=True discards the old index instead of keeping it as a column)
df1_filt_b_M15_H1, df1_filt_s_M15_H1 = df1_filt_b_M15_H1.reset_index(drop=True), df1_filt_s_M15_H1.reset_index(drop=True)
df2_filt_b_M15_H1, df2_filt_s_M15_H1 = df2_filt_b_M15_H1.reset_index(drop=True), df2_filt_s_M15_H1.reset_index(drop=True)
df3_filt_b_M15_H1, df3_filt_s_M15_H1 = df3_filt_b_M15_H1.reset_index(drop=True), df3_filt_s_M15_H1.reset_index(drop=True)

# Reset index M15_H4
df1_filt_b_M15_H4, df1_filt_s_M15_H4 = df1_filt_b_M15_H4.reset_index(drop=True), df1_filt_s_M15_H4.reset_index(drop=True)
df2_filt_b_M15_H4, df2_filt_s_M15_H4 = df2_filt_b_M15_H4.reset_index(drop=True), df2_filt_s_M15_H4.reset_index(drop=True)
df3_filt_b_M15_H4, df3_filt_s_M15_H4 = df3_filt_b_M15_H4.reset_index(drop=True), df3_filt_s_M15_H4.reset_index(drop=True)

# List of generic columns to be dropped from every filtered frame:

drop_gen_cols = ['index', 'Price_M15', 'MS_N_M15','MS_N_H1', 'MS_N_H4', 'MS_N_D1',
                 #'Trend_M15','Trend_H1','Trend_H4','Trend_D1',
                 'MS_Sit_M15','MS_Sit_H1', 'MS_Sit_H4', 'MS_Sit_D1',
                 'Date_M15', 'Open_M15', 'High_M15', 'Low_M15', 'Close_M15', 'MS_L_M15', 'MS_H_M15',
                 'Date_H1' , 'Open_H1' , 'High_H1' , 'Low_H1' , 'Close_H1' , 'MS_L_H1' , 'MS_H_H1',
                 'Date_H4' , 'Open_H4' , 'High_H4' , 'Low_H4' , 'Close_H4' , 'MS_L_H4' , 'MS_H_H4',
                 'Date_D1' , 'Open_D1' , 'High_D1' , 'Low_D1' , 'Close_D1' , 'MS_L_D1' , 'MS_H_D1',
                 'B_Stop_Loss_M15_H1', 'B_Limit_M15_H1', 'S_Stop_Loss_M15_H1', 'S_Limit_M15_H1',
                 'B_Stop_Loss_M15_H4', 'B_Limit_M15_H4', 'S_Stop_Loss_M15_H4', 'S_Limit_M15_H4']

# Additional columns to be dropped in each dataframe: of the four label columns,
# each frame keeps only the one that matches its own side (buy/sell) and timeframe.

drop_M15_H1_b_cols = ['Labels_M15_H1', 'Labelb_M15_H4', 'Labels_M15_H4'] # ['Labelb_M15_H1'] remains
drop_M15_H1_s_cols = ['Labelb_M15_H1', 'Labelb_M15_H4', 'Labels_M15_H4'] # ['Labels_M15_H1'] remains

drop_M15_H4_b_cols = ['Labelb_M15_H1', 'Labels_M15_H1', 'Labels_M15_H4'] # ['Labelb_M15_H4'] remains
drop_M15_H4_s_cols = ['Labelb_M15_H1', 'Labels_M15_H1', 'Labelb_M15_H4'] # ['Labels_M15_H4'] remains

# Drop the unnecessary columns (in place).
# NOTE: running this twice on the same frames raises KeyError because the columns are already gone.

df1_filt_b_M15_H1.drop(drop_gen_cols + drop_M15_H1_b_cols, axis=1, inplace =True)
df2_filt_b_M15_H1.drop(drop_gen_cols + drop_M15_H1_b_cols, axis=1, inplace =True)
df3_filt_b_M15_H1.drop(drop_gen_cols + drop_M15_H1_b_cols, axis=1, inplace =True)

df1_filt_s_M15_H1.drop(drop_gen_cols + drop_M15_H1_s_cols, axis=1, inplace =True)
df2_filt_s_M15_H1.drop(drop_gen_cols + drop_M15_H1_s_cols, axis=1, inplace =True)
df3_filt_s_M15_H1.drop(drop_gen_cols + drop_M15_H1_s_cols, axis=1, inplace =True)

df1_filt_b_M15_H4.drop(drop_gen_cols + drop_M15_H4_b_cols, axis=1, inplace =True)
df2_filt_b_M15_H4.drop(drop_gen_cols + drop_M15_H4_b_cols, axis=1, inplace =True)
df3_filt_b_M15_H4.drop(drop_gen_cols + drop_M15_H4_b_cols, axis=1, inplace =True)

df1_filt_s_M15_H4.drop(drop_gen_cols + drop_M15_H4_s_cols, axis=1, inplace =True)
df2_filt_s_M15_H4.drop(drop_gen_cols + drop_M15_H4_s_cols, axis=1, inplace =True)
df3_filt_s_M15_H4.drop(drop_gen_cols + drop_M15_H4_s_cols, axis=1, inplace =True)

# Columns left so far (author's summary; X stands for a timeframe suffix):

#Trend_H1,
#Labelb_H1, Labels_H1, Labelb_H1_H4, Labels_H1_H4,
#Closing_X, MS_Sit_X, N_Breaks_X, MS_Pds_X, MS_range_X, MS_retracement_X
#RSI_14_SMA_X, RSI_14_EMA, EMA_12_macd_X, EMA_26_macd,
#MACD_12_26_X, EMA_MACD_12_26_9_X, Hist_MACD_12_26_9_X, MACD_signal_X
#Boll_SMA_20_X, Boll_SMA_20_Var_-2_X, Boll_SMA_20_Var_+2_X,
#Dist_BollB_SMA_20_Var_+2_X, Dist_BollB_SMA_20_Var_-2_XH1, BollB_Wideness_X

# Shapes after dropping; the 'H1' / 'H4' prefixes refer to the M15_H1 / M15_H4 frames
print('H1 BUY  Shapes df1, df2, df3: ',df1_filt_b_M15_H1.shape,df2_filt_b_M15_H1.shape,df3_filt_b_M15_H1.shape)
print('H1 SELL Shapes df1, df2, df3: ',df1_filt_s_M15_H1.shape,df2_filt_s_M15_H1.shape,df3_filt_s_M15_H1.shape)

print('H4 BUY  Shapes df1, df2, df3: ',df1_filt_b_M15_H4.shape,df2_filt_b_M15_H4.shape,df3_filt_b_M15_H4.shape)
print('H4 SELL Shapes df1, df2, df3: ',df1_filt_s_M15_H4.shape,df2_filt_s_M15_H4.shape,df3_filt_s_M15_H4.shape)

### Random Forest Modelling
#### [M15_H1] BUY - Explores Best RFC Model

In [None]:
# Candidate 1: train on frames 1 + 2, validate on frame 3
Label = 'Labelb_M15_H1'

df_1  = df1_filt_b_M15_H1
df_2  = df2_filt_b_M15_H1
df_3  = df3_filt_b_M15_H1

# Concatenates df_1 & df_2 as X and y, and splits the data in train - test. We validate afterwards with df_3
# (X2, y2 are the features / label of df_3)
X_train, X_test, y_train, y_test, X2, y2 = split(df_1,df_2,df_3,Label)

# Instantiates the model (default hyper-parameters)
rfc_b1 = RandomForestClassifier()
    
# Fit the model of df_1 & df_2 split
rfc_b1.fit(X_train,y_train)

# Plots the ROC curve (evaluated on df_3)
roc(rfc_b1, X2, y2)

# Predicts the df_3 df using the rfc model created from df_1 & df_2
predictions(rfc_b1, X_test = X2, y_test = y2, thres = 0.5)

In [None]:
# Candidate 2: train on frames 2 + 3, validate on frame 1
Label = 'Labelb_M15_H1'

df_1  = df2_filt_b_M15_H1
df_2  = df3_filt_b_M15_H1
df_3  = df1_filt_b_M15_H1

# Concatenates df_1 & df_2 as X and y, and splits the data in train - test. We validate afterwards with df_3
# (X2, y2 are the features / label of df_3)
X_train, X_test, y_train, y_test, X2, y2 = split(df_1,df_2,df_3,Label)

# Instantiates the model (default hyper-parameters)
rfc_b2 = RandomForestClassifier()
    
# Fit the model of df_1 & df_2 split
rfc_b2.fit(X_train,y_train)

# Plots the ROC curve (evaluated on df_3)
roc(rfc_b2, X2, y2)

# Predicts the df_3 df using the rfc model created from df_1 & df_2 (cut-off 0.2 in this cell)
predictions(rfc_b2, X_test = X2, y_test = y2, thres = 0.2)

In [None]:
# Candidate 3: train on frames 1 + 3, validate on frame 2
Label = 'Labelb_M15_H1'

df_1  = df1_filt_b_M15_H1
df_2  = df3_filt_b_M15_H1
df_3  = df2_filt_b_M15_H1

# Concatenates df_1 & df_2 as X and y, and splits the data in train - test. We validate afterwards with df_3
# (X2, y2 are the features / label of df_3)
X_train, X_test, y_train, y_test, X2, y2 = split(df_1,df_2,df_3,Label)

# Instantiates the model (default hyper-parameters)
rfc_b3 = RandomForestClassifier()
    
# Fit the model of df_1 & df_2 split
rfc_b3.fit(X_train,y_train)

# Plots the ROC curve (evaluated on df_3)
roc(rfc_b3, X2, y2)

# Predicts the df_3 df using the rfc model created from df_1 & df_2
predictions(rfc_b3, X_test = X2, y_test = y2, thres = 0.5)

#### [M15_H1] BUY - Optimum RFC Model

The chosen model is rfc_b2.

In [None]:
# Load rfc_opt_b_H1 model (written by the random-search cell in this section)
# NOTE(review): pickle.load executes code embedded in the file - only load a file you created yourself.
with open('rfc_opt_b_H1.pkl', 'rb') as f:
    rfc_opt_b_H1 = pickle.load(f)

In [None]:
#rfc_opt_b_H1 = rfc_b2

In [None]:
# Chosen fold is rfc_b2 (fit on df2 + df3, validate on df1).
# Run the random grid search on that fold to obtain the optimum parameters.
Label = 'Labelb_M15_H1'
df_1, df_2, df_3 = df2_filt_b_M15_H1, df3_filt_b_M15_H1, df1_filt_b_M15_H1

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Optimum model returned by the random search
rfc_opt_b_H1 = model_rfc_random(X_train, y_train)

# ROC curve on the validation frame
roc(rfc_opt_b_H1, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc_opt_b_H1, X_test=X2, y_test=y2, thres=0.3)

# Persist the tuned model so it can be reloaded without repeating the search
with open('rfc_opt_b_H1.pkl', 'wb') as model_file:
    pickle.dump(rfc_opt_b_H1, model_file)

#### [M15_H1] BUY - Optimum model testing

In [None]:
# Evaluate the tuned BUY M15/H1 model on each of the three frames.
Label = 'Labelb_M15_H1'

# Features (every column except the label) and label vector per frame
X1, y1 = df1_filt_b_M15_H1.drop(columns=[Label]), df1_filt_b_M15_H1[Label]
X2, y2 = df2_filt_b_M15_H1.drop(columns=[Label]), df2_filt_b_M15_H1[Label]
X3, y3 = df3_filt_b_M15_H1.drop(columns=[Label]), df3_filt_b_M15_H1[Label]

# Single threshold derived by opt_thres from all three frames
thres = opt_thres(rfc_opt_b_H1, X1, y1, X2, y2, X3, y3, pr=0.5)

# df1: ROC curve (Receiver Operating Characteristic), then predictions at `thres`
roc(rfc_opt_b_H1, X1, y1)
predictions(rfc_opt_b_H1, X_test=X1, y_test=y1, thres=thres)

# df2: ROC curve, then predictions at `thres`
roc(rfc_opt_b_H1, X2, y2)
predictions(rfc_opt_b_H1, X_test=X2, y_test=y2, thres=thres)

# df3: ROC curve, then predictions at `thres`
roc(rfc_opt_b_H1, X3, y3)
predictions(rfc_opt_b_H1, X_test=X3, y_test=y3, thres=thres)

In [None]:
# Precision-recall curve of the tuned BUY M15/H1 model on df1 (X1, y1), passing the current `thres`
Pre_rec_curve(rfc_opt_b_H1, X1, y1, thres)

# Same curve for df2 (X2, y2)
Pre_rec_curve(rfc_opt_b_H1, X2, y2, thres)

# Same curve for df3 (X3, y3)
Pre_rec_curve(rfc_opt_b_H1, X3, y3, thres)

In [None]:
# feature name -> Gini importance of the tuned BUY M15/H1 model
feats = dict(zip(X1.columns, rfc_opt_b_H1.feature_importances_))

# Keep the features above the 0.015 cut-off, most important first
importances = (
    pd.DataFrame.from_dict(feats, orient='index')
    .rename(columns={0: 'Gini-importance'})
    .loc[lambda frame: frame['Gini-importance'] > 0.015]
    .sort_values('Gini-importance', ascending=False)
)
importances.plot(kind='bar', stacked=True, figsize=(10,5))

#### [M15_H1] SELL - Explores Best RFC Model

In [None]:
# Fold 1 (SELL, M15/H1): fit on df1 + df2, validate on df3.
Label = 'Labels_M15_H1'
df_1, df_2, df_3 = df1_filt_s_M15_H1, df2_filt_s_M15_H1, df3_filt_s_M15_H1

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc_s1 = RandomForestClassifier().fit(X_train, y_train)

# ROC curve on the validation frame
roc(rfc_s1, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc_s1, X_test=X2, y_test=y2, thres=0.5)

In [None]:
# Fold 2 (SELL, M15/H1): fit on df2 + df3, validate on df1.
Label = 'Labels_M15_H1'
df_1, df_2, df_3 = df2_filt_s_M15_H1, df3_filt_s_M15_H1, df1_filt_s_M15_H1

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc_s2 = RandomForestClassifier().fit(X_train, y_train)

# ROC curve on the validation frame
roc(rfc_s2, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc_s2, X_test=X2, y_test=y2, thres=0.5)

In [None]:
# Fold 3 (SELL, M15/H1): fit on df1 + df3, validate on df2.
Label = 'Labels_M15_H1'
df_1, df_2, df_3 = df1_filt_s_M15_H1, df3_filt_s_M15_H1, df2_filt_s_M15_H1

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc_s3 = RandomForestClassifier().fit(X_train, y_train)

# ROC curve on the validation frame
roc(rfc_s3, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc_s3, X_test=X2, y_test=y2, thres=0.5)

#### [M15_H1] SELL - Optimum RFC Model

The chosen model is rfc_s3.

In [None]:
# Restore the tuned SELL M15/H1 model from the file written by the random-search cell.
# NOTE: unpickling runs code stored in the file - only load model files you produced yourself.
with open('rfc_opt_s_H1.pkl', 'rb') as model_file:
    rfc_opt_s_H1 = pickle.load(model_file)

In [None]:
#rfc_opt_s_H1 = rfc_s3

In [None]:
# Chosen fold is rfc_s3 (fit on df1 + df3, validate on df2).
# Run the random grid search on that fold to obtain the optimum parameters.
Label = 'Labels_M15_H1'
df_1, df_2, df_3 = df1_filt_s_M15_H1, df3_filt_s_M15_H1, df2_filt_s_M15_H1

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Optimum model returned by the random search
rfc_opt_s_H1 = model_rfc_random(X_train, y_train)

# ROC curve on the validation frame
roc(rfc_opt_s_H1, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc_opt_s_H1, X_test=X2, y_test=y2, thres=0.3)

# Persist the tuned model so it can be reloaded without repeating the search
with open('rfc_opt_s_H1.pkl', 'wb') as model_file:
    pickle.dump(rfc_opt_s_H1, model_file)

#### [M15_H1] SELL - Optimum model testing

In [None]:
# Evaluate the tuned SELL M15/H1 model on each of the three frames.
Label = 'Labels_M15_H1'

# Features (every column except the label) and label vector per frame
X1, y1 = df1_filt_s_M15_H1.drop(columns=[Label]), df1_filt_s_M15_H1[Label]
X2, y2 = df2_filt_s_M15_H1.drop(columns=[Label]), df2_filt_s_M15_H1[Label]
X3, y3 = df3_filt_s_M15_H1.drop(columns=[Label]), df3_filt_s_M15_H1[Label]

# Single threshold derived by opt_thres from all three frames
thres = opt_thres(rfc_opt_s_H1, X1, y1, X2, y2, X3, y3, pr=0.5)

# df1: ROC curve (Receiver Operating Characteristic), then predictions at `thres`
roc(rfc_opt_s_H1, X1, y1)
predictions(rfc_opt_s_H1, X_test=X1, y_test=y1, thres=thres)

# df2: ROC curve, then predictions at `thres`
roc(rfc_opt_s_H1, X2, y2)
predictions(rfc_opt_s_H1, X_test=X2, y_test=y2, thres=thres)

# df3: ROC curve, then predictions at `thres`
roc(rfc_opt_s_H1, X3, y3)
predictions(rfc_opt_s_H1, X_test=X3, y_test=y3, thres=thres)

In [None]:
# Precision-recall curve of the tuned SELL M15/H1 model on df1 (X1, y1), passing the current `thres`
Pre_rec_curve(rfc_opt_s_H1, X1, y1, thres)

# Same curve for df2 (X2, y2)
Pre_rec_curve(rfc_opt_s_H1, X2, y2, thres)

# Same curve for df3 (X3, y3)
Pre_rec_curve(rfc_opt_s_H1, X3, y3, thres)

In [None]:
# feature name -> Gini importance of the tuned SELL M15/H1 model
feats = dict(zip(X1.columns, rfc_opt_s_H1.feature_importances_))

# Keep the features above the 0.015 cut-off, most important first
importances = (
    pd.DataFrame.from_dict(feats, orient='index')
    .rename(columns={0: 'Gini-importance'})
    .loc[lambda frame: frame['Gini-importance'] > 0.015]
    .sort_values('Gini-importance', ascending=False)
)
importances.plot(kind='bar', stacked=True, figsize=(10,5))

#### [M15_H4] BUY - Explores Best RFC Model

In [None]:
# Fold 1 (BUY, M15/H4): fit on df1 + df2, validate on df3.
Label = 'Labelb_M15_H4'
df_1, df_2, df_3 = df1_filt_b_M15_H4, df2_filt_b_M15_H4, df3_filt_b_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc_b1 = RandomForestClassifier().fit(X_train, y_train)

# ROC curve on the validation frame
roc(rfc_b1, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc_b1, X_test=X2, y_test=y2, thres=0.5)

In [None]:
# Fold 2 (BUY, M15/H4): fit on df2 + df3, validate on df1.
Label = 'Labelb_M15_H4'
df_1, df_2, df_3 = df2_filt_b_M15_H4, df3_filt_b_M15_H4, df1_filt_b_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc_b2 = RandomForestClassifier().fit(X_train, y_train)

# ROC curve on the validation frame
roc(rfc_b2, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc_b2, X_test=X2, y_test=y2, thres=0.2)

In [None]:
# Fold 3 (BUY, M15/H4): fit on df1 + df3, validate on df2.
Label = 'Labelb_M15_H4'
df_1, df_2, df_3 = df1_filt_b_M15_H4, df3_filt_b_M15_H4, df2_filt_b_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc_b3 = RandomForestClassifier().fit(X_train, y_train)

# ROC curve on the validation frame
roc(rfc_b3, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc_b3, X_test=X2, y_test=y2, thres=0.5)

#### [M15_H4] BUY - Optimum RFC Model

The chosen model is rfc_b3.

In [None]:
# Restore the tuned BUY M15/H4 model from the file written by the random-search cell.
# NOTE: unpickling runs code stored in the file - only load model files you produced yourself.
with open('rfc_opt_b_H4.pkl', 'rb') as model_file:
    rfc_opt_b_H4 = pickle.load(model_file)

In [None]:
#rfc_opt_b_H4 = rfc_b3

In [None]:
# Chosen fold is rfc_b3 (fit on df1 + df3, validate on df2).
# Run the random grid search on that fold to obtain the optimum parameters.
Label = 'Labelb_M15_H4'
df_1, df_2, df_3 = df1_filt_b_M15_H4, df3_filt_b_M15_H4, df2_filt_b_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Optimum model returned by the random search
rfc_opt_b_H4 = model_rfc_random(X_train, y_train)

# ROC curve on the validation frame
roc(rfc_opt_b_H4, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc_opt_b_H4, X_test=X2, y_test=y2, thres=0.3)

# Persist the tuned model so it can be reloaded without repeating the search
with open('rfc_opt_b_H4.pkl', 'wb') as model_file:
    pickle.dump(rfc_opt_b_H4, model_file)

#### [M15_H4] BUY - Optimum model testing

In [None]:
# Evaluate the tuned BUY M15/H4 model on each of the three frames.
Label = 'Labelb_M15_H4'

# Features (every column except the label) and label vector per frame
X1, y1 = df1_filt_b_M15_H4.drop(columns=[Label]), df1_filt_b_M15_H4[Label]
X2, y2 = df2_filt_b_M15_H4.drop(columns=[Label]), df2_filt_b_M15_H4[Label]
X3, y3 = df3_filt_b_M15_H4.drop(columns=[Label]), df3_filt_b_M15_H4[Label]

# Single threshold derived by opt_thres from all three frames
thres = opt_thres(rfc_opt_b_H4, X1, y1, X2, y2, X3, y3, pr=0.5)

# df1: ROC curve (Receiver Operating Characteristic), then predictions at `thres`
roc(rfc_opt_b_H4, X1, y1)
predictions(rfc_opt_b_H4, X_test=X1, y_test=y1, thres=thres)

# df2: ROC curve, then predictions at `thres`
roc(rfc_opt_b_H4, X2, y2)
predictions(rfc_opt_b_H4, X_test=X2, y_test=y2, thres=thres)

# df3: ROC curve, then predictions at `thres`
roc(rfc_opt_b_H4, X3, y3)
predictions(rfc_opt_b_H4, X_test=X3, y_test=y3, thres=thres)

In [None]:
# Precision-recall curve of the tuned BUY M15/H4 model on df1 (X1, y1), passing the current `thres`
Pre_rec_curve(rfc_opt_b_H4, X1, y1, thres)

# Same curve for df2 (X2, y2)
Pre_rec_curve(rfc_opt_b_H4, X2, y2, thres)

# Same curve for df3 (X3, y3)
Pre_rec_curve(rfc_opt_b_H4, X3, y3, thres)

In [None]:
# feature name -> Gini importance of the tuned BUY M15/H4 model
feats = dict(zip(X1.columns, rfc_opt_b_H4.feature_importances_))

# Keep the features above the 0.015 cut-off, most important first
importances = (
    pd.DataFrame.from_dict(feats, orient='index')
    .rename(columns={0: 'Gini-importance'})
    .loc[lambda frame: frame['Gini-importance'] > 0.015]
    .sort_values('Gini-importance', ascending=False)
)
importances.plot(kind='bar', stacked=True, figsize=(10,5))

#### [M15_H4] SELL - Explores Best RFC Model

In [None]:
# Fold 1 (SELL, M15/H4): fit on df1 + df2, validate on df3.
Label = 'Labels_M15_H4'
df_1, df_2, df_3 = df1_filt_s_M15_H4, df2_filt_s_M15_H4, df3_filt_s_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc_s1 = RandomForestClassifier().fit(X_train, y_train)

# ROC curve on the validation frame
roc(rfc_s1, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc_s1, X_test=X2, y_test=y2, thres=0.5)

In [None]:
# Fold 2 (SELL, M15/H4): fit on df2 + df3, validate on df1.
Label = 'Labels_M15_H4'
df_1, df_2, df_3 = df2_filt_s_M15_H4, df3_filt_s_M15_H4, df1_filt_s_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc_s2 = RandomForestClassifier().fit(X_train, y_train)

# ROC curve on the validation frame
roc(rfc_s2, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc_s2, X_test=X2, y_test=y2, thres=0.5)

In [None]:
# Fold 3 (SELL, M15/H4): fit on df1 + df3, validate on df2.
Label = 'Labels_M15_H4'
df_1, df_2, df_3 = df1_filt_s_M15_H4, df3_filt_s_M15_H4, df2_filt_s_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc_s3 = RandomForestClassifier().fit(X_train, y_train)

# ROC curve on the validation frame
roc(rfc_s3, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc_s3, X_test=X2, y_test=y2, thres=0.5)

#### [M15_H4] SELL - Optimum RFC Model

The chosen model is rfc_s3.

In [None]:
# Restore the tuned SELL M15/H4 model from the file written by the random-search cell.
# NOTE: unpickling runs code stored in the file - only load model files you produced yourself.
with open('rfc_opt_s_H4.pkl', 'rb') as model_file:
    rfc_opt_s_H4 = pickle.load(model_file)

In [None]:
#rfc_opt_s_H4 = rfc_s3

In [None]:
# Chosen fold is rfc_s3 (fit on df1 + df3, validate on df2).
# Run the random grid search on that fold to obtain the optimum parameters.
Label = 'Labels_M15_H4'
df_1, df_2, df_3 = df1_filt_s_M15_H4, df3_filt_s_M15_H4, df2_filt_s_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Optimum model returned by the random search
rfc_opt_s_H4 = model_rfc_random(X_train, y_train)

# ROC curve on the validation frame
roc(rfc_opt_s_H4, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc_opt_s_H4, X_test=X2, y_test=y2, thres=0.3)

# Persist the tuned model so it can be reloaded without repeating the search
with open('rfc_opt_s_H4.pkl', 'wb') as model_file:
    pickle.dump(rfc_opt_s_H4, model_file)

#### [M15_H4] SELL - Optimum model testing

In [None]:
# Evaluate the tuned SELL M15/H4 model on each of the three frames.
Label = 'Labels_M15_H4'

# Features (every column except the label) and label vector per frame
X1, y1 = df1_filt_s_M15_H4.drop(columns=[Label]), df1_filt_s_M15_H4[Label]
X2, y2 = df2_filt_s_M15_H4.drop(columns=[Label]), df2_filt_s_M15_H4[Label]
X3, y3 = df3_filt_s_M15_H4.drop(columns=[Label]), df3_filt_s_M15_H4[Label]

# Single threshold derived by opt_thres from all three frames
thres = opt_thres(rfc_opt_s_H4, X1, y1, X2, y2, X3, y3, pr=0.5)

# df1: ROC curve (Receiver Operating Characteristic), then predictions at `thres`
roc(rfc_opt_s_H4, X1, y1)
predictions(rfc_opt_s_H4, X_test=X1, y_test=y1, thres=thres)

# df2: ROC curve, then predictions at `thres`
roc(rfc_opt_s_H4, X2, y2)
predictions(rfc_opt_s_H4, X_test=X2, y_test=y2, thres=thres)

# df3: ROC curve, then predictions at `thres`
roc(rfc_opt_s_H4, X3, y3)
predictions(rfc_opt_s_H4, X_test=X3, y_test=y3, thres=thres)

In [None]:
# Precision-recall curve of the tuned SELL M15/H4 model on df1 (X1, y1), passing the current `thres`
Pre_rec_curve(rfc_opt_s_H4, X1, y1, thres)

# Same curve for df2 (X2, y2)
Pre_rec_curve(rfc_opt_s_H4, X2, y2, thres)

# Same curve for df3 (X3, y3)
Pre_rec_curve(rfc_opt_s_H4, X3, y3, thres)

In [None]:
# feature name -> Gini importance of the tuned SELL M15/H4 model
feats = dict(zip(X1.columns, rfc_opt_s_H4.feature_importances_))

# Keep the features above the 0.015 cut-off, most important first
importances = (
    pd.DataFrame.from_dict(feats, orient='index')
    .rename(columns={0: 'Gini-importance'})
    .loc[lambda frame: frame['Gini-importance'] > 0.015]
    .sort_values('Gini-importance', ascending=False)
)
importances.plot(kind='bar', stacked=True, figsize=(10,5))

## Testing random columns for Random forest

#### [M15_H1] BUY - Trains df_2 & df_3 ---> Tests df_1

In [None]:
# BUY M15/H1: fit on df2 + df3, validate on df1.
Label = 'Labelb_M15_H1'
df_1, df_2, df_3 = df2_filt_b_M15_H1, df3_filt_b_M15_H1, df1_filt_b_M15_H1

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc2 = RandomForestClassifier().fit(X_train, y_train)

roc(rfc2, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc2, X_test=X2, y_test=y2, thres=0.2)

In [None]:
# feature name -> Gini importance of rfc2, keyed by the training columns
feats = dict(zip(X_train.columns, rfc2.feature_importances_))

importances = pd.DataFrame.from_dict(feats, orient='index').rename(columns={0: 'Gini-importance'})

# Show the full table, most important feature first (result is displayed, not stored)
importances.sort_values('Gini-importance', ascending=False)


In [None]:
# Print the features of the current training matrix ranked by the Gini importance of rfc2.
# Fixes versus the previous version of this cell:
#   * reads rfc2, the model trained in this section, instead of the unrelated name `rfc`;
#   * takes the names from X_train (the columns the model was fitted on) rather than from the
#     raw frame, which still contains the label column;
#   * prints each importance next to the feature it belongs to (names were printed in
#     original column order while the values were sorted).
importances = rfc2.feature_importances_
indices = np.argsort(importances)[::-1]  # column positions, most important first
features_names = X_train.columns
feature_names = [features_names[i] for i in indices]
print("Feature ranking:")

for name, position in zip(feature_names, indices):
    print(name, importances[position])
    


#### [M15_H1] BUY - Trains df_1 & df_3 ---> Tests df_2

In [None]:
"['MACD_signal_H1' 'Dist_BollB_SMA_20_Var_+2_H1'\n 'Dist_BollB_SMA_20_Var_-2_H1' 'BollB_Wideness_H1' 'Closing_H4'\n 'N_Breaks_H4' 'MS_Pds_H4' 'MS_range_H4' 'RSI_14_SMA_H4' 'RSI_14_EMA_H4'\n 'MACD_12_26_H4' 'Hist_MACD_12_26_9_H4' 'MACD_signal_H4'\n 'Dist_BollB_SMA_20_Var_+2_H4' 'Dist_BollB_SMA_20_Var_-2_H4'\n 'BollB_Wideness_H4' 'Closing_D1' 'N_Breaks_D1' 'MS_Pds_D1' 'MS_range_D1'\n 'RSI_14_SMA_D1' 'RSI_14_EMA_D1' 'MACD_12_26_D1' 'Hist_MACD_12_26_9_D1'\n 'MACD_signal_D1' 'Dist_BollB_SMA_20_Var_+2_D1'\n 'Dist_BollB_SMA_20_Var_-2_D

In [None]:
# Display the column index of the frame currently bound to df_1
df_1.columns

In [None]:
# BUY M15/H1: fit on df1 + df3, validate on df2 (as the section title states).
Label = 'Labelb_M15_H1'
df_1  = df1_filt_b_M15_H1
df_2  = df3_filt_b_M15_H1
df_3  = df2_filt_b_M15_H1

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1,df_2,df_3,Label)

# Train a dedicated model for this fold. The cell previously scored rfc2 here, but rfc2 was
# fitted on df2 + df3, so validating it on df2 leaks training data into the evaluation;
# it also left rfc3 (read by the feature-importance cell that follows) undefined.
rfc3 = RandomForestClassifier()
rfc3.fit(X_train,y_train)

roc(rfc3, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc3, X_test = X2, y_test = y2, thres = 0.5)

In [None]:
# feature name -> Gini importance of rfc3, keyed by the training columns
feats = dict(zip(X_train.columns, rfc3.feature_importances_))

importances = pd.DataFrame.from_dict(feats, orient='index').rename(columns={0: 'Gini-importance'})

# Show the full table, most important feature first (result is displayed, not stored)
importances.sort_values('Gini-importance', ascending=False)


In [None]:
# Print the features of the current training matrix ranked by the Gini importance of rfc2.
# Fixes versus the previous version of this cell:
#   * takes the names from X_train (the columns the model is fitted on) rather than from the
#     raw frame, which still contains the label column;
#   * prints each importance next to the feature it belongs to (names were printed in
#     original column order while the values were sorted).
importances = rfc2.feature_importances_
indices = np.argsort(importances)[::-1]  # column positions, most important first
features_names = X_train.columns
feature_names = [features_names[i] for i in indices]
print("Feature ranking:")

for name, position in zip(feature_names, indices):
    print(name, importances[position])
    


#### [M15_H1] SELL - Trains df_1 & df_2 ---> Tests df_3

In [None]:
# SELL M15/H1: fit on df1 + df2, validate on df3.
Label = 'Labels_M15_H1'
df_1, df_2, df_3 = df1_filt_s_M15_H1, df2_filt_s_M15_H1, df3_filt_s_M15_H1

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc = RandomForestClassifier().fit(X_train, y_train)

roc(rfc, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc, X_test=X2, y_test=y2, thres=0.5)

#### [M15_H1] SELL - Trains df_2 & df_3 ---> Tests df_1

In [None]:
# SELL M15/H1: fit on df2 + df3, validate on df1.
Label = 'Labels_M15_H1'
df_1, df_2, df_3 = df2_filt_s_M15_H1, df3_filt_s_M15_H1, df1_filt_s_M15_H1

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc = RandomForestClassifier().fit(X_train, y_train)

roc(rfc, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc, X_test=X2, y_test=y2, thres=0.2)

#### [M15_H1] SELL - Trains df_1 & df_3 ---> Tests df_2

In [None]:
# SELL M15/H1: fit on df1 + df3, validate on df2.
Label = 'Labels_M15_H1'
df_1, df_2, df_3 = df1_filt_s_M15_H1, df3_filt_s_M15_H1, df2_filt_s_M15_H1

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc = RandomForestClassifier().fit(X_train, y_train)

roc(rfc, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc, X_test=X2, y_test=y2, thres=0.25)

#### [M15_H4] BUY - Trains df_1 & df_2 ---> Tests df_3

In [None]:
# BUY M15/H4: fit on df1 + df2, validate on df3.
Label = 'Labelb_M15_H4'
df_1, df_2, df_3 = df1_filt_b_M15_H4, df2_filt_b_M15_H4, df3_filt_b_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc = RandomForestClassifier().fit(X_train, y_train)

roc(rfc, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc, X_test=X2, y_test=y2, thres=0.4)

#### [M15_H4] BUY - Trains df_2 & df_3 ---> Tests df_1

In [None]:
# BUY M15/H4: fit on df2 + df3, validate on df1.
Label = 'Labelb_M15_H4'
df_1, df_2, df_3 = df2_filt_b_M15_H4, df3_filt_b_M15_H4, df1_filt_b_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc = RandomForestClassifier().fit(X_train, y_train)

roc(rfc, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc, X_test=X2, y_test=y2, thres=0.4)

#### [M15_H4] BUY - Trains df_1 & df_3 ---> Tests df_2

In [None]:
# BUY M15/H4: fit on df1 + df3, validate on df2.
Label = 'Labelb_M15_H4'
df_1, df_2, df_3 = df1_filt_b_M15_H4, df3_filt_b_M15_H4, df2_filt_b_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc = RandomForestClassifier().fit(X_train, y_train)

roc(rfc, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc, X_test=X2, y_test=y2, thres=0.4)

#### [M15_H4] SELL - Trains df_1 & df_2 ---> Tests df_3

In [None]:
# SELL M15/H4: fit on df1 + df2, validate on df3.
Label = 'Labels_M15_H4'
df_1, df_2, df_3 = df1_filt_s_M15_H4, df2_filt_s_M15_H4, df3_filt_s_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc = RandomForestClassifier().fit(X_train, y_train)

roc(rfc, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc, X_test=X2, y_test=y2, thres=0.4)

#### [M15_H4] SELL - Trains df_2 & df_3 ---> Tests df_1

In [None]:
# SELL M15/H4: fit on df2 + df3, validate on df1.
Label = 'Labels_M15_H4'
df_1, df_2, df_3 = df2_filt_s_M15_H4, df3_filt_s_M15_H4, df1_filt_s_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc = RandomForestClassifier().fit(X_train, y_train)

roc(rfc, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc, X_test=X2, y_test=y2, thres=0.4)

#### [M15_H4] SELL - Trains df_1 & df_3 ---> Tests df_2

In [None]:
# SELL M15/H4: fit on df1 + df3, validate on df2.
Label = 'Labels_M15_H4'
df_1, df_2, df_3 = df1_filt_s_M15_H4, df3_filt_s_M15_H4, df2_filt_s_M15_H4

# split() joins df_1 and df_2 into X / y, makes the train-test partition and
# hands back the third frame as the validation pair (X2, y2).
X_train, X_test, y_train, y_test, X2, y2 = split(df_1, df_2, df_3, Label)

# Default-parameter random forest fitted on the training partition
rfc = RandomForestClassifier().fit(X_train, y_train)

roc(rfc, X2, y2)

# Validation-frame predictions at the chosen probability threshold
predictions(rfc, X_test=X2, y_test=y2, thres=0.4)

In [None]:
# Random search over column subsets: draw a random set of columns from the BUY M15/H1
# frame, score it with roc_ass on the three BUY folds and the three SELL folds, and print
# the subset whenever all six AUC values exceed `par`.

# Label column of the frame the random columns are drawn from. It is set here explicitly:
# the loop used to read whatever `Label` the last executed cell had left behind.
Label = 'Labelb_M15_H1'

# Minimum AUC every fold has to exceed (loop-invariant, so defined once)
par = 0.5

for i in range(0,5000):

    random_cols = list(random_cols_drop(df1_filt_b_M15_H1, random.randrange(1,11,2)))

    # The same subset is also passed to the SELL frames, so the BUY label must not be in it.
    # NOTE(review): the fixed columns are appended only when the label was drawn - confirm
    # that this indentation is intended.
    if Label in random_cols:
        random_cols.remove(Label)
        random_cols = random_cols + ['MS_retracement_H1', 'MS_retracement_D1', 'MS_Pds_D1', 'MS_Pds_H1', 'N_Breaks_H1', 'EMA_MACD_12_26_9_H1', 'Dist_BollB_SMA_20_Var_+2_M15', 'BollB_Wideness_H1']

    lr_auc1, random_cols1 = roc_ass(random_cols, df1_filt_b_M15_H1, df2_filt_b_M15_H1, df3_filt_b_M15_H1, Label = 'Labelb_M15_H1')
    lr_auc2, random_cols2 = roc_ass(random_cols, df1_filt_b_M15_H1, df3_filt_b_M15_H1, df2_filt_b_M15_H1, Label = 'Labelb_M15_H1')
    lr_auc3, random_cols3 = roc_ass(random_cols, df2_filt_b_M15_H1, df3_filt_b_M15_H1, df1_filt_b_M15_H1, Label = 'Labelb_M15_H1')
    lr_auc4, random_cols4 = roc_ass(random_cols, df1_filt_s_M15_H1, df2_filt_s_M15_H1, df3_filt_s_M15_H1, Label = 'Labels_M15_H1')
    lr_auc5, random_cols5 = roc_ass(random_cols, df1_filt_s_M15_H1, df3_filt_s_M15_H1, df2_filt_s_M15_H1, Label = 'Labels_M15_H1')
    lr_auc6, random_cols6 = roc_ass(random_cols, df2_filt_s_M15_H1, df3_filt_s_M15_H1, df1_filt_s_M15_H1, Label = 'Labels_M15_H1')

    # Progress marker every 20 iterations
    if i % 20 == 0:
        print(i)

    fold_aucs = (lr_auc1, lr_auc2, lr_auc3, lr_auc4, lr_auc5, lr_auc6)
    if all(auc > par for auc in fold_aucs):
        print(lr_auc1,lr_auc2,lr_auc3,lr_auc4,lr_auc5,lr_auc6, random_cols)

In [None]:
# Four column subsets (presumably reported by the random search - confirm their origin)
l1 = ['MS_range_M15', 'RSI_14_SMA_M15', 'Hist_MACD_12_26_9_M15', 'Boll_SMA_20_M15', 'Dist_BollB_SMA_20_Var_+2_M15', 'Dist_BollB_SMA_20_Var_-2_M15', 'BollB_Wideness_M15', 'N_Breaks_H1', 'MS_Pds_H1', 'MS_range_H1', 'EMA_MACD_12_26_9_H1', 'MACD_signal_H1', 'Dist_BollB_SMA_20_Var_+2_H1', 'BollB_Wideness_H1', 'Closing_H4', 'MS_Pds_H4', 'MS_range_H4', 'RSI_14_EMA_H4', 'EMA_MACD_12_26_9_H4', 'MACD_signal_H4', 'Dist_BollB_SMA_20_Var_-2_H4', 'BollB_Wideness_H4', 'N_Breaks_D1', 'MS_Pds_D1', 'Hist_MACD_12_26_9_D1', 'MACD_signal_D1', 'Dist_BollB_SMA_20_Var_-2_D1', 'BollB_Wideness_D1', 'MS_retracement_D1', 'MS_retracement_H1']
l2 = ['Closing_M15', 'RSI_14_EMA_M15', 'Hist_MACD_12_26_9_M15', 'MACD_signal_M15', 'Boll_SMA_20_M15', 'Dist_BollB_SMA_20_Var_+2_M15', 'N_Breaks_H1', 'MS_Pds_H1', 'MS_range_H1', 'EMA_MACD_12_26_9_H1', 'MACD_signal_H1', 'Dist_BollB_SMA_20_Var_+2_H1', 'BollB_Wideness_H1', 'Closing_H4', 'N_Breaks_H4', 'RSI_14_SMA_H4', 'EMA_MACD_12_26_9_H4', 'Hist_MACD_12_26_9_H4', 'Dist_BollB_SMA_20_Var_+2_H4', 'Dist_BollB_SMA_20_Var_-2_H4', 'BollB_Wideness_H4', 'Closing_D1', 'MS_Pds_D1', 'RSI_14_EMA_D1', 'EMA_MACD_12_26_9_D1', 'MACD_signal_D1', 'Dist_BollB_SMA_20_Var_+2_D1', 'Dist_BollB_SMA_20_Var_-2_D1', 'MS_retracement_D1', 'MS_retracement_H1', 'MS_retracement_M15']
l3 = ['MS_Pds_M15', 'MS_range_M15', 'RSI_14_EMA_M15', 'EMA_MACD_12_26_9_M15', 'Hist_MACD_12_26_9_M15', 'MACD_signal_M15', 'Dist_BollB_SMA_20_Var_+2_M15', 'Closing_H1', 'N_Breaks_H1', 'MS_Pds_H1', 'RSI_14_SMA_H1', 'EMA_MACD_12_26_9_H1', 'MACD_signal_H1', 'Dist_BollB_SMA_20_Var_-2_H1', 'BollB_Wideness_H1', 'MS_range_H4', 'RSI_14_SMA_H4', 'RSI_14_EMA_H4', 'Hist_MACD_12_26_9_H4', 'MACD_signal_H4', 'Dist_BollB_SMA_20_Var_+2_H4', 'Closing_D1', 'MS_Pds_D1', 'RSI_14_SMA_D1', 'RSI_14_EMA_D1', 'EMA_MACD_12_26_9_D1', 'Hist_MACD_12_26_9_D1', 'Dist_BollB_SMA_20_Var_+2_D1', 'BollB_Wideness_D1', 'MS_retracement_D1', 'MS_retracement_H1']
l4 = ['MS_Pds_M15', 'MS_range_M15', 'RSI_14_SMA_M15', 'RSI_14_EMA_M15', 'Boll_SMA_20_M15', 'Dist_BollB_SMA_20_Var_+2_M15', 'Closing_H1', 'N_Breaks_H1', 'MS_Pds_H1', 'MS_range_H1', 'RSI_14_SMA_H1', 'EMA_MACD_12_26_9_H1', 'Hist_MACD_12_26_9_H1', 'BollB_Wideness_H1', 'Closing_H4', 'N_Breaks_H4', 'MS_Pds_H4', 'RSI_14_SMA_H4', 'RSI_14_EMA_H4', 'MACD_signal_H4', 'BollB_Wideness_H4', 'MS_Pds_D1', 'RSI_14_EMA_D1', 'Dist_BollB_SMA_20_Var_+2_D1', 'Dist_BollB_SMA_20_Var_-2_D1', 'BollB_Wideness_D1', 'MS_retracement_D1', 'MS_retracement_H4', 'MS_retracement_H1', 'MS_retracement_M15']

# Print the columns shared by every pair of lists, in the order
# (l1,l2), (l1,l3), (l1,l4), (l2,l3), (l2,l4), (l3,l4).
column_lists = [l1, l2, l3, l4]
for position, first in enumerate(column_lists):
    for second in column_lists[position + 1:]:
        print(list(set(first).intersection(second)))

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Probability cut-off for the positive class. The header is built from this value so the
# printed threshold always matches the one applied (it used to say 0.3 while 0.5 was used).
cutoff = 0.5

print('Predictions df2 - Threshold >= {}'.format(cutoff))
predicted_proba = rfc_random.predict_proba(X_test)

# Stored as `predicted`, not `predictions`: the notebook calls `predictions(...)` as a helper
# function, and rebinding that name to an array breaks every later call.
predicted = (predicted_proba [:,1] >= cutoff).astype('int')
print(classification_report(y_test,predicted))
print(confusion_matrix(y_test,predicted))

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Probability cut-off for the positive class; the header is built from the same value
cutoff = 0.3

print('Predictions df3 - Threshold >= {}'.format(cutoff))
predicted_proba = rfc_random.predict_proba(X2)

# Stored as `predicted`, not `predictions`: the notebook calls `predictions(...)` as a helper
# function, and rebinding that name to an array breaks every later call.
predicted = (predicted_proba [:,1] >= cutoff).astype('int')
print(classification_report(y2,predicted))
print(confusion_matrix(y2,predicted))

In [None]:
from sklearn.metrics import average_precision_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import plot_precision_recall_curve
import matplotlib.pyplot as plt

# The title reports AP (average precision), so compute it here from the positive-class
# probabilities instead of formatting a `precision` variable this cell never defines.
average_precision = average_precision_score(y2, rfc_random.predict_proba(X2)[:, 1])

# NOTE: plot_precision_recall_curve was removed in scikit-learn 1.2; on newer versions use
# PrecisionRecallDisplay.from_estimator(rfc_random, X2, y2) instead.
disp = plot_precision_recall_curve(rfc_random, X2, y2)
disp.ax_.set_title('2-class Precision-Recall curve: '
                   'AP={0:0.2f}'.format(average_precision))

In [None]:
from sklearn.metrics import average_precision_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import plot_precision_recall_curve
import matplotlib.pyplot as plt

# The title reports AP (average precision), so compute it here from the positive-class
# probabilities instead of formatting a `precision` variable this cell never defines.
average_precision = average_precision_score(y_test, rfc_random.predict_proba(X_test)[:, 1])

# NOTE: plot_precision_recall_curve was removed in scikit-learn 1.2; on newer versions use
# PrecisionRecallDisplay.from_estimator(rfc_random, X_test, y_test) instead.
disp = plot_precision_recall_curve(rfc_random, X_test, y_test)
disp.ax_.set_title('2-class Precision-Recall curve: '
                   'AP={0:0.2f}'.format(average_precision))

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Reports for rfc_random on the test partition and on the validation frame, each at the
# model's default decision and at a 0.3 probability threshold.
# Results are stored as `predicted`, not `predictions`: the notebook calls `predictions(...)`
# as a helper function, and rebinding that name to an array breaks every later call.

print('Predictions df2')
predicted = rfc_random.predict(X_test)
print(classification_report(y_test,predicted))
print(confusion_matrix(y_test,predicted))

print('Predictions df2 - Threshold >=0.3')
predicted_proba = rfc_random.predict_proba(X_test)
predicted = (predicted_proba [:,1] >= 0.3).astype('int')
print(classification_report(y_test,predicted))
print(confusion_matrix(y_test,predicted))

# This report used to print `predictions2`, a variable this cell never assigns, so it showed
# stale results from another cell (or raised NameError) instead of the df3 predictions.
print('Predictions df3')
predicted = rfc_random.predict(X2)
print(classification_report(y2,predicted))
print(confusion_matrix(y2,predicted))

print('Predictions df3 - Threshold >=0.3')
predicted_proba = rfc_random.predict_proba(X2)
predicted = (predicted_proba [:,1] >= 0.3).astype('int')
print(classification_report(y2,predicted))
print(confusion_matrix(y2,predicted))


#predicted_proba = rfc_random.predict_proba(X2)
#predicted = (predicted_proba [:,1] >= threshold).astype('int')


In [None]:
# Four evenly spaced thresholds between 0.1 and 0.5, as plain floats.
# (Casting with int() truncated every value to 0.)
np.linspace(start = 0.1, stop = 0.5, num = 4).tolist()

In [None]:
from sklearn.metrics import accuracy_score, precision_score, average_precision_score
from sklearn.metrics import classification_report, confusion_matrix

# Class probabilities do not depend on the threshold, so score the validation frame once
# instead of calling predict_proba again on every iteration.
predicted_proba = rfc_random.predict_proba(X2)

# Classification report and confusion matrix at each probability threshold
for threshold in [0.1, 0.2, 0.3, 0.4, 0.5]:

    predicted = (predicted_proba [:,1] >= threshold).astype('int')

    print(threshold)
    print(classification_report(y2,predicted))
    print(confusion_matrix(y2,predicted))

    #accuracy  = accuracy_score(y_test, predicted)
    #precision = precision_score(y_test, predicted)

    #print(precision,accuracy)

In [None]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.base import BaseEstimator, ClassifierMixin
# Toy data for the CustomThreshold demo in this cell.
# NOTE: this rebinds the notebook-level X, y, X_train, X_test, y_train and
# y_test names used by other cells.
X, y = make_classification(random_state=1)
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=1)

class CustomThreshold(BaseEstimator, ClassifierMixin):
    """Wrap a probabilistic binary classifier and predict with a custom cut-off.

    INPUTS:

     - base      ==> estimator exposing fit() and predict_proba()
     - threshold ==> value the second predict_proba column must exceed
                     (strictly) for a row to be labelled 1. Default 0.5.
    """

    def __init__(self, base, threshold=0.5):
        self.threshold = threshold
        self.base = base

    def fit(self, *args, **kwargs):
        """Forward every argument to the wrapped estimator's fit(); return self."""
        self.base.fit(*args, **kwargs)
        return self

    def predict(self, X):
        """Return 1 where the second predict_proba column is > threshold, else 0."""
        second_column = self.base.predict_proba(X)[:, 1]
        above_cutoff = second_column > self.threshold
        return above_cutoff.astype(int)

# Fit one forest on the toy data, then wrap that same fitted estimator with
# three different cut-offs and compare the resulting confusion matrices.
rfc = RandomForestClassifier(random_state=1).fit(X_train, y_train)
# Wrap the forest fitted above (`rfc`); the previous `rf` is not defined in
# this cell.
clf = [CustomThreshold(rfc, threshold) for threshold in [0.3, 0.5, 0.7]]

# NOTE: the loop variable rebinds the notebook-level name `model`.
for model in clf:
    print(confusion_matrix(y_test, model.predict(X_test)))

### Decision Tree Model
#### Instantiating and Fitting

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with default hyper-parameters, trained on the current
# X_train / y_train split.
dtree = DecisionTreeClassifier()
dtree.fit(X=X_train, y=y_train)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Decision tree on the held-out split.
predictions = dtree.predict(X_test)
print(classification_report(y_true=y_test, y_pred=predictions))
print(confusion_matrix(y_true=y_test, y_pred=predictions))

# Decision tree on the separate validation set (X2 / y2).
predictions2 = dtree.predict(X2)
print(classification_report(y_true=y2, y_pred=predictions2))
print(confusion_matrix(y_true=y2, y_pred=predictions2))

#### Predictions and Evaluation

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Score the fitted decision tree on the held-out split.
predictions = dtree.predict(X_test)

print(classification_report(y_true=y_test, y_pred=predictions))

print(confusion_matrix(y_true=y_test, y_pred=predictions))

In [None]:
# Verify the decision tree on df3_final: every column except 'Label_B' is fed
# to the model and 'Label_B' is used as the ground truth.
predictions_ver = dtree.predict(df3_final.drop(columns='Label_B'))

print(classification_report(y_true=df3_final['Label_B'], y_pred=predictions_ver))

print(confusion_matrix(y_true=df3_final['Label_B'], y_pred=predictions_ver))

In [None]:
# Put the true labels and the model predictions side by side, row by row.
predictions = pd.DataFrame(predictions, columns=['Predictions'])
pred_df = pd.concat(
    [
        # Drop y_test's original index so rows line up positionally.
        pd.DataFrame(y_test, columns=['Label_B']).reset_index(drop=True),
        predictions,
    ],
    axis=1,
)
#pred_df.head(100)

In [None]:
#pred_df.to_excel('T1213.xlsx', index = False)

### Random Forest

#### Instantiating and Fitting

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import train_test_split

# Target is the 'Labelb_H1' column of df1_final; every other column is a feature.
y = df1_final['Labelb_H1']
X = df1_final.drop(columns='Labelb_H1')

# Shuffled 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=True,
    random_state=101,
)

# 200-tree random forest trained on the training split.
rfc = RandomForestClassifier(n_estimators=200)
rfc.fit(X_train, y_train)

In [None]:
# Build a fresh 200-tree forest and train it on the current training split.
rfc = RandomForestClassifier(
    n_estimators=200,
)
rfc.fit(X=X_train, y=y_train)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Random forest on the held-out split.
predictions = rfc.predict(X_test)
print(classification_report(y_true=y_test, y_pred=predictions))
print(confusion_matrix(y_true=y_test, y_pred=predictions))

# Random forest on the separate validation set (X2 / y2).
predictions2 = rfc.predict(X2)
print(classification_report(y_true=y2, y_pred=predictions2))
print(confusion_matrix(y_true=y2, y_pred=predictions2))

In [None]:
# Verify the random forest on df3_final.
# The column removed from the features must be the same one used as ground
# truth below; the previous 'Label_b_H1' did not match 'Label_B', so the label
# either stayed in the feature matrix or the drop raised KeyError.
# NOTE(review): rfc is fitted elsewhere in this notebook on df1_final with the
# label 'Labelb_H1' - confirm df3_final has the same feature columns.
predictions_ver = rfc.predict(df3_final.drop('Label_B',axis=1))

print(classification_report(df3_final['Label_B'],predictions_ver))

print(confusion_matrix(df3_final['Label_B'],predictions_ver))

### Neural Networks 1

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# CODE HERE
# Feed-forward binary classifier: three ReLU layers of shrinking width, each
# followed by 20% dropout, ending in a single sigmoid unit.
model = Sequential([
    # input layer
    Dense(78, activation='relu'),
    Dropout(0.2),

    # hidden layer
    Dense(39, activation='relu'),
    Dropout(0.2),

    # hidden layer
    Dense(19, activation='relu'),
    Dropout(0.2),

    # output layer
    Dense(units=1, activation='sigmoid'),
])

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy')

In [None]:
early_stop = EarlyStopping(monitor = 'val_loss', mode = 'min', verbose = 1, patience = 25)
    # monitor = 'val_loss' & mode = 'min' ==> we want to minimise the validation loss
    # patience - number of extra epochs to wait after the loss stops improving,
    #            to ensure the stall was not only noise

Label = 'Labelb_M15_H1'

df_1  = df1_filt_b_M15_H1
df_2  = df2_filt_b_M15_H1
df_3  = df3_filt_b_M15_H1

# Per the original note: split() concatenates df_1 & df_2 as X and y and splits
# them into train / test; df_3 is returned as X2 / y2 for later validation.
X_train, X_test, y_train, y_test, X2, y2 = split(df_1,df_2,df_3,Label)

# Fit the NN model.
# early_stop is now actually passed to fit(); before, the callback was created
# but never used, so training always ran the full 150 epochs.
model.fit(x=X_train, 
          y=y_train, 
          epochs=150,
          batch_size=256,
          validation_data=(X_test, y_test), 
          callbacks=[early_stop],
          )


#roc(model, X2, y2)

# Predicts the 3rd df using the random search model created from df1 & df2
#predictions(rfc, X_test = X2, y_test = y2, thres = 0.4)    



In [None]:
from tensorflow.keras.models import load_model

#model.save('my_Lendingclub_model.h5')

In [None]:
# Plot the per-epoch metrics recorded by the last model.fit() call, with the
# x axis fixed to the 0-250 range.
loss_df = pd.DataFrame(data=model.history.history)
loss_df.plot()
plt.xlim(left=0, right=250)

In [None]:
# Sequential.predict_classes() no longer exists in current Keras releases.
# For a single sigmoid output the documented equivalent is thresholding
# predict() at 0.5.
predictions = (model.predict(X2) > 0.5).astype('int32')

print(classification_report(y2,predictions))

print(confusion_matrix(y2,predictions))

In [None]:
# Verify the network on df3_final ('Label_B' is the ground truth).
# predict_classes() was removed from Keras; thresholding the sigmoid output at
# 0.5 is the documented replacement for binary models.
predictions_ver = (model.predict(df3_final.drop('Label_B',axis=1)) > 0.5).astype('int32')

print(classification_report(df3_final['Label_B'],predictions_ver))

print(confusion_matrix(df3_final['Label_B'],predictions_ver))

### Neural Networks 2

In [None]:
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Target column and the three filtered M15/H1 frames used for this experiment.
Label = 'Labelb_M15_H1'
df_1, df_2, df_3 = df1_filt_b_M15_H1, df2_filt_b_M15_H1, df3_filt_b_M15_H1

# Per the original note: split() concatenates df_1 & df_2 into X / y and splits
# them into train / test, while df_3 comes back as X2 / y2 for validation.
(X_train, X_test,
 y_train, y_test,
 X2, y2) = split(df_1, df_2, df_3, Label)

#encoder = LabelEncoder()
#encoder.fit(y)
#y = encoder.transform(y)
#y = to_categorical(y)

In [None]:
from sklearn.preprocessing import MinMaxScaler

Scaler = MinMaxScaler()

# Fit the scaler on the training split only, then apply the same
# transformation to every set the model will see.
X_train = Scaler.fit_transform(X_train)
X_test = Scaler.transform(X_test)
# X2 was previously left unscaled although the model is trained on scaled
# features and later predicts on X2.
X2 = Scaler.transform(X2)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Layer width is the column count of df_1 minus one.
# NOTE(review): assumes df_1 holds exactly one non-feature (label) column - confirm.
n_features = len(df_1.columns) - 1

model = Sequential()

model.add(Dense(n_features,input_dim = n_features,activation='relu'))
model.add(Dense(2*n_features,activation='relu'))
model.add(Dense(2*n_features,activation='relu'))
model.add(Dense(n_features,activation='relu'))

# BINARY CLASSIFICATION
#model.add(Dense(3, activation = 'softmax')) # activation='sigmoid'

#model.compile(loss = 'categorical_crossentropy' , optimizer = 'adam' , metrics = ['accuracy'] )

# output layer
model.add(Dense(units=1,activation='sigmoid'))

# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam')



from tensorflow.keras.callbacks import EarlyStopping

# Stop once val_loss has not improved for 25 consecutive epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=25, verbose=1, mode='min')


In [None]:
# Train for up to 350 epochs, validating on the test split and letting
# early_stop end the run early.
model.fit(
    x=X_train,
    y=y_train,
    epochs=350,
    validation_data=(X_test, y_test),
    callbacks=[early_stop],
)

# Raw model outputs for the test split.
predictions = model.predict(x=X_test)

In [None]:
# predict_classes() was removed from Keras; for a single sigmoid output the
# documented replacement is thresholding predict() at 0.5.
predictions = (model.predict(X2) > 0.5).astype('int32')

In [None]:
# Keras models no longer expose predict_proba(); predict() already returns the
# sigmoid output. With a single output unit the result has one column, so the
# previous [:, 1] index was out of range - column 0 holds the value.
predicted_proba = model.predict(X2)
predictions = (predicted_proba [:,0] >= 0.3).astype('int')

print(classification_report(y2,predictions))

print(confusion_matrix(y2,predictions))

In [None]:
# Verify the network on df3_filt_b_H1_f ('Labelb_H1' is the ground truth).
# predict_classes() was removed from Keras; thresholding the sigmoid output at
# 0.5 is the documented replacement for binary models.
# NOTE(review): the features are passed unscaled here, while the model defined
# in this section is trained on MinMax-scaled inputs - confirm whether
# Scaler.transform() should be applied first.
predictions_ver = (model.predict(df3_filt_b_H1_f.drop('Labelb_H1',axis=1)) > 0.5).astype('int32')

print(classification_report(df3_filt_b_H1_f['Labelb_H1'],predictions_ver))

print(confusion_matrix(df3_filt_b_H1_f['Labelb_H1'],predictions_ver))

## Main

In [None]:
def read_and_merge_D1_H4_H1(df_file_Path_H1,df_file_Path_H4,df_file_Path_D1, pickle_name):
    '''Function that builds the merged H1/H4/D1 df from source files, or loads it from a pickle cache.

    INPUTS:

     - df_file_Path_H1 ==> path handed to Read_dfs for the H1 data, or None
     - df_file_Path_H4 ==> path handed to Read_dfs for the H4 data, or None
     - df_file_Path_D1 ==> path handed to Read_dfs for the D1 data, or None
     - pickle_name     ==> pickle file written after a rebuild / read when loading

    OUTPUTS:

     - df ==> freshly merged df when all three paths are given (also saved to
              pickle_name); otherwise the df previously stored in pickle_name.

    If ANY of the three paths is None, nothing is rebuilt and the cache is used.
    '''
    source_paths = (df_file_Path_H1, df_file_Path_H4, df_file_Path_D1)

    if any(path is None for path in source_paths):
        # Cache path. Pickle files can execute code on load, so only point
        # pickle_name at files produced by this notebook.
        return pd.read_pickle(pickle_name)

    df_H1, df_H4, df_D1 = Read_dfs(df_file_Path_H1,df_file_Path_H4,df_file_Path_D1)

    # Merge H4 with D1 first, then merge H1 onto that result.
    df_4H_1D = Merge_shift_df_4H_1D(Rename_df(df_H4,'H4'),Rename_df(df_D1,'D1'))
    df = Merge_shift_df_1H_4H(Rename_df(df_H1,'H1'),df_4H_1D)

    # The meaning of the second argument (3) is defined by Price_H1 / Price_H1_H4.
    df = Price_H1(df,3)
    df = Price_H1_H4(df,3)

    # Save the result so later runs can skip the rebuild.
    df.to_pickle(pickle_name)

    return df
        

In [None]:
# All three source paths are None, so the df is loaded from the 'df1.pkl'
# cache rather than rebuilt from the raw files.
df1 = read_and_merge_D1_H4_H1(
    df_file_Path_H1=None,
    df_file_Path_H4=None,
    df_file_Path_D1=None,
    pickle_name='df1.pkl',
)