In [1]:
import pandas as pd
import numpy as np
import investpy
from sklearn.linear_model import LogisticRegression
import talib as ta
from statsmodels.formula.api import logit

import plotly
import plotly.graph_objects as go 
import plotly.express as px

In [110]:
def getExponentialMovingAverage(close, days):
    """Return the exponential moving average of ``close`` as a list.

    The first output equals the first input. Every later output is
    ``price * k + previous_ema * (1 - k)`` with ``k = 2 / (days + 1)``.

    Parameters
    ----------
    close : iterable of numbers (list, NumPy array, pandas Series, ...)
        Values are consumed in iteration order, i.e. by position, so a
        Series may carry any index (dates, non-zero-based integers, ...).
    days : number
        Smoothing span used to derive ``k``.

    Returns
    -------
    list
        One EMA value per input value; an empty list for empty input.
    """
    # Materialise once so access is positional; indexing a Series with
    # ``close[0]`` would be a label lookup and break on non 0..n-1 indexes.
    prices = list(close)
    if not prices:
        return []

    smooth = 2 / (days + 1)
    ema = [prices[0]]  # seed with the first observation
    for price in prices[1:]:
        ema.append(price * smooth + ema[-1] * (1 - smooth))
    return ema

def expandMovingAverage(movingAverage, interval):
    """Linearly interpolate between consecutive moving-average values.

    Every pair of neighbouring input values ``(v[i], v[i+1])`` contributes
    ``interval`` output rows: ``v[i]`` itself followed by ``interval - 1``
    evenly spaced steps towards ``v[i+1]``.

    Parameters
    ----------
    movingAverage : pandas Series (or other 1-D numeric sequence)
        Values are read by position; the index is ignored.
    interval : int
        Number of output rows generated per input segment.

    Returns
    -------
    pandas.DataFrame
        A single float64 column named ``"MA" + str(interval)`` on a
        0..n-1 index with ``(len(movingAverage) - 1) * interval`` rows
        (no rows when fewer than two values are given).

    Notes
    -----
    NOTE(review): the last input value is only used as an interpolation
    target and is never emitted itself, so the output stops one step short
    of it. Kept as-is so the returned length does not change.
    """
    knots = np.asarray(movingAverage, dtype=float)

    # Collect everything in a plain list and build the frame once; growing a
    # DataFrame with ``.loc[new_label] = ...`` re-allocates on every row.
    expanded = []
    for start, end in zip(knots[:-1], knots[1:]):
        step = (end - start) / interval
        expanded.append(start)
        expanded.extend(step * offset + start for offset in range(1, interval))

    return pd.DataFrame({"MA" + str(interval): pd.Series(expanded, dtype=float)})

def ceildiv(a, b):
    """Divide ``a`` by ``b`` rounding up, using only floor division.

    Floor-dividing the negated numerator and negating the result turns the
    round-down of ``//`` into a round-up. Works for anything that supports
    unary minus and ``//`` (ints, NumPy arrays, ...).
    """
    negated_floor = (-a) // b
    return -negated_floor

def setColor(interval, movingAverage):
    """Colour each interval boundary by the direction of the next interval.

    Positions ``interval - 1, 2 * interval - 1, ...`` are visited. A
    position that still has a value ``interval`` steps ahead yields
    ``'red'`` when the current value is greater than that later value and
    ``'green'`` otherwise; positions without such a later value are skipped.

    Returns the list of colour names in visiting order.
    """
    total = len(movingAverage)
    return [
        'red' if movingAverage[pos] > movingAverage[pos + interval] else 'green'
        for pos in range(interval - 1, total, interval)  # start at interval - 1: positions are 0-based
        if pos + interval < total
    ]


In [123]:
# Runs when the cell executes and stores whatever investpy.get_stocks_list()
# returns. None of the functions visible in this cell read `stocks`; they
# look quotes up through investpy.search_quotes instead.
stocks = investpy.get_stocks_list()
def searchAndGetInfo(stock, country, from_date, to_date, interval):
    """Show a candlestick chart of a quote with SMA and EMA overlays.

    A single search result is requested from investpy for ``stock`` in
    ``country`` and its historical data between ``from_date`` and
    ``to_date`` is retrieved. Closing prices are averaged in blocks (the
    first row on its own, then consecutive runs of ``interval`` rows), an
    EMA is computed over those block means, and both series are stretched
    back out with ``expandMovingAverage`` before being drawn on top of the
    candlesticks.

    Parameters
    ----------
    stock : str
        Search text passed to ``investpy.search_quotes``.
    country : str
        Country used to restrict the search.
    from_date, to_date
        Date range forwarded unchanged to ``retrieve_historical_data``.
    interval : int
        Block size for the averages and smoothing span of the EMA.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    quote = investpy.search_quotes(text=stock, countries=[country], n_results=1)
    history = quote.retrieve_historical_data(from_date=from_date, to_date=to_date)

    # Block means of the close, then an EMA over those means.
    closes = history['Close']
    block_ids = ceildiv(np.arange(len(closes)), interval)
    sma_points = closes.groupby(block_ids).mean()
    ema_points = pd.Series(getExponentialMovingAverage(sma_points, interval))

    # Interpolate both series back to one value per row.
    sma_per_row = expandMovingAverage(sma_points, interval)
    ema_per_row = expandMovingAverage(ema_points, interval)

    # Turn the index into a regular 'Date' column.
    history = history.reset_index()

    ohlc = history.loc[:, ['Date', 'Open', 'High', 'Low', 'Close']]
    ohlc['Date'] = pd.to_datetime(ohlc['Date'])
    ohlc['SMA'] = sma_per_row
    ohlc['EMA'] = ema_per_row

    # Candlesticks first, then one line per moving average.
    candles = go.Candlestick(
        x=ohlc['Date'],
        open=ohlc['Open'],
        high=ohlc['High'],
        low=ohlc['Low'],
        close=ohlc['Close'],
    )
    fig = go.Figure(data=[candles])

    overlays = (
        ('SMA', f'SMA {interval}', "blue"),
        ('EMA', f'EMA {interval}', "purple"),
    )
    for column, label, colour in overlays:
        fig.add_trace(
            go.Scatter(
                x=ohlc['Date'],
                y=ohlc[column],
                name=label,
                line=dict(color=colour),
            )
        )
    fig.show()

    
#     for i in range(interval-1,len(ohlc['EMA']), interval): #interval - 1 because 0 should be counted
#         if(i+interval < len(ema_stock)):
#             if(ohlc['EMA'][i] > ohlc['EMA'][i+interval]):
#                 chart.plot(ohlc['Date'][i], ohlc['EMA'][i], 'o', color='red')
#             else:
#                 chart.plot(ohlc['Date'][i], ohlc['EMA'][i], 'o', color='green')

def retrieveInformation(stock, country):
    """Print the information investpy returns for a searched quote.

    A single search result is requested for ``stock`` restricted to
    ``country``; whatever its ``retrieve_information()`` call returns is
    printed. Nothing is returned.
    """
    quote = investpy.search_quotes(
        text=stock,
        countries=[country],
        n_results=1,
    )
    info = quote.retrieve_information()
    print(info)
    
def getPercentageGrowth(stock, country, from_date, to_date):
    """Plot the closing price as a percentage of the first close in range.

    A single search result is requested from investpy for ``stock`` in
    ``country`` and its historical data between ``from_date`` and
    ``to_date`` is retrieved. Each close is divided by the first close and
    multiplied by 100, so the first point is 100 and, for example, 110
    means the close is 10% above the first one.

    Parameters
    ----------
    stock : str
        Search text passed to ``investpy.search_quotes``.
    country : str
        Country used to restrict the search.
    from_date, to_date
        Date range forwarded unchanged to ``retrieve_historical_data``;
        ``from_date`` is also shown in the chart title.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    quote = investpy.search_quotes(text=stock, countries=[country], n_results=1)
    history = quote.retrieve_historical_data(from_date=from_date, to_date=to_date)

    closes = history['Close']
    # Take the first close by position. The other functions in this cell
    # reset this frame's index to obtain a 'Date' column, so 0 is not an
    # index label here and ``closes[0]`` only worked through pandas'
    # deprecated positional fallback for integer keys.
    growth = (closes / closes.iloc[0]) * 100

    fig = px.line(growth, title=f'Percentage Growth from {from_date}')
    fig.update_layout(
        showlegend=True,
        yaxis=dict(type='linear', ticksuffix='%'),
        yaxis_title_text='percent per unit'
    )
    fig.update_traces(mode='lines+markers')
    fig.show()

def predict_strength(stock, country, from_date, to_date, interval):
    """Chart a quote with SMA/EMA overlays and fit next-day direction models.

    Steps performed:

    1. Request a single investpy search result for ``stock`` in ``country``
       and retrieve its historical data between ``from_date`` and
       ``to_date``.
    2. Average the close in blocks (first row alone, then consecutive runs
       of ``interval`` rows), compute an EMA over those block means,
       stretch both back to one value per row with ``expandMovingAverage``
       and plot them over a candlestick chart.
    3. Add features: rolling correlation between Close and EMA, RSI, and
       the open's difference to the previous close / previous open.
    4. Label every row by whether the next row's close is higher.
    5. Fit a scikit-learn ``LogisticRegression`` and a statsmodels ``logit``
       on the first 70% of the rows and print the statsmodels summary.

    Parameters
    ----------
    stock : str
        Search text passed to ``investpy.search_quotes``.
    country : str
        Country used to restrict the search.
    from_date, to_date
        Date range forwarded unchanged to ``retrieve_historical_data``.
    interval : int
        Block size of the averages, EMA smoothing span, rolling-correlation
        window and RSI period.

    Returns
    -------
    None
        The chart is shown and the regression summary is printed.

    Raises
    ------
    ValueError
        If no complete rows are left to train on after dropping NaNs.
    """
    quote = investpy.search_quotes(text=stock, countries=[country], n_results=1)
    history = quote.retrieve_historical_data(from_date=from_date, to_date=to_date)

    sma_stock = history['Close'].groupby(ceildiv(np.arange(len(history['Close'])), interval)).mean()
    ema_stock = pd.Series(getExponentialMovingAverage(sma_stock, interval))
    ema_expanded = expandMovingAverage(ema_stock, interval)
    sma_expanded = expandMovingAverage(sma_stock, interval)
    history = history.reset_index()

    ohlc = history.loc[:, ['Date', 'Open', 'High', 'Low', 'Close']].copy()
    ohlc['Date'] = pd.to_datetime(ohlc['Date'])
    # expandMovingAverage returns a one-column frame; take that column and
    # force a float dtype so the averages are treated as numeric regressors
    # (a non-numeric column would be handled as categorical by the formula).
    ohlc['SMA'] = sma_expanded.iloc[:, 0].astype(float)
    ohlc['EMA'] = ema_expanded.iloc[:, 0].astype(float)

    # plot stock data
    fig = go.Figure(
        data=[go.Candlestick(x=ohlc['Date'],
                open=ohlc['Open'],
                high=ohlc['High'],
                low=ohlc['Low'],
                close=ohlc['Close']
    )])
    # plot sma
    fig.add_trace(
        go.Scatter(
            x=ohlc['Date'],
            y=ohlc['SMA'],
            name=f'SMA {interval}',
            line=dict(
                color="blue"
            )
        )
    )
    # plot ema
    fig.add_trace(
        go.Scatter(
            x=ohlc['Date'],
            y=ohlc['EMA'],
            name=f'EMA {interval}',
            line=dict(
                color="purple"
            ),
        )
    )
    fig.show()

    # Features. The EMA is used for the correlation because it reacts less
    # to single price jumps than the raw block means.
    ohlc['Corr'] = ohlc['Close'].rolling(window=interval).corr(ohlc['EMA'])
    ohlc['RSI'] = ta.RSI(np.asarray(ohlc['Close'], dtype=float), timeperiod=interval)

    # shift(1) brings the previous row's value onto the current row, so these
    # are today's open minus yesterday's close / yesterday's open.
    ohlc['Open-Close'] = ohlc['Open'] - ohlc['Close'].shift(1)
    ohlc['Open-Open'] = ohlc['Open'] - ohlc['Open'].shift(1)
    ohlc = ohlc.dropna()  # drops indicator warm-up rows and any other incomplete row

    # Target: shift(-1) brings the NEXT row's close onto the current row.
    # The last row has no next close, so its comparison would always be
    # False and mislabel it as "down"; drop it from features and target.
    went_up = ohlc['Close'].shift(-1) > ohlc['Close']
    X = ohlc.drop(columns=['Date']).iloc[:-1]  # every column except the date
    y = np.where(went_up.iloc[:-1], 1, -1)

    # First 70% of the rows (chronological order) are used for training;
    # the remaining rows, X.iloc[split:], are a hold-out that is not
    # evaluated yet.
    split = int(0.7 * len(X))
    if split == 0:
        raise ValueError(
            "Not enough complete rows to train on; widen the date range or lower the interval."
        )
    X_train, y_train = X.iloc[:split], y[:split]

    # NOTE(review): this classifier is fitted but nothing below reads it.
    classifier = LogisticRegression()
    classifier.fit(X_train, y_train)

    # statsmodels needs a 0/1 dependent variable, otherwise it raises
    # "ValueError: endog must be in the unit interval".
    train = X_train.assign(up_down=(y_train == 1).astype(int))

    # The lower a feature's p-value, the more predictive power it has; a high
    # p-value means the relationship may well be due to random chance.
    # The two gap columns contain '-' in their names. Inside a formula a bare
    # '-' removes a term, so writing Open-Close / Open-Open silently dropped
    # Open and Close and never used the gap columns. Q("...") quotes the
    # column name so it is looked up literally.
    formula = (
        'up_down ~ Open + High + Low + Close + EMA + Corr + RSI'
        ' + Q("Open-Close") + Q("Open-Open")'
    )
    results = logit(formula, data=train).fit()
    print(results.summary())
            
#def testBed2(stock, country, from_date, to_date, interval):