<a href="https://colab.research.google.com/github/summeryqc/fun_for_stockmarket/blob/master/stock_predictor_00.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Test 1: Using the fbprophet module to predict the stock market
## 1. Database of historical stock prices
## 2. fbprophet module for stock prediction
## 3. Technical analysis







In [None]:
import os
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas_datareader.data as web

# Ticker and date window of the historical prices to download
stock_name = "AAPL"
start = datetime(year=2017, month=7, day=20)
end = datetime(year=2020, month=7, day=20)
# Store the Alpha Vantage API key in the environment, then read it back for the request
os.environ["ALPHAVANTAGE_API_KEY"] = "TYPE YOUR OWN KEY"
stock_data = web.DataReader(
    name=stock_name,
    data_source="av-daily",
    start=start,
    end=end,
    api_key=os.environ["ALPHAVANTAGE_API_KEY"],
)

In [None]:
# For debugging: check how the dates in the index are represented
type(stock_data.index[-1])
# Parse the first index entry to confirm it is a 'YYYY-MM-DD' string
dt_string = stock_data.index[0]
dt_object2 = datetime.strptime(dt_string, "%Y-%m-%d")
print("dt_object2 =", dt_object2)
print("test ", datetime(year=2017, month=7, day=20))
type(dt_object2)

In [None]:
# Draw the closing price over the selected period, then decorate the axes
stock_data['close'].plot(
    figsize=(16, 8),
    color='#002699',
    alpha=0.8,
)
plt.title("Stock price for Apple", fontsize=18)
plt.ylabel('Price', fontsize=12, fontweight='bold', color='gray')
plt.xlabel("Date", fontsize=12, fontweight='bold', color='gray')
plt.show()

In [None]:
# Python, using Prophet forecast package to predict stock price
import pandas as pd
from fbprophet import Prophet
from sklearn.metrics import mean_squared_error
from matplotlib.animation import FuncAnimation

In [None]:
### Defining a cycle analysis function to get return and mse ###

def cycle_analysis(data, split_date, cycle, stock_name, mode='additive', forecast_plot=False, print_ind=False):
  """Fit Prophet with one custom seasonality and score it on held-out data.

  Args:
    data: pandas Series of prices indexed by 'YYYY-MM-DD' date strings
      (the index is parsed with that format below).
    split_date: 'YYYY-MM-DD' string. Rows dated strictly before it are used
      for training; rows dated on or after it are used for testing.
    cycle: period, in days, of the custom seasonality added to the model.
    stock_name: text prepended to the x-axis label of the optional plot.
    mode: seasonality mode forwarded to Prophet's add_seasonality.
    forecast_plot: when True, plot the forecast with the test points overlaid.
    print_ind: when True, print the projected return and the MSE.

  Returns:
    [ret, mse] where ret is the max minus the min of the forecast's
    'self_define_cycle' component and mse is the mean squared error between
    the test values and 'yhat' on the dates present in both.

  Raises:
    ValueError: if no row of data is dated before split_date.
  """
  # Split by comparing date strings so the two sets never overlap, even when
  # split_date itself is not in the index (e.g. a market holiday)
  training = data[data.index < split_date]
  testing = data[data.index >= split_date]
  if training.empty:
    raise ValueError('No training data before split_date {}'.format(split_date))
  # make_future_dataframe extends from the last training date, so the horizon
  # is measured from there in order to reach the last date in data
  last_training_date = pd.to_datetime(training.index[-1])
  last_date = pd.to_datetime(max(data.index))
  predict_period = (last_date - last_training_date).days
  df = training.reset_index()
  df.columns = ['ds', 'y']
  # Using Prophet module with self defined cycle only
  m = Prophet(weekly_seasonality=False, yearly_seasonality=False, daily_seasonality=False)
  m.add_seasonality('self_define_cycle', period=cycle, fourier_order=8, mode=mode)
  # Make predictions
  m.fit(df)
  future = m.make_future_dataframe(periods=predict_period)
  # The predict method will assign each row in future a predicted value which it names yhat
  forecast = m.predict(future)
  if forecast_plot:
    m.plot(forecast)
    # Overlay the held-out observations on top of the forecast figure
    testing_plot = testing.index.map(lambda x: datetime.strptime(x, "%Y-%m-%d"))
    plt.plot(testing_plot, testing.values, '.', color='#ff3333', alpha=0.6)
    plt.xlabel(stock_name + ' Date', fontsize=12, fontweight='bold', color='gray')
    plt.ylabel('Price(USD)', fontsize=12, fontweight='bold', color='gray')
    plt.legend(('training_predict', 'original', 'testing_predict'))
    plt.show()
  # Calculate ideal maximum return (max-min) of the seasonal component
  seasonal = forecast['self_define_cycle']
  ret = seasonal.max() - seasonal.min()
  # Copy the predictions into a new Series keyed by date string, so the
  # forecast frame itself is not re-indexed as a side effect
  model_tb = pd.Series(
      forecast['yhat'].values,
      index=forecast['ds'].dt.strftime("%Y-%m-%d").tolist(),
      name='yhat',
  )
  # Align observed and predicted values by date; keep only dates that have both
  out_tb = pd.concat([testing, model_tb], axis=1).dropna()
  # Mean square error
  mse = mean_squared_error(out_tb.iloc[:, 0], out_tb.iloc[:, 1])
  rep = [ret, mse]
  if print_ind:
    print('Projected return per cycle: {}'.format(round(rep[0],2)))
    print("MSE: {}".format(round(rep[1],4)))
  return rep

In [None]:
# For debugging and testing: a single run with a 30-day cycle, plotted and printed
cycle_analysis(
    data=stock_data['close'],
    split_date='2020-01-20',
    cycle=30,
    stock_name=stock_name,
    forecast_plot=True,
    print_ind=True,
)

In [None]:
# Use a loop to calculate the return and mean squared error for different cycles
testing_box = range(10,301)
return_box = []
mse_box = []
for c in testing_box:
  # stock_name is a required positional argument of cycle_analysis
  f = cycle_analysis(stock_data['close'],'2020-01-20',c,stock_name)
  return_box.append(f[0])
  mse_box.append(f[1])

In [None]:
# One row per tested cycle with its projected return and its MSE
report = pd.DataFrame({
    'cycle': testing_box,
    'return': return_box,
    'mse': mse_box,
})
# Keep cycles whose projected return exceeds 10, then pick the lowest-MSE row(s)
possible_choice = report.loc[report['return'].gt(10)]
best_cycle = possible_choice.loc[possible_choice['mse'].eq(min(possible_choice['mse']))]

In [None]:
# Show the selected cycle, then rerun the analysis for a 225-day cycle with plot and metrics
print(best_cycle['cycle'])
cycle_analysis(
    data=stock_data['close'],
    split_date='2020-01-20',
    cycle=225,
    stock_name=stock_name,
    forecast_plot=True,
    print_ind=True,
)

In [None]:
### Defining a function to return best buy and sell date ###

def _to_day(value):
  # Parse the leading 'YYYY-MM-DD' of the value's string form into a datetime
  return datetime.strptime(str(value)[:10], "%Y-%m-%d")


def _project_cycle_dates(first_dt, trading_days, end_dt, cycle):
  # Repeat first_dt every `cycle` days until end_dt, snapping to trading days
  dates = [first_dt]
  current = first_dt
  while current < end_dt:
    target = current + timedelta(days=cycle)
    gap = abs((trading_days - target).days)
    # In order to avoid the non-trading dates, use the closest date in the data
    snapped = trading_days[gap == gap.min()][0]
    if snapped <= current:
      # The closest trading day is not ahead of us (short cycle across a
      # market gap); move to the next trading day so the loop always advances
      later = trading_days[trading_days > current]
      if len(later) == 0:
        break
      snapped = later.min()
    dates.append(snapped)
    current = snapped
  return dates


def return_dates(forecast, stock_data, cycle, cycle_name='self_define_cycle', time_name='ds'):
  """Project buy and sell dates from the seasonal component of a forecast.

  The date of the highest (sell) and lowest (buy) value of `cycle_name`
  within the first `cycle` rows of `forecast` is repeated every `cycle` days,
  each repeat being moved to the nearest date found in `stock_data`.

  Args:
    forecast: DataFrame holding the columns `cycle_name` and `time_name`.
    stock_data: pandas object whose index holds 'YYYY-MM-DD' date strings;
      its last index entry is taken as the end of the data.
    cycle: cycle length, used both as a row count and as a number of days.
    cycle_name: column of forecast holding the seasonal component.
    time_name: column of forecast holding the dates.

  Returns:
    [buy_dt, sell_dt], two lists of dates. The first sell date is dropped
    when it precedes the first buy date, and the last projected date of each
    list is always dropped. The first entry of a list is a datetime; the
    projected entries are taken from the parsed index of stock_data.
  """
  # Find out the highest and lowest dates in the first cycle
  first_cycle = forecast.iloc[:cycle]
  component = first_cycle[cycle_name]
  high = _to_day(first_cycle.loc[component == component.max(), time_name].values[0])
  low = _to_day(first_cycle.loc[component == component.min(), time_name].values[0])
  end_dt = datetime.strptime(stock_data.index[-1], "%Y-%m-%d")
  find_list = stock_data.index.map(lambda x: datetime.strptime(x, "%Y-%m-%d"))
  # Looking for new cycles until they go beyond the last date in stock_data
  sell_dt = _project_cycle_dates(high, find_list, end_dt, cycle)
  buy_dt = _project_cycle_dates(low, find_list, end_dt, cycle)
  # A sell that comes before the first buy has nothing to sell
  if buy_dt[0] > sell_dt[0]:
    sell_dt = sell_dt[1:]
  buy_dt = buy_dt[:-1]
  sell_dt = sell_dt[:-1]
  return [buy_dt, sell_dt]