<a href="https://colab.research.google.com/github/sjdee/Market-Analysis-Techniques/blob/master/NeuralNetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPClassifier
from sklearn import metrics
from sklearn.metrics import classification_report
import numpy as np
import warnings
warnings.filterwarnings("ignore")


def run_classifier(pDf, minify_data, less_columns, print_data):
  """Train and score one MLP classifier per label column of ``pDf``.

  The last 10 columns of the prepared frame are used as labels. For each of
  them a model is fitted on a 70/30 train/test split (``random_state=0``),
  with the features min-max scaled to [0, 1] using a scaler fitted on the
  training rows only.

  Args:
    pDf: input DataFrame. It is NOT modified: rows containing NaN are dropped
      on a copy, so the caller's frame keeps all of its rows.
    minify_data: forwarded unchanged to ``accumulate_data``.
    less_columns: if truthy, only the hand-picked columns listed below are
      kept (the first 7 are features, day1..day10 are labels). Otherwise the
      features are the columns at positions ``5:-12`` of ``pDf``.
    print_data: if truthy, the label name, accuracy and classification report
      are printed and nothing is accumulated.

  Returns:
    The list of row dicts filled in by ``accumulate_data``; an empty list
    when ``print_data`` is truthy.
  """
  if less_columns:
    selected_columns = [
        'BEST_ANALYST_RATING', 'RETURN_ON_ASSET', 'BEST_TARGET_PRICE',
        'CUR_MKT_CAP', 'SHORT_INT', 'TOT_BUY_REC', 'TOT_SELL_REC',
        'day1', 'day2', 'day3', 'day4', 'day5',
        'day6', 'day7', 'day8', 'day9', 'day10',
    ]
    # dropna() returns a new frame instead of editing a slice in place.
    df = pDf.loc[:, selected_columns].dropna()
    features = df.iloc[:, :-10]
  else:
    # Work on a cleaned copy so the caller's DataFrame is left untouched.
    df = pDf.dropna()
    features = df.iloc[:, 5:-12]

  labels = df.iloc[:, -10:]
  report_data = []

  for i in range(len(labels.columns)):
    target = labels.iloc[:, i]

    # Fixed random_state keeps the 70/30 split reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=0)

    # Fit the scaler on the training rows only, then apply it to both sets.
    scaling = MinMaxScaler(feature_range=(0, 1)).fit(X_train)
    X_train = scaling.transform(X_train)
    X_test = scaling.transform(X_test)

    # Create a multi-layer perceptron classifier (hidden layers of 5 and 2 units).
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=0)

    # Train on the training set and predict the held-out test set.
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = metrics.accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    # Order matters: accumulate_data reads macro, micro, weighted by position.
    f1_scores = [
        metrics.f1_score(y_test, y_pred, average=average)
        for average in ('macro', 'micro', 'weighted')
    ]

    if print_data:
      print(target.name)
      print(accuracy)
      print(report)
    else:
      accumulate_data(report_data, report, accuracy, target.name, "Neural Network", minify_data, f1_scores)

  return report_data

In [0]:
def _parse_class_rows(report):
  """Return ``(label, precision, recall, f1, support)`` for each class row of a report string."""
  parsed = []
  # The report text starts with a header line and a blank line. The per-class
  # rows follow and end at the next blank line, before the summary rows.
  for line in report.split('\n')[2:]:
    fields = line.split()
    if not fields:
      break
    # Split from the right so the column widths (e.g. a 6-digit support
    # count) cannot glue two values together.
    *label_parts, precision, recall, f1_score, support = fields
    parsed.append((' '.join(label_parts), float(precision), float(recall),
                   float(f1_score), float(support)))
  return parsed


def accumulate_data(report_data,report,accuracy,day_name,model_name,minify_data,f1_scores):
  """Print a classification report and append its figures to ``report_data``.

  Args:
    report_data: list that receives the new row dict(s); it is modified in
      place and also returned.
    report: text produced by ``classification_report``.
    accuracy: accuracy score stored in every row.
    day_name: name of the label column, e.g. ``"day1"``.
    model_name: stored in the ``model`` column of the detailed rows.
    minify_data: if truthy, append ONE compact row for the day (accuracy, the
      three F1 averages, recall of class 0.0 as ``sell_recall`` and precision
      of class 2.0 as ``buy_precison``). Otherwise append one detailed row per
      class found in the report.
    f1_scores: sequence holding the macro, micro and weighted F1 scores, in
      that order.

  Returns:
    ``report_data``.
  """
  print(day_name)
  print(report)

  class_rows = _parse_class_rows(report)

  if minify_data:
    row = {}
    row['day'] = day_name.replace("day", "")
    row['accuracy'] = accuracy
    row['f1score_macro'] = f1_scores[0]
    row['f1score_micro'] = f1_scores[1]
    # The misspelled keys below are kept as-is: they become CSV column names.
    row['f1score_weigthed'] = f1_scores[2]
    # model_name is intentionally not recorded in the compact row.

    for label, precision, recall, _f1, _support in class_rows:
      # update recall for sell
      if float(label) == 0.0:
        row['sell_recall'] = recall
      # update precision for buy
      if float(label) == 2.0:
        row['buy_precison'] = precision

    report_data.append(row)

  else:
    for label, precision, recall, f1_score, support in class_rows:
      report_data.append({
          'model': model_name,
          'accuracy': accuracy,
          # Use the day_name argument; the previous code read variables that
          # only exist inside run_classifier and raised NameError here.
          'day': day_name,
          'class': label,
          'precision': precision,
          'recall': recall,
          'f1_score': f1_score,
          'support': support,
      })

  return report_data

In [0]:
# Sector names used to slice the data set by its 'Sector' index, one run per entry.
sectors = [
    'Communication Services',
    'Consumer Discretionary',
    'Consumer Staples',
    'Energy',
    'Financials',
    'Health Care',
    'Industrials',
    'Information Technology',
    'Materials',
    'Real Estate',
    'Utilities',
]

In [0]:
# Download the input workbook from Google Drive into the Colab working directory.
#
# Reference links:
# https://www.analyticsvidhya.com/blog/2017/09/naive-bayes-explained/
# https://datascience.stackexchange.com/questions/24275/scikit-mlpclassifier-vs-tensorflow-dnnclassifier

# Shell escape: install/upgrade PyDrive quietly (the version is not pinned).
!pip install -U -q PyDrive

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 1. Authenticate and create the PyDrive client.
# The Colab user is authenticated first; the application-default credentials
# obtained afterwards are then handed to PyDrive.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)


# Google Drive ID of the file to fetch.
file_id = '18pa4iuqvz2SX5RYrUdn09bDU8eNm2hqI'

# 2. Load a file by ID 
# The content is written to a local file named 'sp500_transformation_input.xlsx'.
downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile('sp500_transformation_input.xlsx')

[?25l[K     |▎                               | 10kB 25.7MB/s eta 0:00:01[K     |▋                               | 20kB 6.2MB/s eta 0:00:01[K     |█                               | 30kB 8.8MB/s eta 0:00:01[K     |█▎                              | 40kB 5.6MB/s eta 0:00:01[K     |█▋                              | 51kB 6.9MB/s eta 0:00:01[K     |██                              | 61kB 8.1MB/s eta 0:00:01[K     |██▎                             | 71kB 9.2MB/s eta 0:00:01[K     |██▋                             | 81kB 10.3MB/s eta 0:00:01[K     |███                             | 92kB 11.4MB/s eta 0:00:01[K     |███▎                            | 102kB 9.1MB/s eta 0:00:01[K     |███▋                            | 112kB 9.1MB/s eta 0:00:01[K     |████                            | 122kB 9.1MB/s eta 0:00:01[K     |████▎                           | 133kB 9.1MB/s eta 0:00:01[K     |████▋                           | 143kB 9.1MB/s eta 0:00:01[K     |█████                   

In [0]:
import pandas as pd

# Read the local workbook 'sp500_transformation_input.xlsx' into a DataFrame.
df = pd.read_excel('sp500_transformation_input.xlsx')

In [0]:
# Index the rows by sector so one sector's rows can be selected with df.loc[<sector>].
# Guarded so that re-running this cell is harmless: after the first run
# 'Sector' is the index, no longer a column, and set_index would raise KeyError.
if 'Sector' in df.columns:
  df = df.set_index('Sector')

In [0]:
# --- Run configuration ---

# True: train on the reduced, hand-picked column set inside run_classifier.
less_columns = True

# True: accumulate one compact summary row per day instead of one row per class.
minify = True

# True: print the metrics only; nothing is accumulated for the CSV report.
print_data = False

# True: run the classifier once per entry of `sectors`; False: once on all rows.
check_each_sector = False

In [0]:
from google.colab import files

# Run the classifier either once per sector or once on the full data set,
# and write each resulting report to a CSV file in the working directory.
if check_each_sector == True:
  print('Running classifier by sector.')

  for k, sector_name in enumerate(sectors):
    # Rows whose 'Sector' index equals this sector.
    df_sectorised = df.loc[sector_name]
    report = run_classifier(df_sectorised, minify, less_columns, print_data)

    # The sector name is prepended to the file name without a separator.
    file_name = sector_name + 'NeuralNetwork_classification_report.csv'
    dataframe = pd.DataFrame.from_dict(report)
    dataframe.to_csv(file_name, index=False)
    # Optional Colab download, currently disabled:
    # files.download(file_name)

else:
  print('Running classifier on all.')

  report = run_classifier(df, minify, less_columns, print_data)
  dataframe = pd.DataFrame.from_dict(report)
  dataframe.to_csv('NeuralNetwork_classification_report.csv', index=False)
  # Optional Colab download, currently disabled:
  # files.download('NeuralNetwork_classification_report.csv')

Running classifier on all.
day1
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00     52535
         2.0       0.58      1.00      0.74     73687

    accuracy                           0.58    126222
   macro avg       0.29      0.50      0.37    126222
weighted avg       0.34      0.58      0.43    126222

day2
              precision    recall  f1-score   support

         0.0       0.73      1.00      0.84     91528
         2.0       0.00      0.00      0.00     34694

    accuracy                           0.73    126222
   macro avg       0.36      0.50      0.42    126222
weighted avg       0.53      0.73      0.61    126222

day3
              precision    recall  f1-score   support

         0.0       0.68      1.00      0.81     86012
         2.0       0.00      0.00      0.00     40210

    accuracy                           0.68    126222
   macro avg       0.34      0.50      0.41    126222
weighted avg       0.46      0.6