# Stock market sectors

You'll often see stocks broken down by the type of business they're in. The basic categories most often used include:

Communication Services -- telephone, internet, media, and entertainment companies

Consumer Discretionary -- retailers, automakers, and hotel and restaurant companies

Consumer Staples -- food, beverage, tobacco, and household and personal products companies

Energy -- oil and gas exploration and production companies, pipeline providers, and gas station operators

Financial -- banks, mortgage finance specialists, and insurance and brokerage companies

Healthcare -- health insurers, drug and biotech companies, and medical device makers

Industrial -- airline, aerospace and defense, construction, logistics, machinery, and railroad companies

Materials -- mining, forest products, construction materials, packaging, and chemical companies

Real Estate -- real estate investment trusts and real estate management and development companies

Technology -- hardware, software, semiconductor, communications equipment, and IT services companies

Utilities -- electric, natural gas, water, renewable energy, and multi-product utility companies

# The stock we may use
Select sp500 as a base line to compare with other category of stock

Select apple stock to present tech stock

Select bac stock to present financial stock

Select ual stock to present industrial stock

Select cake stock to present consumer discretionary

In [1]:
'''
Import all libraries we will use later
'''
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


In [3]:
'''
Use Data Reader from pandas to catch all stock data we need. 
'''

# For reading stock data from yahoo
from pandas_datareader.data import DataReader
# For time stamps
from datetime import datetime

# The tech stocks we'll use for this analysis
tech_list = ['AAPL', 'GOOG', 'MSFT', 'AMZN']

# Set up End and Start times for data grab
end = datetime.now()
start = datetime(end.year - 1, end.month, end.day)

#For loop for grabing yahoo finance data and setting as a dataframe
for stock in tech_list:   
    # Set DataFrame as the Stock Ticker
    globals()[stock] = DataReader(stock, 'yahoo', start, end)
    globals()[stock]["company_name"] = stock

# all_stock_df = pd.read_csv('data/all_stocks_5yr.csv')
# Amazon_df = pd.read_csv('data/Amazon_Historical_StockPrice.csv')
sp500_df = DataReader('sp500', 'yahoo', start, end)

RemoteDataError: No data fetched for symbol sp500 using YahooDailyReader

In [None]:
'''
Check and show dataframe
'''
tech_df = pd.concat([AAPL, GOOG, MSFT, AMZN], axis=0)
tech_df.head()

In [None]:
'''
Clean useless data and bias
'''

print(tech_df.isnull().any())
print(tech_df[tech_df.isnull().values==True])
tech_df.fillna(0,inplace=True)
print(tech_df[tech_df.duplicated(keep=False)])

In [None]:
'''
Look at the trend of stock
'''

plt.figure(figsize=(16,8))
plt.title('Close Price History')
plt.plot(AAPL['Close'])
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.show()

In [None]:
'''
Use close price of stock to do the analysis. Split train and test data. 
'''

# Create a new dataframe with only the 'Close column 
data = AAPL.filter(['Close'])
# Convert the dataframe to a numpy array
dataset = data.values
# Get the number of rows to train the model on
train, test = train_test_split(dataset, test_size=0.2)
train_data = train[0:len(train), 0]
test_data = test[0:len(train), 0]

In [None]:
'''
Split x and y from train
'''

# Split the data into x_train and y_train data sets
x_train = []
y_train = []

for i in range(60, len(train_data)):
    x_train.append(train_data[i-60:i])
    y_train.append(train_data[i])
#     if i<= 61:
#         print(x_train)
#         print(y_train)
#         print()
        
# Convert the x_train and y_train to numpy arrays 
x_train, y_train = np.array(x_train), np.array(y_train)
# Reshape the data
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
# x_train.shape

In [None]:
'''
Train model
'''

from keras.models import Sequential
from keras.layers import Dense, LSTM

# Build the LSTM model
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape= (x_train.shape[1], 1)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(x_train, y_train, batch_size=1, epochs=1)

In [None]:
'''
Split x and y from test and check accuracy
'''

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))
# Create the testing data set
# Create a new array containing scaled values from index 1543 to 2002 
# test_data = scaled_data[training_data_len - 60: , :]
# Create the data sets x_test and y_test
x_test = []
# y_test = dataset[training_data_len:, :]
y_test = []
temp = round(60/len(train_data) * len(test_data))
for i in range(temp, len(test_data)):
    x_test.append(test_data[i-temp:i])
    y_test.append(test_data[i])
    if i<= temp:
        print(x_test)
        print(y_test)
        print()
    
# Convert the data to a numpy array
x_test, y_test = np.array(x_test), np.array(y_test)
# x_test = np.array(x_test)

# Reshape the data
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1 ))
# x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# Get the models predicted price values 
predictions = model.predict(x_test)
# predictions = scaler.inverse_transform(predictions)

# Get the root mean squared error (RMSE)
rmse = np.sqrt(np.mean(((predictions - y_test) ** 2)))
rmse