In [53]:
from iexcloud.iexcloud import iexCloud
import pandas as pd
import numpy as np
import nltk
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.svm import LinearSVC
from prophet import Prophet
from datetime import date
import datetime


nltk.download('stopwords')
from nltk.corpus import stopwords
stop_words = stopwords.words('english')

from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("english")

file_path = r'all-data.csv'

iex = iexCloud()

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/sachinsuri/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [57]:
df = iex.get_max_time_series_df('AAPL')
df['Date'] = pd.to_datetime(df['Date'])
#condition = df['df'] >= date.today()
date_obj = datetime.datetime(2022, 5, 9)
condition = df['Date'] >= date_obj
#df = df[df['Date'] >= date_obj]
df.loc[condition, 'AAPL'] = 1.5 * df.loc[condition, 'AAPL']
df



Fetching time series data for AAPL


Unnamed: 0,Date,AAPL
0,2017-05-15,38.9250
1,2017-05-16,38.8675
2,2017-05-17,37.5625
3,2017-05-18,38.1350
4,2017-05-19,38.2650
...,...,...
1254,2022-05-09,228.0900
1255,2022-05-10,231.7650
1256,2022-05-11,219.7500
1257,2022-05-12,213.8400


FETCH NEWS ARTICLES FROM IEXCLOUD API

TRAIN SENTIMENT ANALYSIS MODULE

In [38]:
#Function to clean data
def clean_data(df, test):
    #turn all letters to lowercase
    df['sentence'] = df['sentence'].str.lower()

    #normalise text data & remove numbers
    df["sentence"] = df['sentence'].str.replace(
            "(@\[A-Za-z]+)|([^A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?",''
        )

    #Remove stop words
    df['sentence'] = df['sentence'].apply(
            lambda x: ' '.join([word for word in x.split() if word not in (stop_words)])
        )

    df['sentence (format)'] = df['sentence'].str.split(" ")
    df['sentence (stemmed)'] = df['sentence (format)'].apply(lambda x: [stemmer.stem(y) for y in x])
    df = df.drop(['sentence (format)'], axis=1)

    df['sentence stemmed'] = df['sentence'].apply(lambda x: ''.join([str(elem) for elem in x]))
    df = df.drop(['sentence (stemmed)'], axis=1)

    if not test:
        sentiment_score = {
            'neutral': 0,
            'negative': -1,
            'positive': 1
        }

        df['sentiment'] = df['sentiment'].replace(sentiment_score)
        df.columns = df.columns.str.replace(" ", "")
    
        df = df.drop(['sentiment'], axis=1)
        
    return df

In [39]:
df = pd.read_csv(file_path, encoding='ISO-8859-1', names=['sentiment', 'sentence'])
df_train = clean_data(df, True)
df_train

  df["sentence"] = df['sentence'].str.replace(


Unnamed: 0,sentiment,sentence,sentence stemmed
0,neutral,according gran company plans move production r...,according gran company plans move production r...
1,neutral,technopolis plans develop stages area less squ...,technopolis plans develop stages area less squ...
2,negative,international electronic industry company elco...,international electronic industry company elco...
3,positive,new production plant company would increase ca...,new production plant company would increase ca...
4,positive,according company updated strategy years baswa...,according company updated strategy years baswa...
...,...,...,...
4841,negative,london marketwatch share prices ended lower lo...,london marketwatch share prices ended lower lo...
4842,neutral,rinkuskiai beer sales fell per cent million li...,rinkuskiai beer sales fell per cent million li...
4843,negative,operating profit fell eur mn eur mn including ...,operating profit fell eur mn eur mn including ...
4844,negative,net sales paper segment decreased eur mn secon...,net sales paper segment decreased eur mn secon...


In [40]:
#Tfidf
tfidf = TfidfVectorizer(max_features = 5000)
x = df['sentence']
y = df['sentiment']

x = tfidf.fit_transform(x)

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    #stratify=y,
    random_state=1,
    test_size =0.2,
    shuffle=True
)

clf = LinearSVC()
clf.fit(x_train, y_train)

y_pred = clf.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

    negative       0.69      0.53      0.60       122
     neutral       0.78      0.87      0.82       570
    positive       0.71      0.60      0.65       278

    accuracy                           0.75       970
   macro avg       0.73      0.67      0.69       970
weighted avg       0.75      0.75      0.74       970



In [42]:
#predictions on test set
ticker_symbol = 'AAPL'
news_df = iex.get_news(ticker_symbol)

x = tfidf.transform(news_df['headline'])
y_pred = clf.predict(x)

y_pred

sentiment_count = {
    0: 0,
    1: 0,
    2: 0
}

for sentiment in y_pred:
    if sentiment == 'neutral':
        sentiment_count[1] += 1
    if sentiment == 'positive':
        sentiment_count[2] += 1
    if sentiment == 'negative':
        sentiment_count[0] += 1 

print(sentiment_count)
weighted_average = (
    (sentiment_count[1] * 1 +
    sentiment_count[2] * 2 +
    sentiment_count[0] * 0) / 100
)
weighted_average


{0: 1, 1: 93, 2: 6}


1.05

TRAIN TIMESERIES FORECASTING MODULE

In [None]:
ticker_symbol = 'AAPL'
ticker_data = iex.get_max_time_series_df(ticker_symbol)
ticker_data = ticker_data.rename(columns = {
    'Date': 'ds',
    ticker_symbol: 'y'
})
ticker_data

In [None]:
model_params = {
    "daily_seasonality": False,
    "weekly_seasonality": False,
    "yearly_seasonality": True,
    "seasonality_mode": "multiplicative",
    "growth": "logistic"
}


model = Prophet(**model_params)
ticker_data["cap"] = ticker_data['y'].max() + ticker_data['y'].std() * 0.05

model.fit(ticker_data)

future = model.make_future_dataframe(periods=100)
future["cap"] = ticker_data["cap"].max()

forecast = model.predict(future)
print(forecast)
model.plot_components(forecast)
model.plot(forecast)



COMBINE TIMESERIES FORECASTING WITH SENTIMENT ANALYSIS