<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# Yahoo Finance - Get [ENTITY] trends and predictions

## Input

### Get utils
This section enables the pipeline to run using common functions stored inside the /utils folder. 

In [None]:
# Load utils functions
from os import path

# Preferred location of the shared utils notebook (one directory above this notebook)
utils_path = "../utils/__utils__.ipynb"
# Fall back to a "utils" folder next to the working directory when the preferred path is missing
if not path.exists(utils_path):
    utils_path = "utils/__utils__.ipynb"

# Execute the utils notebook in this kernel.
# NOTE(review): later cells use OUTPUTS_PATH, which is not defined in this notebook —
# presumably it comes from the utils notebook; confirm.
%run "$utils_path"

### Install libraries

In [None]:
pip install nltk

### Import libraries

In [None]:
import naas
import pandas as pd
from naas_drivers import prediction, yahoofinance, plotly
import plotly.graph_objects as go
import markdown2
from datetime import datetime
from IPython.core.display import display, HTML

### Setup Yahoo Finance
👉 Here you can change the ticker and timeframe

In [None]:
# NOTE(review): the *_INPUT values look like template placeholders that are meant to be
# replaced before the notebook is run — confirm how they are substituted.
# NAME is used as the news search query and in the chart title.
NAME = "NAME_INPUT"
# TICKER is the symbol requested from Yahoo Finance and is part of every output file name.
TICKER = "TICKER_INPUT"
# RELATIVE_INDEX is the benchmark symbol requested from Yahoo Finance for the relative strength.
RELATIVE_INDEX = "RELATIVE_INDEX_INPUT"

# Example values:
#NAME = "Amazon"
#TICKER = "AMZN"
#RELATIVE_INDEX = "^IXIC"

# Date range passed to yahoofinance.get for both the ticker and the benchmark.
# NOTE(review): -270 is presumably an offset in days relative to date_to — confirm against naas_drivers.
date_from = -270
date_to = "today"

### Setup Prediction
👉 Here you can change the number of data points the prediction will be performed on

In [None]:
# Number of data points passed to prediction.get (data_points); also shown in the chart title as "+N days"
DATA_POINT = 90

### Setup Assets

In [None]:
# Date stamp (YYYY-MM-DD) shared by all files produced during this run
NOW = datetime.now().strftime("%Y-%m-%d")

# Every asset shares the same "<date>_<ticker>" stem inside OUTPUTS_PATH; only the suffix differs
asset_stem = path.join(OUTPUTS_PATH, f"{NOW}_{TICKER}")
csv_output = f"{asset_stem}.csv"
image_output = f"{asset_stem}.png"
html_output = f"{asset_stem}.html"
tracker_output = f"{asset_stem}_tracker.csv"

## Model

### Get dataset from Yahoo Finance

In [None]:
# Define the rolling window size (number of rows) used for the min/max bands
window_size = 20  # Change this to your desired window size

# Download the price history of TICKER and discard incomplete rows
raw_prices = yahoofinance.get(tickers=TICKER, date_from=date_from, date_to=date_to)
df_yahoo = raw_prices.dropna().reset_index(drop=True)

# Rolling minimum and maximum of the 'Close' column over the last `window_size` rows
close_window = df_yahoo['Close'].rolling(window=window_size)
df_yahoo['min_rolling'] = close_window.min()
df_yahoo['max_rolling'] = close_window.max()

# Upper-case every column name (e.g. 'Close' -> 'CLOSE', 'min_rolling' -> 'MIN_ROLLING')
df_yahoo.columns = df_yahoo.columns.str.upper()

# Preview the last rows
df_yahoo.tail(5)

### Add relative index data

In [None]:
# Download the benchmark index (RELATIVE_INDEX) over the same date range as the ticker
index_prices = yahoofinance.get(tickers=RELATIVE_INDEX, date_from=date_from, date_to=date_to)
nasdaq_data = index_prices.dropna().reset_index(drop=True)
index_close = nasdaq_data['Close']

# NOTE(review): the benchmark is attached by row position (both frames have a reset index),
# not by date — rows only line up if both downloads cover exactly the same dates; confirm.
df_yahoo['NASDAQ'] = index_close

# Ratio of the ticker close to the benchmark close
df_yahoo['RELATIVE_STRENGTH_BASE'] = df_yahoo['CLOSE'] / index_close

# Ticker close scaled by (1 - ratio)
df_yahoo['RELATIVE_STRENGTH'] = df_yahoo['CLOSE'] * (1 - df_yahoo['RELATIVE_STRENGTH_BASE'])

# Preview the last rows
df_yahoo.tail(5)

### Create tracker data

In [None]:
def get_variation(df, TICKER):
    """Build a one-row tracker DataFrame describing the most recent close in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Price history with at least a "DATE" and a "CLOSE" column.
    TICKER : str
        Symbol stored in the "INDICATOR" column of the result.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ENTITY, SCENARIO (latest date), INDICATOR,
        TYPE, SOURCE, VALUE (latest close), MIX (lowest close), MAX (highest close)
        and SCORE (position of the latest close inside the min/max range, scaled
        to 0-10 and rounded to 2 decimals). VALUE, MIX and MAX are strings
        formatted as "$1,234.56". SCORE is NaN when every close is identical.

    Raises
    ------
    KeyError
        If ``df`` is empty or lacks the "DATE"/"CLOSE" columns.
    """
    # Newest row first so that position 0 is the latest observation
    latest_first = df.sort_values("DATE", ascending=False).reset_index(drop=True)

    # Latest close and its date
    datanow = latest_first.loc[0, "CLOSE"]
    date_now = latest_first.loc[0, "DATE"]

    # Lowest and highest close over the whole period
    min_value = latest_first["CLOSE"].min()
    max_value = latest_first["CLOSE"].max()

    # Score: where the latest close sits between min and max, on a 0-10 scale.
    # A flat price range would divide by zero, so it is reported as NaN explicitly.
    price_range = max_value - min_value
    if price_range == 0:
        score = float("nan")
    else:
        score = 10 * (datanow - min_value) / price_range

    # Format with two real decimals (no intermediate rounding to one decimal)
    datanow = "${:,.2f}".format(datanow)
    min_value = "${:,.2f}".format(min_value)
    max_value = "${:,.2f}".format(max_value)

    # NOTE(review): the "MIX" key holds the minimum value and looks like a typo for "MIN";
    # it is kept unchanged because it is part of the output schema — confirm with consumers.
    result_df = pd.DataFrame({
        "ENTITY": "Universal Tracker",
        "SCENARIO": [date_now],
        "INDICATOR": [TICKER],
        "TYPE": "Financial",
        "SOURCE": "Yahoo Finance",
        "VALUE": [datanow],
        "MIX": [min_value],
        "MAX": [max_value],
        "SCORE": [round(score, 2)]
    })

    return result_df

# Build the one-row tracker summary for the downloaded price history
result_df = get_variation(df_yahoo, TICKER)
# Persist the tracker next to the other outputs (the DataFrame index is written as the first column)
result_df.to_csv(tracker_output)
result_df

### Add prediction

In [None]:
# Run the naas prediction driver on the CLOSE column for DATA_POINT data points
predicted = prediction.get(
    dataset=df_yahoo,
    date_column='DATE',
    column="CLOSE",
    data_points=DATA_POINT,
    prediction_type="all",
)

# Order the result newest-first and renumber the rows
df_predict = predicted.sort_values("DATE", ascending=False).reset_index(drop=True)

# Preview the first DATA_POINT + 5 rows
df_predict.head(int(DATA_POINT)+5)

In [None]:
# Create 'TOTAL_PREDICT' column: the LINEAR prediction where it exists, the observed CLOSE otherwise
df_predict['TOTAL_PREDICT'] = df_predict['LINEAR'].fillna(df_predict['CLOSE'])

# Calculate the 5, 10 and 50 row moving averages.
# rolling() works on row order, so the averages are computed on a chronologically sorted
# view: each value then averages the current and the preceding dates (a trailing average)
# instead of looking ahead at later dates. Assigning back aligns on the index, so the
# row order of df_predict itself is left untouched.
chronological = df_predict.sort_values("DATE")['TOTAL_PREDICT']
for window in (5, 10, 50):
    df_predict[f'MA{window:02d}'] = chronological.rolling(window=window).mean()

# Display dataframe
df_predict.head()

### Get news data

In [None]:
from naas_drivers import newsapi

# Article attributes requested from the news API
news_fields = ["date","title", "image", "link", "description"]

# Search the news for NAME
df = newsapi.connect().get(f"{NAME}", fields=news_fields)

In [None]:
# Reduce the publication timestamp to a calendar date (the time of day is dropped)
df['date'] = pd.to_datetime(df['date']).dt.date
# Discard articles that miss any of the requested fields
df = df.dropna()
df

### Analyze sentiment per news article

In [None]:
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download the 'vader_lexicon' resource, used by the VADER sentiment analyzer imported above
nltk.download('vader_lexicon')

def analyze_sentiment(df, columns):
    """Add VADER sentiment scores for each text column of ``df``.

    For every name in ``columns`` four new columns are written to ``df``:
    ``<column>_neg``, ``<column>_neu``, ``<column>_pos`` and ``<column>_compound``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the text columns; it is modified in place.
    columns : iterable of str
        Names of the text columns to score.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the score columns added.
    """
    sid = SentimentIntensityAnalyzer()
    score_keys = ["neg", "neu", "pos", "compound"]

    for column in columns:
        # Score every text once and build the score table in one go (instead of creating
        # one pandas Series per row); this also yields empty columns for an empty frame.
        scores = pd.DataFrame(
            [sid.polarity_scores(text) for text in df[column]],
            index=df.index,
            columns=score_keys,
        )
        # Assign by key so the mapping does not depend on the dict ordering
        for key in score_keys:
            df[f'{column}_{key}'] = scores[key]

    return df

# Score the title and the description of every news article
df = analyze_sentiment(df, ['title', 'description'])

In [None]:
def calculate_sentiment(df, columns):
    """Append one summary row per text column holding the summed sentiment scores.

    For every name in ``columns`` the ``<column>_neg/_neu/_pos/_compound`` columns
    are summed (rounded to 2 decimals) and a new row is appended containing these
    sums plus their share of the four-way total in ``<column>_*_percent`` columns.
    All other cells of the summary row are NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the ``<column>_neg``, ``<column>_neu``, ``<column>_pos`` and
        ``<column>_compound`` score columns. It is not modified.
    columns : iterable of str
        Prefixes of the score columns to summarise.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index and one extra row per entry of ``columns``.
    """
    for column in columns:
        # Calculate sums
        neg = df[f'{column}_neg'].sum().round(2)
        neu = df[f'{column}_neu'].sum().round(2)
        pos = df[f'{column}_pos'].sum().round(2)
        compound = df[f'{column}_compound'].sum().round(2)

        # Calculate total
        total = neg + neu + pos + compound

        # Sums and their percentage of the total
        summary_row = {
            f'{column}_neg': neg,
            f'{column}_neu': neu,
            f'{column}_pos': pos,
            f'{column}_compound': compound,
            f'{column}_neg_percent': (neg / total * 100).round(2),
            f'{column}_neu_percent': (neu / total * 100).round(2),
            f'{column}_pos_percent': (pos / total * 100).round(2),
            f'{column}_compound_percent': (compound / total * 100).round(2),
        }

        # DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent
        df = pd.concat([df, pd.DataFrame([summary_row])], ignore_index=True)
    return df

# Append the summary rows (sums and percentages) for the title and description scores
df = calculate_sentiment(df, ['title', 'description'])
df

In [None]:
# Combine the title and description scores into one overall score per sentiment category
for category in ('neg', 'neu', 'pos', 'compound'):
    df[f'total_{category}'] = df[f'title_{category}'] + df[f'description_{category}']
df

### Sum sentiment by category

In [None]:
# Only sum real articles: calculate_sentiment appends summary rows that already contain the
# column sums, so summing the whole frame would count every score twice. Summary rows have
# no title, which is what tells them apart from articles.
news_articles = df[df['title'].notna()]

# Calculate sums for 'title' and 'description'
title_neg = news_articles.title_neg.sum().round(2)
title_neu = news_articles.title_neu.sum().round(2)
title_pos = news_articles.title_pos.sum().round(2)
title_compound = news_articles.title_compound.sum().round(2)

desc_neg = news_articles.description_neg.sum().round(2)
desc_neu = news_articles.description_neu.sum().round(2)
desc_pos = news_articles.description_pos.sum().round(2)
desc_compound = news_articles.description_compound.sum().round(2)

# Calculate total sums
total_neg = title_neg + desc_neg
total_neu = title_neu + desc_neu
total_pos = title_pos + desc_pos
total_compound = title_compound + desc_compound

# Calculate percentage contribution of each category (shared denominator computed once)
sentiment_total = total_neg + total_neu + total_pos + total_compound
neg_percent = (total_neg / sentiment_total * 100).round(2)
neu_percent = (total_neu / sentiment_total * 100).round(2)
pos_percent = (total_pos / sentiment_total * 100).round(2)
compound_percent = (total_compound / sentiment_total * 100).round(2)

# Preview of the figures that are embedded in the chart title
print("Sum of news sentiment by category:")
print(
    "\n\t🔴 Negative \t",
    total_neg,
    f"({neg_percent}%)",
    "\n\t🟠 Neutral\t",
    total_neu,
    f"({neu_percent}%)",
    "\n\t🟢 Positive \t",
    total_pos,
    f"({pos_percent}%)",
    "\n\t🔵 Compound \t",
    total_compound,
    f"({compound_percent}%)",
)


In [None]:
# Upper-case the news column names (e.g. 'date' -> 'DATE') to match the naming of df_predict
df.columns = df.columns.str.upper()
df

### Merge the stock and news dataframes

In [None]:
# Bring both DATE columns to pandas datetimes so they can be used as the merge key
df['DATE'] = pd.to_datetime(df['DATE'])
df_predict['DATE'] = pd.to_datetime(df_predict['DATE'])


# Left join: every row of df_predict is kept, news columns are filled where the DATE matches.
# NOTE(review): a date with several articles presumably yields several merged rows for that
# date — confirm this fan-out is intended for the CSV and the chart.
merged_df = pd.merge(df_predict, df, on='DATE', how='left')

### Save and share the dataframe in CSV

In [None]:
# Write the merged prices/prediction/news table to CSV (the index is written as the first column)
merged_df.to_csv(csv_output)

# Register the CSV as a naas asset and keep the value returned by naas.asset.add
link_csv = naas.asset.add(csv_output,override_prod=True)

### Plot linechart

In [None]:
# Line chart of the close, the linear prediction, the relative strength, the moving averages
# and the rolling min/max bands over DATE. The title embeds the news sentiment totals and
# percentages computed in the "Sum sentiment by category" cell.
fig = plotly.linechart(merged_df,
                       x="DATE",
                       y=["CLOSE", "LINEAR","RELATIVE_STRENGTH", "MA05", "MA10", "MA50", "MAX_ROLLING", "MIN_ROLLING"],
                       showlegend=True,
                       title = f'''<b><span style='font-size: 20px;'>{NAME} Trends & Prediction +{str(DATA_POINT)} days</span></b>
                       <br><span style='font-size: 10px;'><b>News Analysis:</b> 🔴 Negative: {total_neg} ({neg_percent}%), 🟠 Neutral: {total_neu} ({neu_percent}%), 🟢 Positive: {total_pos} ({pos_percent}%), 🔵 Compound: {total_compound} ({compound_percent}%)</span>
                       '''
                      )

## Output

### Save and share your graph in PNG

In [None]:
# Save the chart as a PNG file at image_output
fig.write_image(image_output)

# Register the PNG as a naas asset; the returned value is stored in the JSON file written at the end
link_image = naas.asset.add(image_output,override_prod=True)

#-> Uncomment the line below to remove your asset
# naas.asset.delete(image_output)

### Save and share your graph in HTML


In [None]:
# Save the chart as an HTML file at html_output
fig.write_html(html_output)

# Register the HTML file as a naas asset (with the "inline" parameter enabled);
# the returned value is stored in the JSON file written at the end
link_html = naas.asset.add(html_output, params={"inline": True},override_prod=True)

#-> Uncomment the line below to remove your asset
# naas.asset.delete(html_output)

### Save assets to JSON 

In [None]:
# `json` is not imported in any visible cell of this notebook, so it is imported here to
# keep this cell runnable on a fresh kernel.
import json

# Values returned by naas.asset.add for the PNG and the HTML chart
json_assets = {
    'link_image': link_image,
    'link_html': link_html
}

# NOTE(review): this file is named "<ticker>_<date>.json" while the other outputs use
# "<date>_<ticker>.<ext>"; the name is kept unchanged in case something reads it — confirm.
with open(path.join(OUTPUTS_PATH,f"{TICKER}_{NOW}.json"),'w') as f:
    json.dump(json_assets,f)