# Part One
Import required tools for collecting and cleaning the data: <br>
* Some tools are more commonly used
* Some tools are specific to the Alpha Vantage API

In [1]:
import os
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()
# The Alpha Vantage key is read from the environment variable named 'secret';
# os.getenv returns None when that variable is not set.
ALPHA_VANTAGE_API_KEY = os.getenv('secret')

In [2]:
import json
import requests
import collections
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from ast import literal_eval
import numpy as np
import datetime
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import scipy.stats as sp
import pickle

In [3]:
from alpha_vantage.foreignexchange import ForeignExchange
from alpha_vantage.timeseries import TimeSeries

import asyncio
from alpha_vantage.async_support.timeseries import TimeSeries

# Part Two
Obtain forex data from the Alpha Vantage API: <br>
* I am obtaining pricing data for the past 100 days for pairings of the top ten most traded currencies <br>
* For each pairing I am taking only the data required into a pandas dataframe, adding a column for "to" and "from" symbol to keep track of each currency, and then concatenating the dataframes into a large dataframe for easier use

* Assign API call to a variable "app"

In [4]:
# Foreign-exchange API client; output_format='pandas' asks for DataFrame results.
app = ForeignExchange(key=ALPHA_VANTAGE_API_KEY, output_format='pandas')

* Create empty dataframe

In [5]:
# Accumulator: each currency-pair cell below concatenates its rows onto this frame.
all_pairs = pd.DataFrame()

Here we initialize two tuples:
* CURRENCY_1: the currency we are converting FROM
* CURRENCY_2: the currency we are converting TO

In [6]:
# CURRENCY_1: currency converted FROM (one-element tuple).
# CURRENCY_2: currencies converted TO. USD appears in both tuples; the loop
# that consumes them skips pairings where both symbols are equal.
CURRENCY_1 = ('USD',)
CURRENCY_2 = ('USD', 'EUR', 'JPY', 'GBP', 'AUD',)

Create the for loop:
* Double for loop to iterate through CURRENCY_1 and CURRENCY_2
* If statement to be sure that currency is not converted to itself
* Restrict data to usable part returned by API
* Create columns for "to symbol" and "from symbol"
* Rename columns for ease of use
* Convert to datetime, sort index
* Add features
* Reorder columns
* Concatenate to main dataframe

In [7]:
# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

* Repeat for all other currencies:

In [8]:
CURRENCY_1 = ('USD',)
CURRENCY_2 = ('CHF', 'CAD', 'HKD', 'SEK', 'NZD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [9]:
CURRENCY_1 = ('EUR',)
CURRENCY_2 = ('USD', 'EUR', 'JPY', 'GBP', 'AUD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [10]:
CURRENCY_1 = ('EUR',)
CURRENCY_2 = ('CHF', 'CAD', 'HKD', 'SEK', 'NZD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [11]:
CURRENCY_1 = ('JPY',)
CURRENCY_2 = ('USD', 'EUR', 'JPY', 'GBP', 'AUD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [12]:
CURRENCY_1 = ('JPY',)
CURRENCY_2 = ('CHF', 'CAD', 'HKD', 'SEK', 'NZD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [13]:
CURRENCY_1 = ('GBP',)
CURRENCY_2 = ('USD', 'EUR', 'JPY', 'GBP', 'AUD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [14]:
CURRENCY_1 = ('GBP',)
CURRENCY_2 = ('CHF', 'CAD', 'HKD', 'SEK', 'NZD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [15]:
CURRENCY_1 = ('AUD',)
CURRENCY_2 = ('USD', 'EUR', 'JPY', 'GBP', 'AUD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [16]:
CURRENCY_1 = ('AUD',)
CURRENCY_2 = ('CHF', 'CAD', 'HKD', 'SEK', 'NZD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [17]:
CURRENCY_1 = ('CHF',)
CURRENCY_2 = ('USD', 'EUR', 'JPY', 'GBP', 'AUD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [18]:
CURRENCY_1 = ('CHF',)
CURRENCY_2 = ('CHF', 'CAD', 'HKD', 'SEK', 'NZD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [19]:
CURRENCY_1 = ('CAD',)
CURRENCY_2 = ('USD', 'EUR', 'JPY', 'GBP', 'AUD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [20]:
CURRENCY_1 = ('CAD',)
CURRENCY_2 = ('CHF', 'CAD', 'HKD', 'SEK', 'NZD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [21]:
CURRENCY_1 = ('HKD',)
CURRENCY_2 = ('USD', 'EUR', 'JPY', 'GBP', 'AUD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [22]:
CURRENCY_1 = ('HKD',)
CURRENCY_2 = ('CHF', 'CAD', 'HKD', 'SEK', 'NZD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [23]:
CURRENCY_1 = ('SEK',)
CURRENCY_2 = ('USD', 'EUR', 'JPY', 'GBP', 'AUD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [24]:
CURRENCY_1 = ('SEK',)
CURRENCY_2 = ('CHF', 'CAD', 'HKD', 'SEK', 'NZD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [25]:
CURRENCY_1 = ('NZD',)
CURRENCY_2 = ('USD', 'EUR', 'JPY', 'GBP', 'AUD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

In [26]:
CURRENCY_1 = ('NZD',)
CURRENCY_2 = ('CHF', 'CAD', 'HKD', 'SEK', 'NZD',)

# Download the daily rates for every CURRENCY_1 -> CURRENCY_2 pairing, derive
# the lag / mean features and append the rows to `all_pairs`.
lag_columns = ['lag_{}'.format(n) for n in range(1, 8)]
column_order = (['from symbol', 'to symbol', 'open', 'high', 'low', 'close']
                + lag_columns
                + ['rolling_mean', 'expanding_mean', 'time'])

pair_frames = []
for c1 in CURRENCY_1:
    for c2 in CURRENCY_2:
        if c1 == c2:
            continue  # a currency is never converted to itself

        currconv = app.get_currency_exchange_daily(c1, c2)
        currconv1 = currconv[0]  # element 0 of the response is the price table
        currconv1['to symbol'] = c2
        currconv1['from symbol'] = c1

        # Drop the numeric prefixes from the price column labels.
        currconv1 = currconv1.rename(columns={
            '1. open': 'open',
            '2. high': 'high',
            '3. low': 'low',
            '4. close': 'close',
        })

        # pd.to_datetime returns a new index instead of converting in place,
        # so the result must be assigned back before sorting.
        currconv1.index = pd.to_datetime(currconv1.index)
        currconv1 = currconv1.sort_index()

        # Row counter in sorted index order, starting at 0.
        currconv1['time'] = np.arange(len(currconv1.index))

        # lag_N holds the close value from N rows earlier.
        for n, lag_column in enumerate(lag_columns, start=1):
            currconv1[lag_column] = currconv1['close'].shift(n)

        # 7-row rolling mean and expanding mean (at least 2 rows) of close.
        currconv1['rolling_mean'] = currconv1['close'].rolling(window=7).mean()
        currconv1['expanding_mean'] = currconv1['close'].expanding(2).mean()

        currconv1 = currconv1[column_order]
        pair_frames.append(currconv1)

# Concatenate once after the loop: a failure part-way through the downloads
# leaves `all_pairs` unchanged instead of partially extended.
all_pairs = pd.concat([all_pairs] + pair_frames)

# Part Three
Obtain and clean sentiment data

* Create empty dataframe to concatenate to

In [27]:
# Empty frame meant as the accumulator for sentiment data; the cells visible
# here do not yet append anything to it.
all_sentiment = pd.DataFrame()

* Request from API
* Get json

In [28]:
# Request USD news sentiment published since 2022-09-01.
# The start date has to be sent as `time_from=<YYYYMMDDTHHMM>`; a `from_date:...`
# fragment is not a key=value pair, so it would not reach the API as a filter.
url2 = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=forex:USD&time_from=20220901T0000&apikey={}'.format(ALPHA_VANTAGE_API_KEY)
# requests has no default timeout; bound the wait so the cell cannot hang forever.
r2 = requests.get(url2, timeout=30)
# Stop on HTTP errors instead of trying to parse an error page as data.
r2.raise_for_status()
data2 = r2.json()

The data that was returned is very layered:
* Convert to list
* Pop unneeded items

In [29]:
# Turn the response dict into a list of (key, value) pairs so that the
# leading entries can be discarded by position.
listsent = [pair for pair in data2.items()]

# Discard the first three pairs; the third one removed is the value this cell displays.
del listsent[:2]
listsent.pop(0)

('relevance_score_definition',
 '0 < x <= 1, with a higher score indicating higher relevance.')

* Flatten list twice (I found this worked the best):

In [30]:
# First pass: unpack every remaining (key, value) pair into one flat list.
flat_ls = [member for pair in listsent for member in pair]

# Discard the leading element; the rest is iterated in the second pass.
flat_ls.pop(0)

# Second pass: concatenate the contents of each remaining element.
flat_ls2 = [record for group in flat_ls for record in group]

dftest = pd.DataFrame(flat_ls2)

* Explode the `ticker_sentiment` column so that each ticker entry gets its own row:

In [31]:
# Expand list-valued 'ticker_sentiment' entries: every element gets its own row,
# with the other columns repeated for each of them.
dftest = dftest.explode('ticker_sentiment')

* Delete unneeded columns
* Reset index

In [32]:
# Remove the columns that are not used further on, then renumber the rows
# from 0 (the old index is discarded rather than kept as a column).
dftest = (
    dftest
    .drop(columns=['summary', 'banner_image', 'category_within_source', 'source_domain', 'source', 'topics'])
    .reset_index(drop=True)
)

Use a for loop to access dictionary inside 'ticker sentiment' column:
* Even though the API returns data for the specified ticker, it also returns duplicate rows of the same data for other tickers that the same news article also applies to
* I want to keep the data as organized as possible and do not want to deal with messy duplicates later after requesting from the API for other tickers
* The for loop checks if the ticker equals the specified currency - if not, the row is dropped

In [33]:
# Collect the index labels of rows whose ticker entry is not FOREX:USD.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
drop_values = []
for k, v in dftest['ticker_sentiment'].items():
    # v is the per-ticker dict; rows without a 'ticker' key are left alone.
    if 'ticker' in v and v['ticker'] != 'FOREX:USD':
        drop_values.append(k)

* Create a new dataframe by dropping the rows whose index was collected in `drop_values`
* Sort values by overall sentiment score

In [34]:
# Remove the rows flagged in drop_values, then put the highest overall
# sentiment score first.
usdforexsentiment = (
    dftest
    .drop(index=drop_values)
    .sort_values(by='overall_sentiment_score', ascending=False)
)

* Add a column for symbol
* Set index to symbol

In [35]:
# Tag every row with the currency code and promote that tag to the index.
usdforexsentiment = usdforexsentiment.assign(symbol='USD').set_index('symbol')

* Concatenate to all_sentiment dataframe
* Check individual dataframe head to confirm everything worked

In [36]:
# Append the USD rows to the combined sentiment frame.
# NOTE: re-running this cell appends the same rows again.
all_sentiment = pd.concat([all_sentiment, usdforexsentiment])

# Display the first rows of the USD frame as a visual check.
usdforexsentiment.head()

Unnamed: 0_level_0,title,url,time_published,authors,overall_sentiment_score,overall_sentiment_label,ticker_sentiment
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
USD,Mitsubishi raises profit forecast on higher co...,https://www.reuters.com/business/autos-transpo...,20221108T121400,[Reuters],0.335897,Somewhat-Bullish,"{'ticker': 'FOREX:USD', 'relevance_score': '0...."
USD,ETF Asset Report of October,https://www.zacks.com/stock/news/2015630/etf-a...,20221108T180000,[Zacks Investment Research],0.334772,Somewhat-Bullish,"{'ticker': 'FOREX:USD', 'relevance_score': '0...."
USD,Ethereum-Based Prediction Market Shows Red Wav...,https://decrypt.co/113844/ethereum-based-predi...,20221108T154155,[Decrypt / Ben Munster],0.306016,Somewhat-Bullish,"{'ticker': 'FOREX:USD', 'relevance_score': '0...."
USD,1 in 5 College Students in the US Has Used The...,https://www.investorideas.com/news/2022/crypto...,20221108T154049,[],0.301102,Somewhat-Bullish,"{'ticker': 'FOREX:USD', 'relevance_score': '0...."
USD,These 3 European Companies Could See Their Div...,https://www.benzinga.com/news/large-cap/22/11/...,20221108T165857,[Robert Kuczmarski],0.25688,Somewhat-Bullish,"{'ticker': 'FOREX:USD', 'relevance_score': '0...."


* Find mean of overall sentiment score

In [37]:
# Average overall sentiment score over the retained USD rows; read again
# when the 'sentiment_score' column of all_pairs is filled in.
mean = usdforexsentiment['overall_sentiment_score'].mean()

* Create empty column in all_pairs dataframe

In [38]:
# Placeholder column for the per-currency sentiment means.
# NaN (not '') keeps the column numeric: a string placeholder would make it
# object dtype, and it would stay object even after floats are written into it.
all_pairs['sentiment_score'] = np.nan

Repeat for other currencies:

In [39]:
# --- EUR news sentiment ---
# Request articles published since 2022-09-01. The start date has to be sent as
# `time_from=<YYYYMMDDTHHMM>`; a `from_date:...` fragment is not a key=value
# pair, so it would not reach the API as a filter.
url2 = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=forex:EUR&time_from=20220901T0000&apikey={}'.format(ALPHA_VANTAGE_API_KEY)
# requests has no default timeout; bound the wait and stop on HTTP errors.
r2 = requests.get(url2, timeout=30)
r2.raise_for_status()
data2 = r2.json()

# The article records are read from the 'feed' entry by name instead of by
# position. A reply without 'feed' is presumably an error or rate-limit
# message (TODO confirm against the API docs); report it instead of failing later.
if 'feed' not in data2:
    raise ValueError('No "feed" in NEWS_SENTIMENT response for FOREX:EUR: {}'.format(data2))

dftest = pd.DataFrame(data2['feed'])

# One row per ticker entry, without the columns that are not used further on,
# and with the rows renumbered from 0.
dftest = (
    dftest
    .explode('ticker_sentiment')
    .drop(columns=['summary', 'banner_image', 'category_within_source', 'source_domain', 'source', 'topics'])
    .reset_index(drop=True)
)

# Collect the index labels of rows whose ticker entry is not FOREX:EUR.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
drop_values = []
for k, v in dftest['ticker_sentiment'].items():
    if 'ticker' in v and v['ticker'] != 'FOREX:EUR':
        drop_values.append(k)

# Keep only the EUR rows, highest overall sentiment score first.
eurforexsentiment = (
    dftest
    .drop(index=drop_values)
    .sort_values(by='overall_sentiment_score', ascending=False)
)

# Tag every row with the currency code and use that tag as the index.
eurforexsentiment['symbol'] = 'EUR'
eurforexsentiment = eurforexsentiment.set_index('symbol')

all_sentiment = pd.concat([all_sentiment, eurforexsentiment])

# Average overall sentiment score for EUR, used when all_pairs is filled in.
eurmean = eurforexsentiment['overall_sentiment_score'].mean()

In [40]:
# --- JPY news sentiment ---
# Request articles published since 2022-09-01. The start date has to be sent as
# `time_from=<YYYYMMDDTHHMM>`; a `from_date:...` fragment is not a key=value
# pair, so it would not reach the API as a filter.
url2 = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=forex:JPY&time_from=20220901T0000&apikey={}'.format(ALPHA_VANTAGE_API_KEY)
# requests has no default timeout; bound the wait and stop on HTTP errors.
r2 = requests.get(url2, timeout=30)
r2.raise_for_status()
data2 = r2.json()

# The article records are read from the 'feed' entry by name instead of by
# position. A reply without 'feed' is presumably an error or rate-limit
# message (TODO confirm against the API docs); report it instead of failing later.
if 'feed' not in data2:
    raise ValueError('No "feed" in NEWS_SENTIMENT response for FOREX:JPY: {}'.format(data2))

dftest = pd.DataFrame(data2['feed'])

# One row per ticker entry, without the columns that are not used further on,
# and with the rows renumbered from 0.
dftest = (
    dftest
    .explode('ticker_sentiment')
    .drop(columns=['summary', 'banner_image', 'category_within_source', 'source_domain', 'source', 'topics'])
    .reset_index(drop=True)
)

# Collect the index labels of rows whose ticker entry is not FOREX:JPY.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
drop_values = []
for k, v in dftest['ticker_sentiment'].items():
    if 'ticker' in v and v['ticker'] != 'FOREX:JPY':
        drop_values.append(k)

# Keep only the JPY rows, highest overall sentiment score first.
jpyforexsentiment = (
    dftest
    .drop(index=drop_values)
    .sort_values(by='overall_sentiment_score', ascending=False)
)

# Tag every row with the currency code and use that tag as the index.
jpyforexsentiment['symbol'] = 'JPY'
jpyforexsentiment = jpyforexsentiment.set_index('symbol')

all_sentiment = pd.concat([all_sentiment, jpyforexsentiment])

# Average overall sentiment score for JPY, used when all_pairs is filled in.
jpymean = jpyforexsentiment['overall_sentiment_score'].mean()

In [41]:
# --- GBP news sentiment ---
# Request articles published since 2022-09-01. The start date has to be sent as
# `time_from=<YYYYMMDDTHHMM>`; a `from_date:...` fragment is not a key=value
# pair, so it would not reach the API as a filter.
url2 = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=forex:GBP&time_from=20220901T0000&apikey={}'.format(ALPHA_VANTAGE_API_KEY)
# requests has no default timeout; bound the wait and stop on HTTP errors.
r2 = requests.get(url2, timeout=30)
r2.raise_for_status()
data2 = r2.json()

# The article records are read from the 'feed' entry by name instead of by
# position. A reply without 'feed' is presumably an error or rate-limit
# message (TODO confirm against the API docs); report it instead of failing later.
if 'feed' not in data2:
    raise ValueError('No "feed" in NEWS_SENTIMENT response for FOREX:GBP: {}'.format(data2))

dftest = pd.DataFrame(data2['feed'])

# One row per ticker entry, without the columns that are not used further on,
# and with the rows renumbered from 0.
dftest = (
    dftest
    .explode('ticker_sentiment')
    .drop(columns=['summary', 'banner_image', 'category_within_source', 'source_domain', 'source', 'topics'])
    .reset_index(drop=True)
)

# Collect the index labels of rows whose ticker entry is not FOREX:GBP.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
drop_values = []
for k, v in dftest['ticker_sentiment'].items():
    if 'ticker' in v and v['ticker'] != 'FOREX:GBP':
        drop_values.append(k)

# Keep only the GBP rows, highest overall sentiment score first.
gbpforexsentiment = (
    dftest
    .drop(index=drop_values)
    .sort_values(by='overall_sentiment_score', ascending=False)
)

# Tag every row with the currency code and use that tag as the index.
gbpforexsentiment['symbol'] = 'GBP'
gbpforexsentiment = gbpforexsentiment.set_index('symbol')

all_sentiment = pd.concat([all_sentiment, gbpforexsentiment])

# Average overall sentiment score for GBP, used when all_pairs is filled in.
gbpmean = gbpforexsentiment['overall_sentiment_score'].mean()

In [43]:
# --- AUD news sentiment ---
# Request articles published since 2022-09-01. The start date has to be sent as
# `time_from=<YYYYMMDDTHHMM>`; a `from_date:...` fragment is not a key=value
# pair, so it would not reach the API as a filter.
url2 = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=forex:AUD&time_from=20220901T0000&apikey={}'.format(ALPHA_VANTAGE_API_KEY)
# requests has no default timeout; bound the wait and stop on HTTP errors.
r2 = requests.get(url2, timeout=30)
r2.raise_for_status()
data2 = r2.json()

# The article records are read from the 'feed' entry by name instead of by
# position. A reply without 'feed' is presumably an error or rate-limit
# message (TODO confirm against the API docs); report it instead of failing later.
if 'feed' not in data2:
    raise ValueError('No "feed" in NEWS_SENTIMENT response for FOREX:AUD: {}'.format(data2))

dftest = pd.DataFrame(data2['feed'])

# One row per ticker entry, without the columns that are not used further on,
# and with the rows renumbered from 0.
dftest = (
    dftest
    .explode('ticker_sentiment')
    .drop(columns=['summary', 'banner_image', 'category_within_source', 'source_domain', 'source', 'topics'])
    .reset_index(drop=True)
)

# Collect the index labels of rows whose ticker entry is not FOREX:AUD.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
drop_values = []
for k, v in dftest['ticker_sentiment'].items():
    if 'ticker' in v and v['ticker'] != 'FOREX:AUD':
        drop_values.append(k)

# Keep only the AUD rows, highest overall sentiment score first.
audforexsentiment = (
    dftest
    .drop(index=drop_values)
    .sort_values(by='overall_sentiment_score', ascending=False)
)

# Tag every row with the currency code and use that tag as the index.
audforexsentiment['symbol'] = 'AUD'
audforexsentiment = audforexsentiment.set_index('symbol')

all_sentiment = pd.concat([all_sentiment, audforexsentiment])

# Average overall sentiment score for AUD, used when all_pairs is filled in.
audmean = audforexsentiment['overall_sentiment_score'].mean()

In [44]:
# --- CHF news sentiment ---
# Request articles published since 2022-09-01. The start date has to be sent as
# `time_from=<YYYYMMDDTHHMM>`; a `from_date:...` fragment is not a key=value
# pair, so it would not reach the API as a filter.
url2 = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=forex:CHF&time_from=20220901T0000&apikey={}'.format(ALPHA_VANTAGE_API_KEY)
# requests has no default timeout; bound the wait and stop on HTTP errors.
r2 = requests.get(url2, timeout=30)
r2.raise_for_status()
data2 = r2.json()

# The article records are read from the 'feed' entry by name instead of by
# position. A reply without 'feed' is presumably an error or rate-limit
# message (TODO confirm against the API docs); report it instead of failing later.
if 'feed' not in data2:
    raise ValueError('No "feed" in NEWS_SENTIMENT response for FOREX:CHF: {}'.format(data2))

dftest = pd.DataFrame(data2['feed'])

# One row per ticker entry, without the columns that are not used further on,
# and with the rows renumbered from 0.
dftest = (
    dftest
    .explode('ticker_sentiment')
    .drop(columns=['summary', 'banner_image', 'category_within_source', 'source_domain', 'source', 'topics'])
    .reset_index(drop=True)
)

# Collect the index labels of rows whose ticker entry is not FOREX:CHF.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
drop_values = []
for k, v in dftest['ticker_sentiment'].items():
    if 'ticker' in v and v['ticker'] != 'FOREX:CHF':
        drop_values.append(k)

# Keep only the CHF rows, highest overall sentiment score first.
chfforexsentiment = (
    dftest
    .drop(index=drop_values)
    .sort_values(by='overall_sentiment_score', ascending=False)
)

# Tag every row with the currency code and use that tag as the index.
chfforexsentiment['symbol'] = 'CHF'
chfforexsentiment = chfforexsentiment.set_index('symbol')

all_sentiment = pd.concat([all_sentiment, chfforexsentiment])

# Average overall sentiment score for CHF, used when all_pairs is filled in.
chfmean = chfforexsentiment['overall_sentiment_score'].mean()

In [45]:
# --- CAD news sentiment ---
# Request articles published since 2022-09-01. The start date has to be sent as
# `time_from=<YYYYMMDDTHHMM>`; a `from_date:...` fragment is not a key=value
# pair, so it would not reach the API as a filter.
url2 = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=forex:CAD&time_from=20220901T0000&apikey={}'.format(ALPHA_VANTAGE_API_KEY)
# requests has no default timeout; bound the wait and stop on HTTP errors.
r2 = requests.get(url2, timeout=30)
r2.raise_for_status()
data2 = r2.json()

# The article records are read from the 'feed' entry by name instead of by
# position. A reply without 'feed' is presumably an error or rate-limit
# message (TODO confirm against the API docs); report it instead of failing later.
if 'feed' not in data2:
    raise ValueError('No "feed" in NEWS_SENTIMENT response for FOREX:CAD: {}'.format(data2))

dftest = pd.DataFrame(data2['feed'])

# One row per ticker entry, without the columns that are not used further on,
# and with the rows renumbered from 0.
dftest = (
    dftest
    .explode('ticker_sentiment')
    .drop(columns=['summary', 'banner_image', 'category_within_source', 'source_domain', 'source', 'topics'])
    .reset_index(drop=True)
)

# Collect the index labels of rows whose ticker entry is not FOREX:CAD.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
drop_values = []
for k, v in dftest['ticker_sentiment'].items():
    if 'ticker' in v and v['ticker'] != 'FOREX:CAD':
        drop_values.append(k)

# Keep only the CAD rows, highest overall sentiment score first.
cadforexsentiment = (
    dftest
    .drop(index=drop_values)
    .sort_values(by='overall_sentiment_score', ascending=False)
)

# Tag every row with the currency code and use that tag as the index.
cadforexsentiment['symbol'] = 'CAD'
cadforexsentiment = cadforexsentiment.set_index('symbol')

all_sentiment = pd.concat([all_sentiment, cadforexsentiment])

# Average overall sentiment score for CAD, used when all_pairs is filled in.
cadmean = cadforexsentiment['overall_sentiment_score'].mean()

In [46]:
# --- HKD news sentiment ---
# Request articles published since 2022-09-01. The start date has to be sent as
# `time_from=<YYYYMMDDTHHMM>`; a `from_date:...` fragment is not a key=value
# pair, so it would not reach the API as a filter.
url2 = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=forex:HKD&time_from=20220901T0000&apikey={}'.format(ALPHA_VANTAGE_API_KEY)
# requests has no default timeout; bound the wait and stop on HTTP errors.
r2 = requests.get(url2, timeout=30)
r2.raise_for_status()
data2 = r2.json()

# The article records are read from the 'feed' entry by name instead of by
# position. A reply without 'feed' is presumably an error or rate-limit
# message (TODO confirm against the API docs); report it instead of failing later.
if 'feed' not in data2:
    raise ValueError('No "feed" in NEWS_SENTIMENT response for FOREX:HKD: {}'.format(data2))

dftest = pd.DataFrame(data2['feed'])

# One row per ticker entry, without the columns that are not used further on,
# and with the rows renumbered from 0.
dftest = (
    dftest
    .explode('ticker_sentiment')
    .drop(columns=['summary', 'banner_image', 'category_within_source', 'source_domain', 'source', 'topics'])
    .reset_index(drop=True)
)

# Collect the index labels of rows whose ticker entry is not FOREX:HKD.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
drop_values = []
for k, v in dftest['ticker_sentiment'].items():
    if 'ticker' in v and v['ticker'] != 'FOREX:HKD':
        drop_values.append(k)

# Keep only the HKD rows, highest overall sentiment score first.
hkdforexsentiment = (
    dftest
    .drop(index=drop_values)
    .sort_values(by='overall_sentiment_score', ascending=False)
)

# Tag every row with the currency code and use that tag as the index.
hkdforexsentiment['symbol'] = 'HKD'
hkdforexsentiment = hkdforexsentiment.set_index('symbol')

all_sentiment = pd.concat([all_sentiment, hkdforexsentiment])

# Average overall sentiment score for HKD, used when all_pairs is filled in.
hkdmean = hkdforexsentiment['overall_sentiment_score'].mean()

In [47]:
# --- SEK news sentiment ---
# Request articles published since 2022-09-01. The start date has to be sent as
# `time_from=<YYYYMMDDTHHMM>`; a `from_date:...` fragment is not a key=value
# pair, so it would not reach the API as a filter.
url2 = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=forex:SEK&time_from=20220901T0000&apikey={}'.format(ALPHA_VANTAGE_API_KEY)
# requests has no default timeout; bound the wait and stop on HTTP errors.
r2 = requests.get(url2, timeout=30)
r2.raise_for_status()
data2 = r2.json()

# The article records are read from the 'feed' entry by name instead of by
# position. A reply without 'feed' is presumably an error or rate-limit
# message (TODO confirm against the API docs); report it instead of failing later.
if 'feed' not in data2:
    raise ValueError('No "feed" in NEWS_SENTIMENT response for FOREX:SEK: {}'.format(data2))

dftest = pd.DataFrame(data2['feed'])

# One row per ticker entry, without the columns that are not used further on,
# and with the rows renumbered from 0.
dftest = (
    dftest
    .explode('ticker_sentiment')
    .drop(columns=['summary', 'banner_image', 'category_within_source', 'source_domain', 'source', 'topics'])
    .reset_index(drop=True)
)

# Collect the index labels of rows whose ticker entry is not FOREX:SEK.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
drop_values = []
for k, v in dftest['ticker_sentiment'].items():
    if 'ticker' in v and v['ticker'] != 'FOREX:SEK':
        drop_values.append(k)

# Keep only the SEK rows, highest overall sentiment score first.
sekforexsentiment = (
    dftest
    .drop(index=drop_values)
    .sort_values(by='overall_sentiment_score', ascending=False)
)

# Tag every row with the currency code and use that tag as the index.
sekforexsentiment['symbol'] = 'SEK'
sekforexsentiment = sekforexsentiment.set_index('symbol')

all_sentiment = pd.concat([all_sentiment, sekforexsentiment])

# Average overall sentiment score for SEK, used when all_pairs is filled in.
sekmean = sekforexsentiment['overall_sentiment_score'].mean()

In [48]:
# --- NZD news sentiment ---
# Request articles published since 2022-09-01. The start date has to be sent as
# `time_from=<YYYYMMDDTHHMM>`; a `from_date:...` fragment is not a key=value
# pair, so it would not reach the API as a filter.
url2 = 'https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=forex:NZD&time_from=20220901T0000&apikey={}'.format(ALPHA_VANTAGE_API_KEY)
# requests has no default timeout; bound the wait and stop on HTTP errors.
r2 = requests.get(url2, timeout=30)
r2.raise_for_status()
data2 = r2.json()

# The article records are read from the 'feed' entry by name instead of by
# position. A reply without 'feed' is presumably an error or rate-limit
# message (TODO confirm against the API docs); report it instead of failing later.
if 'feed' not in data2:
    raise ValueError('No "feed" in NEWS_SENTIMENT response for FOREX:NZD: {}'.format(data2))

dftest = pd.DataFrame(data2['feed'])

# One row per ticker entry, without the columns that are not used further on,
# and with the rows renumbered from 0.
dftest = (
    dftest
    .explode('ticker_sentiment')
    .drop(columns=['summary', 'banner_image', 'category_within_source', 'source_domain', 'source', 'topics'])
    .reset_index(drop=True)
)

# Collect the index labels of rows whose ticker entry is not FOREX:NZD.
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
drop_values = []
for k, v in dftest['ticker_sentiment'].items():
    if 'ticker' in v and v['ticker'] != 'FOREX:NZD':
        drop_values.append(k)

# Keep only the NZD rows, highest overall sentiment score first.
nzdforexsentiment = (
    dftest
    .drop(index=drop_values)
    .sort_values(by='overall_sentiment_score', ascending=False)
)

# Tag every row with the currency code and use that tag as the index.
nzdforexsentiment['symbol'] = 'NZD'
nzdforexsentiment = nzdforexsentiment.set_index('symbol')

all_sentiment = pd.concat([all_sentiment, nzdforexsentiment])

# Average overall sentiment score for NZD, used when all_pairs is filled in.
nzdmean = nzdforexsentiment['overall_sentiment_score'].mean()

Save all_sentiment to pickle:

In [49]:
# Persist the combined sentiment frame to 'all_sentiment.pkl' (path is relative
# to the current working directory).
all_sentiment.to_pickle('all_sentiment.pkl')

* Use a for loop to apply sentiment score mean to sentiment score column in all_pairs:

In [50]:
# Mean overall sentiment score for each target ("to") currency.
sentiment_means = {
    'USD': mean,
    'EUR': eurmean,
    'JPY': jpymean,
    'GBP': gbpmean,
    'AUD': audmean,
    'CHF': chfmean,
    'CAD': cadmean,
    'HKD': hkdmean,
    'SEK': sekmean,
    'NZD': nzdmean,
}

# Each .loc assignment already updates every matching row at once, so one pass
# per currency is sufficient; repeating the assignments once per row of
# all_pairs only multiplies the work without changing the result.
for symbol, symbol_mean in sentiment_means.items():
    all_pairs.loc[all_pairs['to symbol'] == symbol, 'sentiment_score'] = symbol_mean

View all_pairs:

In [51]:
# Display the first ten rows of all_pairs.
all_pairs.head(10)

Unnamed: 0_level_0,from symbol,to symbol,open,high,low,close,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,rolling_mean,expanding_mean,time,sentiment_score
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2022-06-22,USD,EUR,0.9489,0.9547,0.9426,0.9462,,,,,,,,,,0,0.092788
2022-06-23,USD,EUR,0.946,0.9536,0.9449,0.9499,0.9462,,,,,,,,0.94805,1,0.092788
2022-06-24,USD,EUR,0.9501,0.9509,0.9459,0.9471,0.9499,0.9462,,,,,,,0.947733,2,0.092788
2022-06-27,USD,EUR,0.947,0.9476,0.9418,0.9446,0.9471,0.9499,0.9462,,,,,,0.94695,3,0.092788
2022-06-28,USD,EUR,0.9445,0.9518,0.9427,0.9501,0.9446,0.9471,0.9499,0.9462,,,,,0.94758,4,0.092788
2022-06-29,USD,EUR,0.9501,0.958,0.9488,0.9575,0.9501,0.9446,0.9471,0.9499,0.9462,,,,0.949233,5,0.092788
2022-06-30,USD,EUR,0.9576,0.9628,0.9533,0.954,0.9575,0.9501,0.9446,0.9471,0.9499,0.9462,,0.949914,0.949914,6,0.092788
2022-07-01,USD,EUR,0.9537,0.9641,0.9537,0.9591,0.954,0.9575,0.9501,0.9446,0.9471,0.9499,0.9462,0.951757,0.951063,7,0.092788
2022-07-04,USD,EUR,0.9592,0.9597,0.9556,0.9593,0.9591,0.954,0.9575,0.9501,0.9446,0.9471,0.9499,0.9531,0.951978,8,0.092788
2022-07-05,USD,EUR,0.959,0.9768,0.9569,0.9739,0.9593,0.9591,0.954,0.9575,0.9501,0.9446,0.9471,0.956929,0.95417,9,0.092788


In [52]:
# Display the last ten rows of all_pairs.
all_pairs.tail(10)

Unnamed: 0_level_0,from symbol,to symbol,open,high,low,close,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,rolling_mean,expanding_mean,time,sentiment_score
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2022-10-26,NZD,SEK,6.2921,6.3517,6.2853,6.3005,6.2852,6.3241,6.3951,6.3897,6.3352,6.284,6.2769,6.330543,6.411047,90,0.160037
2022-10-27,NZD,SEK,6.3024,6.4091,6.2454,6.3798,6.3005,6.2852,6.3241,6.3951,6.3897,6.3352,6.284,6.344229,6.410708,91,0.160037
2022-10-28,NZD,SEK,6.3808,6.4186,6.3355,6.3431,6.3798,6.3005,6.2852,6.3241,6.3951,6.3897,6.3352,6.345357,6.409981,92,0.160037
2022-10-31,NZD,SEK,6.3576,6.4277,6.3427,6.3853,6.3431,6.3798,6.3005,6.2852,6.3241,6.3951,6.3897,6.344729,6.409718,93,0.160037
2022-11-01,NZD,SEK,6.3853,6.4542,6.3853,6.4308,6.3853,6.3431,6.3798,6.3005,6.2852,6.3241,6.3951,6.349829,6.40994,94,0.160037
2022-11-02,NZD,SEK,6.4285,6.4854,6.4167,6.4521,6.4308,6.3853,6.3431,6.3798,6.3005,6.2852,6.3241,6.368114,6.410379,95,0.160037
2022-11-03,NZD,SEK,6.4567,6.4721,6.3954,6.448,6.4521,6.4308,6.3853,6.3431,6.3798,6.3005,6.2852,6.391371,6.410767,96,0.160037
2022-11-04,NZD,SEK,6.4347,6.4877,6.4347,6.4581,6.448,6.4521,6.4308,6.3853,6.3431,6.3798,6.3005,6.413886,6.41125,97,0.160037
2022-11-07,NZD,SEK,6.4066,6.4443,6.3813,6.4368,6.4581,6.448,6.4521,6.4308,6.3853,6.3431,6.3798,6.422029,6.411508,98,0.160037
2022-11-08,NZD,SEK,6.438,6.4468,6.3551,6.4039,6.4368,6.4581,6.448,6.4521,6.4308,6.3853,6.3431,6.430714,6.411432,99,0.160037


Save all_pairs to pickle:

In [53]:
# Persist the combined pairs frame (including 'sentiment_score') to
# 'all_pairs.pkl' (path is relative to the current working directory).
all_pairs.to_pickle('all_pairs.pkl')