In [None]:
import os

import pandas as pd
import requests as rq

In [None]:
# Build a {currency name: symbol} lookup from the second table that pandas
# finds on the Wikipedia "List of cryptocurrencies" page.
df_full = pd.read_html('https://en.wikipedia.org/wiki/List_of_cryptocurrencies')[1]
name_sym_map = (
    df_full[['Currency', 'Symbol']]
    .rename(columns={'Currency': 'name', 'Symbol': 'symbol'})
    # Keep only the text before the first comma of each symbol cell.
    .assign(symbol=lambda table: table['symbol'].str.split(',').str[0])
    .set_index('name')
)
name_sym_dict = name_sym_map['symbol'].to_dict()
# import pprint
# pprint.pprint(name_sym_dict)

In [None]:
# Read the Alpha Vantage API key from the environment instead of committing it
# to the notebook. Set it before starting the kernel, e.g.
#   export ALPHA_VANTAGE_API_KEY=<your key>
# NOTE(review): the key that used to be hardcoded in this cell should be
# treated as leaked and rotated.
alpha_vantage_key = os.environ.get('ALPHA_VANTAGE_API_KEY', '')
if not alpha_vantage_key:
    print('WARNING: ALPHA_VANTAGE_API_KEY is not set; Alpha Vantage requests will likely fail.')

In [None]:
# Currency names to query; each is looked up in name_sym_dict to get its symbol.
crypto_names_selection = [
    'Bitcoin',
    'Ethereum',
    'XRP',
    'Dogecoin',
]

Alpha Vantage News & Sentiment API documentation: https://www.alphavantage.co/documentation/#news-sentiment

In [None]:
# Endpoint passed to get_multiple() as the URL for every request.
alpha_vantage_base_url = 'https://www.alphavantage.co/query'

In [None]:
def get_multiple(base_url, parm_list, timeout=30):
    """Send one GET request per entry of ``parm_list`` and collect the JSON bodies.

    Parameters
    ----------
    base_url : str
        URL every request is sent to.
    parm_list : iterable of (dict, str)
        ``(query parameters, symbol)`` pairs; the symbol is only used to label
        the corresponding result.
    timeout : float, optional
        Seconds to wait for each response before giving up (default 30), so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    list of dict
        One ``{'sym': symbol, 'data': decoded_json}`` entry per request, in the
        same order as ``parm_list``.

    Raises
    ------
    requests.HTTPError
        If a response comes back with a 4xx/5xx status code.
    """
    data_list = []
    for params, sym in parm_list:
        response = rq.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        # A body without an "items" key used to crash here with KeyError.
        # NOTE(review): Alpha Vantage appears to answer rate-limited or
        # invalid-key requests with such a body — confirm against the API docs.
        items = data.get('items') if isinstance(data, dict) else None
        if items is None:
            print(f'symbol: {sym}, no "items" key in response: {data}')
        else:
            print(f'symbol: {sym}, data.items:{items}')
        data_list.append({'sym': sym, 'data': data})
    return data_list

In [None]:
def assemble_requests(start=None, end=None):
    """Build one NEWS_SENTIMENT query per name in ``crypto_names_selection``.

    Reads the module-level ``alpha_vantage_key``, ``crypto_names_selection``
    and ``name_sym_dict``.

    Parameters
    ----------
    start : str, optional
        Date string; sent as ``time_from`` with ``T0000`` appended.
    end : str, optional
        Date string; sent as ``time_to`` with ``T0000`` appended. Only used
        when ``start`` is also given.

    Returns
    -------
    list of (dict, str)
        ``(query parameters, symbol)`` pairs, one per selected name. A name
        missing from ``name_sym_dict`` is used as its own symbol.
    """
    shared_tickers = 'COIN,FOREX:USD'
    common = {'function': 'NEWS_SENTIMENT', 'apikey': alpha_vantage_key}
    if start is not None:
        common['time_from'] = f'{start}T0000'
        # time_to is only ever sent together with time_from.
        if end is not None:
            common['time_to'] = f'{end}T0000'

    def _request_for(name):
        # Fall back to the raw name when no symbol is known for it.
        symbol = name_sym_dict.get(name, name)
        query = {**common, 'tickers': f'CRYPTO:{symbol},{shared_tickers}'}
        return (query, symbol)

    return [_request_for(name) for name in crypto_names_selection]

[Optional: time_from and time_to](https://www.alphavantage.co/documentation/#news-sentiment)

The time range of the news articles you are targeting, in YYYYMMDDTHHMM format. For example: time_from=20220410T0130. If time_from is specified but time_to is missing, the API will return articles published between the time_from value and the current time.

### We are looking at daily data
so the `start` and `end` positional arguments are plain `YYYYMMDD` dates (`assemble_requests` appends `T0000` itself),
e.g.:
```
start_date = '20160101'
end_date = '20221023'
```

In [None]:
# Build the per-coin queries for the 20170101 -> 20221023 range, then fetch them.
rq_param_list = assemble_requests(start='20170101', end='20221023')
# print(rq_param_list)
data = get_multiple(base_url=alpha_vantage_base_url, parm_list=rq_param_list)


## With no `limit` parameter, the requests return these item counts

symbol: BTC, data.items:18
symbol: ETH, data.items:12
symbol: XRP, data.items:5
symbol: DOGE, data.items:6

In [None]:

import json

# Save the fetched payloads (the `data` list) to disk as JSON.
with open('./response_data.json', 'w') as response_file:
    json.dump(data, fp=response_file)

In [None]:
def flatten_feeds(data):
    """Flatten per-symbol feed payloads into one list of non-neutral sentiment rows.

    Parameters
    ----------
    data : list of dict
        Records shaped like ``{'sym': symbol, 'data': payload}``. ``payload``
        may contain a ``'feed'`` list whose items carry ``'ticker_sentiment'``,
        ``'time_published'``, ``'summary'`` and ``'url'``.

    Returns
    -------
    list of dict
        One new dict per ticker-sentiment entry that belongs to the record's
        symbol and is not labelled ``'Neutral'``. Each row holds the ticker
        entry's own keys plus ``'symbol'``, ``'time_published'``, ``'summary'``
        and ``'url'``.

    Notes
    -----
    The input is left untouched: rows are built as fresh dicts rather than by
    adding keys to the nested ticker dicts of ``data``.
    """
    sentiment_data = []
    for record in data:
        symbol = record['sym']
        # A payload without 'feed' contributes no rows instead of raising KeyError.
        feed = record['data'].get('feed', [])
        for feed_item in feed:
            for ticker in feed_item.get('ticker_sentiment', []):
                # Compare the part after the last ':' exactly; a suffix match
                # would also accept e.g. 'CRYPTO:WBTC' for symbol 'BTC'.
                if ticker['ticker'].split(':')[-1] != symbol:
                    continue
                if ticker['ticker_sentiment_label'] == 'Neutral':
                    continue
                sentiment_data.append({
                    **ticker,
                    'symbol': symbol,
                    'time_published': feed_item['time_published'],
                    'summary': feed_item['summary'],
                    'url': feed_item['url'],
                })
    return sentiment_data


In [None]:

# Reduce the fetched payloads to a flat list of per-ticker sentiment rows.
sentiment_data = flatten_feeds(data=data)

In [None]:
import json

# Save the flattened sentiment rows to disk as JSON.
with open('./sentiment_data.json', 'w') as sentiment_file:
    json.dump(sentiment_data, fp=sentiment_file)

In [None]:
# Turn the sentiment rows into a frame indexed and sorted by publication time.
sentiment_df = pd.DataFrame(sentiment_data)
print(len(sentiment_df))
sentiment_df = (
    sentiment_df
    .drop(columns=['ticker', 'relevance_score'])
    .assign(time_published=lambda frame: pd.to_datetime(frame['time_published']))
    .set_index('time_published')
    .sort_index()
)
sentiment_df


In [None]:
# Export the time-indexed sentiment frame as CSV.
sentiment_df.to_csv(path_or_buf='./sentiment_data.csv')