In [2]:
from eodhd import APIClient
import requests 
import pandas as pd
import time
from tqdm import tqdm
import os


# API token for the EOD Historical Data service.
# The EODHD_API_TOKEN environment variable takes precedence so the token does not
# have to be stored in the notebook.
# FIXME: the literal fallback is a credential kept in plain text; rotate it and
# remove the fallback once the environment variable is set wherever this runs.
api = os.environ.get("EODHD_API_TOKEN") or "64d77f6d3a60a5.24835840"



In [3]:
# First define the log function to gather the log information
def log(response, logfile, stock, offset, output_path=os.getcwd()):
    """Append one line describing an HTTP response to a semicolon-separated log file.

    If ``logfile`` does not exist yet, it is created and a header line
    (``timestamp;status_code;length;Ticker;Offset``) is written first.

    Parameters
    ----------
    response : object exposing ``status_code`` and ``text``
        The response to record; its status code and the length of its text are logged.
    logfile : str
        Path of the log file to append to.
    stock : str
        Value written to the ``Ticker`` column.
    offset : int
        Value written to the ``Offset`` column.
    output_path : str, optional
        Not used by this function; kept so existing calls keep working.
    """
    # Decide on the header before opening: append mode creates a missing file,
    # so the existence check has to happen first.
    write_header = not os.path.isfile(logfile)

    # Gather log information
    status_code = response.status_code  # Status code from the request result
    timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))  # Local time
    length = len(response.text)  # Number of characters in the response text

    # A single handle writes header and record, so their order is guaranteed
    # and the file is always closed when the block exits.
    with open(logfile, 'a') as log_file:
        if write_header:
            header = ['timestamp', 'status_code', 'length', 'Ticker', 'Offset']
            log_file.write(';'.join(header) + "\n")
        log_file.write(f'{timestamp};{status_code};{length};{stock};{offset}' + "\n")

In [4]:
# Load the NASDAQ 100 constituents (semicolon-separated CSV) into a DataFrame

# Folder used as working directory; the CSV path below is built from it.
# Set the EXAM_PROJECT_DIR environment variable to run on another machine;
# the default is the location used when the notebook was written.
project_dir = os.environ.get('EXAM_PROJECT_DIR') or 'C:/Users/Soren/Documents/GitHub/IntroSocial23/Exam'
os.chdir(project_dir)

# Get the current working directory
current_directory = os.getcwd()

# Construct the path to the CSV file
csv_file_path = os.path.join(current_directory, 'Data collection', 'Data', 'NASDAQ 100.csv')

# read_csv already returns a DataFrame, so no further conversion is needed
df_companies = pd.read_csv(csv_file_path, delimiter=';', encoding='utf-8')

In [6]:
# Settings for the news download

# Date range of the requested articles (YYYY-MM-DD)
start_date = '2020-01-01'
end_date = '2023-07-31'

# Maximum number of articles requested per stock
n_news = 1000

# Ticker symbols and company names, taken from the companies table
stock_list = df_companies['Symbol']
company_names = df_companies['Company']

# CSV file that receives one line per API response
logfile = 'log.csv'

def get_all_news(stocks, start_date, end_date, n_news, api_key, offset = 0):
    """Download news articles for each ticker and return them as one DataFrame.

    One GET request is sent per symbol in ``stocks``. Every response that is
    received is recorded through ``log`` in the module-level ``logfile``
    before it is parsed, so failed responses are logged too. Articles are
    tagged with the company name looked up in the module-level ``stock_list``
    and ``company_names`` Series. After each stock that returned articles,
    the accumulated result is written to 'df_all_news.csv', so an interrupted
    run keeps what was fetched so far.

    Parameters
    ----------
    stocks : iterable of str
        Ticker symbols to query.
    start_date, end_date : str
        Sent as the ``from`` and ``to`` query parameters.
    n_news : int
        Sent as the ``limit`` query parameter.
    api_key : str
        Sent as the ``api_token`` query parameter.
    offset : int, optional
        Sent as the ``offset`` query parameter. The same value is used for
        every stock; the function sends a single request per stock.

    Returns
    -------
    pandas.DataFrame
        All fetched articles with an added ``company`` column. Empty if
        nothing was fetched.
    """
    endpoint = 'https://eodhistoricaldata.com/api/news'
    frames = []                 # one DataFrame per stock that returned articles
    all_news = pd.DataFrame()   # returned as-is when nothing is fetched

    for stock in tqdm(stocks, desc="Fetching News", unit="stock"):
        # Let requests build and URL-encode the query string
        query = {
            'api_token': api_key,
            's': stock,
            'limit': n_news,
            'offset': offset,
            'from': start_date,
            'to': end_date,
        }
        try:
            # Without a timeout, a stalled connection would block the whole run
            response = requests.get(endpoint, params=query, timeout=60)
        except requests.RequestException as error:
            tqdm.write(f'{stock}: request failed ({error})')
            continue

        # Log before parsing so that failed responses are recorded as well
        log(response, logfile, stock, offset)

        if not response.ok:
            tqdm.write(f'{stock}: skipped, HTTP status {response.status_code}')
            continue
        try:
            news_json = response.json()
        except ValueError:
            tqdm.write(f'{stock}: skipped, response body is not valid JSON')
            continue
        if not isinstance(news_json, list):
            tqdm.write(f'{stock}: skipped, unexpected JSON payload')
            continue
        if not news_json:
            # No articles for this stock; nothing to add
            continue

        # create dataframe from json
        df_news = pd.DataFrame.from_dict(news_json)

        # a new column with the company name (None when the ticker is unknown)
        name_matches = company_names[stock_list == stock].values
        df_news['company'] = name_matches[0] if len(name_matches) else None

        # Rebuild the combined frame and save it as a checkpoint
        frames.append(df_news)
        all_news = pd.concat(frames, ignore_index=True)
        all_news.to_csv('df_all_news.csv', index=False)
    return all_news

# Run the download for every listed ticker
df_all_news = get_all_news(
    stocks=stock_list,
    start_date=start_date,
    end_date=end_date,
    n_news=n_news,
    api_key=api,
)

# Show the first rows, the dimensions and the last rows of the result
for preview in (df_all_news.head(), df_all_news.shape, df_all_news.tail()):
    print(preview)

Fetching News: 100%|██████████| 101/101 [15:08<00:00,  8.99s/stock]

                        date  \
0  2023-07-31T23:54:16+00:00   
1  2023-07-31T21:07:35+00:00   
2  2023-07-31T21:01:39+00:00   
3  2023-07-31T21:01:27+00:00   
4  2023-07-31T20:52:44+00:00   

                                               title  \
0  Asian stocks push higher on tech strength, Chi...   
1  Dow Jones Futures: Microsoft, Tesla At Key Sup...   
2  The 2023 stock market rally got a lot healthie...   
3  Apple, Amazon: 'The setup is perfect for Big T...   
4  S&amp;P 500, Nasdaq rise for fifth month in a row   

                                             content  \
0  Investing.com -- Most Asian stocks rose on Tue...   
1  Dow Jones futures: Microsoft and Tesla stock c...   
2  July has come and gone for investors, bringing...   
3  Big Tech companies Apple and Amazon are due to...   
4  Both the S&P 500 and the Nasdaq Composite clos...   

                                                link  \
0  https://finance.yahoo.com/news/asian-stocks-pu...   
1  https://finance.ya




In [11]:
# Write the collected articles to disk, leaving out the row index
output_csv = 'df_all_news.csv'
df_all_news.to_csv(output_csv, index=False)

In [9]:
# Preview the first five rows of the collected articles
df_all_news.head(n=5)

Unnamed: 0,date,title,content,link,symbols,tags,sentiment,company
0,2023-07-31T23:54:16+00:00,"Asian stocks push higher on tech strength, Chi...",Investing.com -- Most Asian stocks rose on Tue...,https://finance.yahoo.com/news/asian-stocks-pu...,"[0700.HK, 2007.HK, 9888.HK, 9988.HK, AAPL.US, ...",[],"{'polarity': 0.992, 'neg': 0.066, 'neu': 0.794...",Apple Inc
1,2023-07-31T21:07:35+00:00,"Dow Jones Futures: Microsoft, Tesla At Key Sup...",Dow Jones futures: Microsoft and Tesla stock c...,https://finance.yahoo.com/m/f19e2eed-9631-3b48...,"[AAPL.MX, AAPL.US, AAPL34.SA, ALGM.US, APC.F, ...",[],"{'polarity': 0, 'neg': 0, 'neu': 1, 'pos': 0}",Apple Inc
2,2023-07-31T21:01:39+00:00,The 2023 stock market rally got a lot healthie...,"July has come and gone for investors, bringing...",https://finance.yahoo.com/news/the-2023-stock-...,"[AAPL.MX, AAPL.US, AAPL34.SA, ABEA.F, ABEA.XET...",[],"{'polarity': 0.991, 'neg': 0.026, 'neu': 0.88,...",Apple Inc
3,2023-07-31T21:01:27+00:00,"Apple, Amazon: 'The setup is perfect for Big T...",Big Tech companies Apple and Amazon are due to...,https://finance.yahoo.com/video/apple-amazon-s...,"[AAPL.MX, AAPL.US, AAPL34.SA, AMZ.F, AMZ.XETRA...",[],"{'polarity': 1, 'neg': 0.032, 'neu': 0.794, 'p...",Apple Inc
4,2023-07-31T20:52:44+00:00,"S&amp;P 500, Nasdaq rise for fifth month in a row",Both the S&P 500 and the Nasdaq Composite clos...,https://finance.yahoo.com/video/p-500-nasdaq-r...,"[AAPL.MX, AAPL.US, AAPL34.SA, APC.F, APC.XETRA...",[],"{'polarity': 0.34, 'neg': 0, 'neu': 0.946, 'po...",Apple Inc


In [10]:
# Mean character length of titles and article bodies.
# .str.len() is vectorized and gives NaN for missing text (which mean() skips),
# whereas .apply(len) raises TypeError on the first missing value.
average_length_title = df_all_news['title'].str.len().mean()

average_length_article = df_all_news['content'].str.len().mean()

print('Average length of title: ', average_length_title)
print('Average length of article: ', average_length_article)

Average length of title:  69.11878796407045
Average length of article:  4291.63501723774


In [14]:
import pandas as pd

# Per-article counts of words, commas and periods for 'title' and 'content',
# stored as new columns on df_all_news, followed by their averages.

def _count_text_features(text):
    """Return a DataFrame of word, comma and period counts for each string in ``text``.

    Missing values give NaN instead of raising, so ``mean()`` ignores them.
    """
    return pd.DataFrame({
        'words': text.str.split().str.len(),   # whitespace-separated tokens
        'commas': text.str.count(','),
        'periods': text.str.count(r'\.'),      # str.count takes a regex, so the dot is escaped
    })

# Adds <column>_words, <column>_commas and <column>_periods for both text columns
for text_column in ('title', 'content'):
    feature_counts = _count_text_features(df_all_news[text_column])
    for feature in feature_counts.columns:
        df_all_news[f'{text_column}_{feature}'] = feature_counts[feature]

# Calculate the average values for words, commas, and periods in title and content
average_words_title = df_all_news['title_words'].mean()
average_commas_title = df_all_news['title_commas'].mean()
average_periods_title = df_all_news['title_periods'].mean()

average_words_content = df_all_news['content_words'].mean()
average_commas_content = df_all_news['content_commas'].mean()
average_periods_content = df_all_news['content_periods'].mean()

# Print the results
print('Average number of words in title: ', average_words_title)
print('Average number of commas in title: ', average_commas_title)
print('Average number of periods in title: ', average_periods_title)

print('Average number of words in content: ', average_words_content)
print('Average number of commas in content: ', average_commas_content)
print('Average number of periods in content: ', average_periods_content)

Average number of words in title:  11.00032833790661
Average number of commas in title:  0.4043129529304158
Average number of periods in title:  0.245702291329534
Average number of words in content:  665.277480710148
Average number of commas in content:  41.426370224442415
Average number of periods in content:  47.344848612772346
