# Extracting the European Stocks data

### Below python code extracts the stocks data from yfinance API against provided stock codes from CSV file

#### Author: Rohan Indrajeet Jadhav [x20169043@studemt.ncirl.ie]

#### Contents

1. Import the CSV file for reading list of stocks and their countries
2. Extract the data for those stocks from yfinance api
3. pre-process the data before loading into MongoDB
4. Load the data into MongoDB
    a. DB name: covidStockAnalysis
    b. Collection Name: stocksData

In [1]:
# Importing the libraries required 

from pprint import pprint
import pandas as pd
import yfinance as yf
import datetime 
import pandas as pd
from pymongo import MongoClient
from pandas_datareader import data as pdr
import csv
import os


In [2]:
# Setting up the dates for extracting the stock data within given range
# Targeting for time frame from where COVID started and till now.

start = datetime.datetime(2019,12,1) 
end = datetime.datetime.now() 

dateTimeVar = "Stock data will be extracted from {0} till {1}".format(start,end)
print(dateTimeVar)

Stock data will be extracted from 2019-12-01 00:00:00 till 2021-04-23 22:22:18.882845


### Reading the CSV file names StocksExchange.csv to extract list of stocks and country name

![alt text](stockformat.PNG "Title")

In [3]:
# Reading the CSV file names StocksExchange.csv for stock codes and their countries

inputStocks = []
with open('StocksExchange.csv') as stockDetails:
    csv_reader = csv.reader(stockDetails, delimiter=',')
    headerChecker = 0
    for row in csv_reader:
        if(headerChecker == 0 ):
            headerChecker += 1
            continue
               
        else:
            inputStocks.append(row) 

print(inputStocks)

[['SOFIX', 'Austria'], ['BGBX40', 'Austria'], ['BGREIT', 'Austria'], ['BGTR30', 'Austria'], ['CGIX', 'Austria'], ['HZVG', 'Belgium'], ['IGH', 'Belgium'], ['DLKV', 'Belgium'], ['ATPL', 'Belgium'], ['DOY', 'Ireland'], ['A5G', 'Ireland'], ['DD7E', 'Ireland'], ['DD7D', 'Ireland'], ['BIRG', 'Ireland'], ['C5H', 'Ireland'], ['DD8A', 'Ireland'], ['DD8B', 'Ireland'], ['CRG', 'Ireland'], ['DHG', 'Ireland'], ['DLE', 'Ireland'], ['GUI1', 'Ireland'], ['DQ7A', 'Ireland'], ['IR5B', 'Ireland'], ['IRES', 'Ireland'], ['ORPH', 'Ireland'], ['ENI', 'Italy'], ['ENEL', 'Italy'], ['ISP', 'Italy'], ['UCG', 'Italy'], ['TS', 'Italy'], ['ATL', 'Italy'], ['RDSA', 'Netherland'], ['UNA', 'Netherland'], ['ASML', 'Netherland'], ['HEIA', 'Netherland'], ['REL', 'Netherland'], ['ITX', 'Spain'], ['SAN', 'Spain'], ['IBE', 'Spain'], ['BBVA', 'Spain'], ['TEF', 'Spain'], ['AMS', 'Spain'], ['MTS', 'Spain'], ['IEKG', 'Romania'], ['POSR', 'Romania'], ['MELR', 'Romania'], ['ZVTG', 'Romania'], ['LKPG', 'Romania'], ['DAX30', 'Germa

In [4]:
#Testing the MongoDB connection before loading the extracted data 
# DB name used : "covidStockAnalysis"
#Collection used : "stocksData"

try:
    client = MongoClient()
    print(client)
    db = client.stockDB
    print(db)
    collection = db.stocksData
except:
    print("MongoDB connection Issue please check details: ")

MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)
Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'stockDB')


### Extracts the stock data against list of stock passed from csv file and formats it to load into MongoDB

In [5]:
# This command will read the stock names extracted from CSV above
# Extract the stock data from yfinance API for stock codes 
# Extracted code will get reformed into dictionary in order to load it to mongoDB in documents
# This again creates csv file whose stock data is not available right now.

totalNoOfStocks = 0
dbName = "covidStockAnalysis"
collectionName = 'stocksData'

#Remove the CSV file before creating it

try:
    os.remove('stocksListDataNotAvailable.csv')
except:
    print("Unable to remove the file")

#Dropping the collection before loading data

try:
    db[collectionName].drop();
    print('Collection dropped successfully!!')
except:
    print("Error in dropping the collection: {0} under {1} ".format(collectionName,dbName))

for stock in inputStocks:
    
    stockCode = stock[0]
    countryName = stock[1]
    print(stockCode)
    
    
    try:
        dfStockValues = yf.download(stockCode,start=start, end=end )
    except ValueError as ver:
        print("Stock Data is not available for: ", stockCode)
        
    if dfStockValues.empty:
        
        print("Stock Data is not available for: ", stockCode)
        
        try:
            with open('stocksListDataNotAvailable.csv', mode='a') as stockData:
                stockWriter = csv.writer(stockData, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                stockWriter.writerow([stock, countryName])
        except OSError:
            print("Error in reading file")
            
    else:
        
        records_in_df = len(dfStockValues.index)
        
# Adds 2 columns named stockName and CoountryName extracted from CSV file provided. yfinance api does not give the stock country name

        dfStockValues.reset_index(level=0, inplace=True)
        dfStockValues['stockName'] = stockCode
        dfStockValues['countryName'] = countryName
        after_reset = dfStockValues.reset_index()

# Checks MongoDB connection if working fine inserts the formatted record
        try:
            client = MongoClient()
            print(client)
            db = client.stockDB
            print(db)
            collection = db.stockdata
        except:
            print("MongoDB connection error")
    
        after_reset.index = after_reset.index.map(str)
        temp_dict = after_reset.transpose().to_dict()
        
        for oneOff in temp_dict.values():
            db.stocksData.insert_one(oneOff)
            
        totalNoOfStocks = totalNoOfStocks + records_in_df

print("Total records: ", totalNoOfStocks)

Collection dropped successfully!!
SOFIX
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- SOFIX: No data found, symbol may be delisted
Stock Data is not available for:  SOFIX
BGBX40
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- BGBX40: No data found, symbol may be delisted
Stock Data is not available for:  BGBX40
BGREIT
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- BGREIT: No data found, symbol may be delisted
Stock Data is not available for:  BGREIT
BGTR30
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- BGTR30: No data found, symbol may be delisted
Stock Data is not available for:  BGTR30
CGIX
[*********************100%***********************]  1 of 1 completed
MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)
Database(MongoClient(host=['localhost:27017'], document_class=dict

TEF
[*********************100%***********************]  1 of 1 completed
MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)
Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'stockDB')
AMS
[*********************100%***********************]  1 of 1 completed
MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)
Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'stockDB')
MTS
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- MTS: No data found for this date range, symbol may be delisted
Stock Data is not available for:  MTS
IEKG
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- IEKG: No data found, symbol may be delisted
Stock Data is not available for:  IEKG
POSR
[*********************100%***********************]  1 of 1 completed

1 Fail

[*********************100%***********************]  1 of 1 completed
MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)
Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'stockDB')
UL
[*********************100%***********************]  1 of 1 completed
MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)
Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'stockDB')
RHHBY
[*********************100%***********************]  1 of 1 completed
MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)
Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'stockDB')
NVS
[*********************100%***********************]  1 of 1 completed
MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True)
Database(MongoClient(host

In [6]:
# This is calling the next notbook which will read the loaded MongoDb data

%run ./read_from_mongoDB.ipynb