# This is a notebook for web scraping the top 25 mutual funds from Yahoo Finance

### First step is installing the required libraries (Remove the # sign to install)

In [1]:
#pip install bs4
#pip install requests
#pip install yfinance

### Second step is importing the packages and modules
#### csv is for writing/saving CSV files
#### bs4 is for pulling data out of HTML
#### requests is for requesting and fetching the URL
#### yfinance is for getting the data of the mutual funds

In [2]:
import csv
from io import StringIO
from bs4 import BeautifulSoup
import requests
import yfinance as yf

### Next step is to fetch the URL
#### - First, define the URL of the top 25 mutual funds.
#### - After that, get the data from the specified URL and store it in the 'response' variable.
#### - With the 'response' variable we can inspect the result of the get() function.
#### - Code=Reason: 200=OK; 404=Not Found; 403=Forbidden

In [3]:
url_topmutual_funds = 'https://finance.yahoo.com/mutualfunds'
# Always pass a timeout: without one, requests waits indefinitely when the
# server never answers, which would hang this notebook cell.
response = requests.get(url_topmutual_funds, timeout=30)
# Last expression of the cell, so the notebook displays the Response object
# (its status code tells whether the download succeeded).
response

In [None]:
# Show the textual reason that accompanies the HTTP status code (e.g. 'OK' for 200).
response.reason

### With BeautifulSoup we get a BeautifulSoup object, which represents the downloaded HTML document as a nested data structure

In [4]:
# Parse the body of the HTTP response with the 'html.parser' backend.
soup = BeautifulSoup(markup=response.text, features='html.parser')

### After that we select all of the table rows (tr)
#### Find every 'tr' tag with class: 'simpTblRow' ( .find_all('tag', 'class'))

In [5]:
# Every <tr> whose CSS class is 'simpTblRow' is one row of the funds table.
funds = soup.find_all('tr', class_='simpTblRow')

### Once we have the table rows, we need to get the symbol and the name of each mutual fund
#### In the 'get_mutualfund' function, we look for the 'a' tag(s)\* inside the 'td' tag of a row.
#### In the 'a' tag, the symbol is the text and the name is the title of the tag, so we get the symbol with the .text attribute and the name with the .get('title') method.
#### We use a for loop to collect the symbols and the names of the 25 mutual funds.
#### \*There is just one 'a' tag within the 'td' tag.

In [6]:
def get_mutualfund(fund):
    """Return the ``(symbol, name)`` tuple of one fund table row.

    The link is reached through ``fund.td.a``. The symbol is the link's
    text and the name is the value of its ``title`` attribute, as returned
    by ``.get('title')``.
    """
    link = fund.td.a
    return (link.text, link.get('title'))
    
# Build one (symbol, name) tuple per scraped table row.
mutualfunds = [get_mutualfund(row) for row in funds]

### Save the mutual funds' symbols and names as csv

In [7]:
# newline='' is what the csv module expects so that it can control the line
# endings itself.
with open('top_mutual_funds.csv', mode='w', encoding='utf-8', newline='') as csv_file:
    # Header row first, followed by one row per (symbol, name) tuple.
    rows = [['Symbol', 'Name'], *mutualfunds]
    csv.writer(csv_file).writerows(rows)

### Create a list with the symbols only
#### The reason is that we only need the symbol of a mutual fund to collect its historical data.

In [8]:
# The symbol is the first element of every (symbol, name) tuple.
symbols = [entry[0] for entry in mutualfunds]

### Define a function to get the historical data of the fund
#### With the .Ticker() function we get information about the specified mutual fund.
#### The .history() function gives us the historical data of the mutual fund for a specified period.

In [9]:
def get_history(symbol, period):
    """Return the historical data of ``symbol`` for the given ``period``.

    ``symbol`` is passed to ``yf.Ticker`` and ``period`` is forwarded to
    the ticker's ``history`` method (the notebook uses ``"5y"``). The
    result of ``history`` is returned unchanged.
    """
    ticker = yf.Ticker(symbol)
    return ticker.history(period=period)

### Write a for loop to get the historical data of all funds and save them as csv files (into current folder).

In [10]:
# Download five years of history for every fund and store each one as
# '<SYMBOL>_history.csv' in the current working directory.
for symbol in symbols:
    # Fetch exactly once per symbol: every get_history() call goes out to
    # the network, so the result is written straight to disk instead of
    # being requested a second time. No per-symbol global variable is
    # created either; scraped text should not end up as variable names.
    get_history(symbol, "5y").to_csv(symbol + '_history.csv')