# Introduction
This is a small web scraping project based on a tutorial by `Alex The Analyst`; the link to his YouTube video is below.

`https://www.youtube.com/watch?v=8dTpNajxaH0&list=PLUaB-1hjhk8FE_XZ87vPPSfHqb6OcM0cF&index=56`

### Getting Started
- `BeautifulSoup` is a Python library for pulling data out of HTML and XML files
- The `requests` library provides an API for sending HTTP requests and handling the responses

In [1]:
from bs4 import BeautifulSoup
import requests

In [2]:
url='https://www.scrapethissite.com/pages/forms/'

#send the GET request; the timeout (in seconds) keeps the cell from hanging forever
#if the site never answers
page = requests.get(url, timeout=30)

#stop right here on a 4xx/5xx answer instead of silently parsing an error page
page.raise_for_status()

#page.text is the raw html of the response
#'html.parser' names Python's built-in parser explicitly; the generic 'html' feature lets
#BeautifulSoup pick whichever parser happens to be installed, so results could vary by machine
soup = BeautifulSoup(page.text, 'html.parser')

In [3]:
#uncomment to inspect the full parsed html
#print(soup)

In [4]:
#find_all() returns a list-like collection of every <table> element on the page;
#this page has a single table, so position 0 is the one we want to look at
tables = soup.find_all('table')
tables[0]

<table class="table">
<tr>
<th>
                            Team Name
                        </th>
<th>
                            Year
                        </th>
<th>
                            Wins
                        </th>
<th>
                            Losses
                        </th>
<th>
                            OT Losses
                        </th>
<th>
                            Win %
                        </th>
<th>
                            Goals For (GF)
                        </th>
<th>
                            Goals Against (GA)
                        </th>
<th>
                            + / -
                        </th>
</tr>
<tr class="team">
<td class="name">
                            Boston Bruins
                        </td>
<td class="year">
                            1990
                        </td>
<td class="wins">
                            44
                        </td>
<td class="losses">
                            2

In [5]:
#alternative to the cell above: find() returns the first matching <table> directly, so no index is needed
#soup.find('table', class_='table')

In [6]:
#keep a handle on the first <table> element so the following cells can search inside it
table_matches = soup.find_all('table')
table = table_matches[0]

In [7]:
#searching for headers or column titles
#the search is scoped to `table` (not the whole page) so it matches the row search further down
#and <th> cells from anywhere else in the document can never end up in the column list
table_title = table.find_all('th')
table_title

[<th>
                             Team Name
                         </th>,
 <th>
                             Year
                         </th>,
 <th>
                             Wins
                         </th>,
 <th>
                             Losses
                         </th>,
 <th>
                             OT Losses
                         </th>,
 <th>
                             Win %
                         </th>,
 <th>
                             Goals For (GF)
                         </th>,
 <th>
                             Goals Against (GA)
                         </th>,
 <th>
                             + / -
                         </th>]

In [8]:
#take the text of each header cell and trim the surrounding whitespace/newlines
table_titles = []
for header_cell in table_title:
    table_titles.append(header_cell.text.strip())
print(table_titles)

['Team Name', 'Year', 'Wins', 'Losses', 'OT Losses', 'Win %', 'Goals For (GF)', 'Goals Against (GA)', '+ / -']


In [9]:
#importing pandas library
import pandas as pd

In [10]:
#start with an empty dataframe that has one column per scraped header title
df = pd.DataFrame(data=None, index=None, columns=table_titles)
df

Unnamed: 0,Team Name,Year,Wins,Losses,OT Losses,Win %,Goals For (GF),Goals Against (GA),+ / -


In [16]:
#every <tr> element inside the table, in document order
column_data = table.find_all(name='tr')
#column_data

In [12]:
#loop for filtering the data
#column_data[0] is the header row (only <th> cells), so the data rows start at index 1
rows = []
for row in column_data[1:]:
    row_data = row.find_all('td')
    if not row_data:
        #a <tr> without any <td> carries no team record
        continue
    single_row_data = [data.text.strip() for data in row_data]
    rows.append(single_row_data)

#build the dataframe in one call rather than appending with df.loc[len(df)] inside the loop:
#re-running this cell now rebuilds df instead of stacking a duplicate copy of every row,
#and pandas no longer has to enlarge the frame once per row
df = pd.DataFrame(rows, columns=table_titles)

In [13]:
#preview the first five scraped rows
df.head(n=5)

Unnamed: 0,Team Name,Year,Wins,Losses,OT Losses,Win %,Goals For (GF),Goals Against (GA),+ / -
0,Boston Bruins,1990,44,24,,0.55,299,264,35
1,Buffalo Sabres,1990,31,30,,0.388,292,278,14
2,Calgary Flames,1990,46,26,,0.575,344,263,81
3,Chicago Blackhawks,1990,49,23,,0.613,284,211,73
4,Detroit Red Wings,1990,34,38,,0.425,273,298,-25


In [14]:
#the following line can be used to export the created dataframe to a csv file
#df.to_csv(r'path\HokeyTeam.csv', index = False)