# Ramsey King
# DSC 680 - Applied Data Science
# April 9 - 2022

###  This Jupyter file will pull the talk information down from the websites speeches.byu.edu and www.churchofjesuschrist.org/study/general-conference/speakers

In [1]:
# Retrieve talks by speaker on speeches.byu.edu using BeautifulSoup

from bs4 import BeautifulSoup as bs
import requests
# import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re

In [6]:
# Get list of urls for each of the topics
# For my reference, number of talks by speaker:  Nelson - 22, Maxwell - 30, Holland - 27, Hinckley - 41, Monson - 14, Oaks - 35, Eyring - 25 (Total - 194)
url = "https://speeches.byu.edu/speakers/"

url_list = [
    'russell-m-nelson/', 'neal-a-maxwell/', 'jeffrey-r-holland/', 'gordon-b-hinckley/', 'thomas-s-monson/',
    'dallin-h-oaks', 'henry-b-eyring'
]

links = []
new_links = []

# some of the links are repeated with '?M=V' and '?M=A' and those will be removed so we can have a unique set of links to pull talk information from.

for u in url_list:
    response = requests.get(url+u)
    html = response.content
    video = '?M=V'
    soup = bs(html, 'lxml')
    for link in soup.findAll("a", attrs={'href': re.compile("^https://speeches.byu.edu/talks/[\D+]")}):
        links.append(link.get('href').replace(video, ''))

    audio = '?M=A'
    for link in links:
        new_link = link.replace(audio, '')
        new_links.append(new_link)
    new_links = list(set(new_links))



In [11]:
len(new_links)

195

In [12]:
# We may have one duplicate, so something to be aware when we pull the talk data.

'''Retrieves talk text from each of the urls using beautiful soup.  Information such as the talk/sermon text, title, topic, date, and speaker will be retreived
and put into a dataframe.  A csv file will also be saved due to the time it takes to run or in case we need to work offline.'''
talk_text = []
title_text = []
topics = []
dates = []
speakers = []
speaker_position = []

for link in new_links:
    url = link
    response = requests.get(url)
    html = response.content
    soup = bs(html, 'lxml')
    temp_list = []

    link_splitter = link.split('/')
    speakers.append(link_splitter[4].replace('-', ' '))

    result = soup.find('p', class_='single-speech__speaker-subtext single-speech__speaker-position')
    speaker_position.append(result.text if result else 'Title Not Found')

    for div in soup.find_all('div', class_='single-speech__content'):
        talk_text.append(div.get_text(strip=True))
    for h1 in soup.find_all('h1', class_='single-speech__title'):
        title_text.append(h1.get_text(strip=True))
    for p in soup.find_all('p', class_='single-speech__speaker-subtext single-speech__date'):
        dates.append(p.get_text(strip=True))
    for p in soup.find_all('p', class_='single-speech__related-tag'):
        temp_list.append(p.get_text(strip=True))
        temp_str = ','.join(temp_list)
    topics.append(temp_str)


dict = {'titles': title_text , 'speaker': speakers, 'speaker_position': speaker_position , 'date': dates, 'talks': talk_text, 'topics': topics, }
df = pd.DataFrame(dict)
df.to_csv('talks.csv')

df.head()

Unnamed: 0,titles,speaker,speaker_position,date,talks,topics
0,On the Lord’s Errand,jeffrey r holland,president of Brigham Young University,"March 27, 1985",My remarks this morning will be a little diffe...,BYU
1,At Their Most Enlightened and Alert,jeffrey r holland,President of Brigham Young University,"September 6, 1988",Welcome back to school. As Sister Holland has ...,"Government,Honor"
2,Some Things We Have Learned—Together,jeffrey r and patricia t holland_things learne...,President of Brigham Young University and Wife,"January 15, 1985",Jeff:Each time we have given an address to thi...,"Love,Marriage,Collection: Love and Marriage,Po..."
3,Sharing Insights from My Life,neal a maxwell,of the Quorum of the Twelve Apostles,"January 12, 1999",It is a special time for you as students in te...,"Life,Spiritual Growth"
4,Good Judgment and Common Sense,henry b eyring,Commissioner of the Church Educational System,"August 16, 1985","To succeed in life, we need the kind of good j...","Learning,Life"
