### Functions that scrape a given Quora link and return a data frame of usernames, profile links and answers, plus the same details for each related question.

In [140]:
import itertools
import urllib
import urllib.parse
import urllib.request

import bs4
import pandas as pd

In [156]:
def scrape_quora(input_url):
    """Scrape one Quora question page into a data frame.

    Downloads ``input_url``, parses it with BeautifulSoup (``lxml`` parser)
    and collects three things from the markup:

    * the text of every ``div.feed_item_answer_user`` (answering users),
    * the text of every ``div.ui_qtext_expanded`` (answers),
    * the ``href`` of every ``a.user`` anchor, prefixed with
      ``https://www.quora.com`` (profile links).

    A user is kept only if the first word of its text (before any comma)
    occurs inside at least one profile link; when several links match, the
    last one wins.

    Parameters
    ----------
    input_url : str
        URL of the page to fetch.

    Returns
    -------
    pandas.DataFrame
        Columns ``Users``, ``Links`` and ``Answers``.  ``Users`` and
        ``Links`` have one entry per distinct matched first name, while
        ``Answers`` has one entry per answer found.  The columns are built
        independently and joined by position only, so a row does not
        necessarily pair an answer with its author, and shorter columns
        are padded with NaN.

    Raises
    ------
    Whatever ``urllib.request.urlopen`` raises for an unreachable or
    invalid URL (e.g. ``urllib.error.URLError``).
    """
    # Read the body inside a context manager so the HTTP response is closed
    # even if the read fails.
    with urllib.request.urlopen(input_url) as response:
        page = response.read()
    soup = bs4.BeautifulSoup(page, 'lxml')

    # fetch user details
    answered_users = [
        tag.get_text()
        for tag in soup.find_all('div', {'class': 'feed_item_answer_user'})
    ]

    # fetch answers
    answers = [
        tag.get_text()
        for tag in soup.find_all('div', {'class': 'ui_qtext_expanded'})
    ]

    # fetch profile links; anchors without an href cannot be linked to and
    # are skipped instead of raising KeyError
    profile_links = []
    for anchor in soup.find_all('a', {'class': 'user'}):
        href = anchor.get('href')
        if href:
            profile_links.append('https://www.quora.com' + href)

    # Keep one profile link per user first name; this also drops the extra
    # profile links that do not belong to any answering user.
    link_by_name = {}
    for user_text in answered_users:
        name = user_text.strip().split(',')[0].split(' ')[0]
        if not name:
            # '' is a substring of every link and would create a bogus
            # empty-named user pointing at the last link on the page.
            continue
        for link in profile_links:
            if name in link:
                link_by_name[name] = link

    users = pd.Series(list(link_by_name.keys()))
    links = pd.Series(list(link_by_name.values()))
    answers = pd.Series(answers)

    # combine everything into a data frame
    return pd.concat(
        [users.rename('Users'), links.rename('Links'), answers.rename('Answers')],
        axis=1,
    )

In [96]:
def scrape_related_questions(input_url):
    """Scrape every question listed as "related" on a Quora page.

    Downloads ``input_url``, reads the text of each ``li.related_question``
    element, turns it into a URL slug by replacing spaces with ``-`` and
    calls ``scrape_quora`` on ``https://www.quora.com/<slug>``.  Each URL is
    printed before it is fetched.

    Parameters
    ----------
    input_url : str
        URL of the page whose related questions should be scraped.

    Returns
    -------
    dict
        Maps each question slug (the un-quoted text with spaces replaced by
        ``-``) to the data frame returned by ``scrape_quora``.  Questions
        whose scrape failed are absent; the failure is printed.

    Raises
    ------
    Whatever ``urllib.request.urlopen`` raises for ``input_url`` itself;
    failures on individual related questions are reported and skipped.
    """
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(input_url) as response:
        page = response.read()
    soup = bs4.BeautifulSoup(page, 'lxml')

    results = {}
    for item in soup.find_all('li', {'class': 'related_question'}):
        q_text = item.get_text().strip().replace(' ', '-')
        # Skip duplicates, and empty titles (which would scrape the site root).
        if not q_text or q_text in results:
            continue

        # Percent-encode characters that cannot appear in a request line
        # (non-ASCII, stray whitespace).  ASCII punctuation is declared safe
        # so plain-ASCII titles yield exactly the same URL as before.
        q_link = 'https://www.quora.com/' + urllib.parse.quote(
            q_text, safe="!#$%&'()*+,/:;=?@[]"
        )
        print(q_link)
        try:
            results[q_text] = scrape_quora(q_link)
        except Exception as exc:
            # Best effort: one bad question must not abort the whole batch,
            # but say why it was dropped instead of hiding the error.
            print('  skipped ({}): {}'.format(type(exc).__name__, exc))
    return results

In [158]:
# Scrape every related question of the seed page (performs network requests).
# Despite its name, `df` is a dict: scrape_related_questions returns a mapping
# from question slug to the data frame produced by scrape_quora.
df = scrape_related_questions('https://www.quora.com/Should-I-study-data-science?')

https://www.quora.com/Which-is-better-to-study,-data-science-or-big-data?
https://www.quora.com/How-good-are-Data-Science-studies-in-China?
https://www.quora.com/Are-there-any-online-Data-Science-study-groups?
https://www.quora.com/How-do-I-plan-a-data-science-self-study?
https://www.quora.com/How-do-I-get-motivation-to-study-data-science?
https://www.quora.com/Where-should-I-study-data-sciences-and/or-actuarial-science?
https://www.quora.com/Where-can-I-find-full-data-science-case-studies?
https://www.quora.com/What-is-required-to-study-data-science?
https://www.quora.com/Where-are-the-best-institutes-to-study-data-science?
https://www.quora.com/Is-Glasgow-University-a-good-place-for-studying-data-science?
https://www.quora.com/Is-data-science-evil?
https://www.quora.com/What-is-target-data-in-data-mining?
https://www.quora.com/Is-Data-Science-a-science?
https://www.quora.com/Data-science-requirements?
https://www.quora.com/What-is-the-component-of-big-data?
https://www.quora.com/Is-d

In [166]:
# `df` maps each question slug to a whole data frame.  DataFrame.from_dict
# with orient='index' expects one row of values per key, not a frame per key,
# so stack the frames with pd.concat instead: the result has a two-level
# index (question slug, row number within that question) and the
# Users / Links / Answers columns.  pd.concat raises on an empty mapping,
# hence the explicit empty-frame fallback.
dff = (
    pd.concat(df, names=['Question', 'Row'])
    if df
    else pd.DataFrame(columns=['Users', 'Links', 'Answers'])
)

In [169]:
# Write the combined result to scr.csv (relative path, so it lands in the
# current working directory).
dff.to_csv('scr.csv')