## Web Scraping 

https://consult.gov.scot/elections-and-constitutional-development-division/draft-referendum-bill/consultation/view_respondent?uuId=633076343

## Imports

In [1]:
# import sys
# print(sys.executable)
#! /opt/homebrew/Cellar/jupyterlab/3.4.8_1/libexec/bin/python3.11 -m pip install pandas

In [2]:
## Imports
import requests
from bs4 import BeautifulSoup
import pandas as pd

## Parsing the URL of an answer (from a web-from)

In [3]:
# URL of the webpage
url = 'https://consult.gov.scot/elections-and-constitutional-development-division/draft-referendum-bill/consultation/view_respondent?uuId=633076343'

# Send a GET request to the webpage
response = requests.get(url)

# Create a BeautifulSoup object to parse the HTML content
soup = BeautifulSoup(response.content, 'html.parser')

print(soup)


<!DOCTYPE html>

<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<base href="https://consult.gov.scot/elections-and-constitutional-development-division/draft-referendum-bill/"/>
<meta content="IE=Edge" http-equiv="X-UA-Compatible"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<meta content="Find and participate in consultations run by The Scottish Government" name="description"/>
<meta content="noindex" name="robots"/>
<title>
            Response 633076343 to Consultation on a Draft Referendum Bill
             - Scottish Government
            - Citizen Space
        </title>
<link href="/static_frontend/vendor/lato-2.015/fonts.css" rel="stylesheet"/>
<link href="/static_frontend/vendor/dlb-public-ui/css/bootstrap.css" rel="stylesheet" type="text/css"/>
<link href="https://consult.gov.scot/static_frontend/vendor/dlb-public-ui/css/dlb-public-ui-compiled.css" rel="s

## Finding Respond_ID 

In [4]:
li = soup.find('div', class_= "dss-easy-reading")
child = li.findChildren("h2" , recursive=False)
respond_id=child[0].get_text().split("Response ")[1]
print(respond_id)
                        


633076343


## Finding the Questions

In [5]:
#question_elements = soup.find_all('div', class_= "cs-question-padding")
# Find the question and answer elements using appropriate HTML tags and class names
question_elements = soup.find_all('h3', class_= "dss-heading-x-small dss-heading-bold")
#print(question_elements)

questions=[]
for row in question_elements:
    questions.append(row.get_text().strip())
for i in range(0, len(questions)):
    q= questions[i].strip().replace("\n                                                ","")                                          
    char_str = '' .join((z for z in q if not z.isdigit()))
    questions[i]=char_str.replace(".","")
    
    
print(questions)

['What is your name?', 'Are you responding as an individual or an organisation?', 'The Scottish Government would like your permission to publish your consultation response Please indicate your publishing preference:', 'What are your views on the proposed arrangements for managing the referendum?', 'What are your views on the proposed technical changes to polling and count arrangements?']


## Finding the Answers

In [75]:
answer_elements = soup.find_all('div', class_= "cs-question-padding")
#print(answer_elements)



### 1. Name

In [76]:
answers=[]
answer_elements = soup.find_all('div', class_= "cs-fake-text-input")
answers.append(answer_elements[0].get_text().strip())
print(answers)

['Fiona Smith']


### 2 & 3. Radio Buttons: Indiviual/Organization;  Publish info Answers 

In [83]:
radio_answer_elements = soup.find_all('span', class_= "sr-only")
print(radio_answer_elements[3:])

[<span class="sr-only">
                
                Radio button:
                Ticked
                
              </span>, <span class="sr-only">
                
                Radio button:
                
                Unticked
              </span>, <span class="sr-only">
                
                Radio button:
                Ticked
                
              </span>, <span class="sr-only">
                
                Radio button:
                
                Unticked
              </span>, <span class="sr-only">
                
                Radio button:
                
                Unticked
              </span>, <span class="sr-only">What are your views on the proposed arrangements for managing the referendum? </span>, <span class="sr-only">What are your views on the proposed technical changes to polling and count arrangements? </span>]


In [78]:
radio_answers=[]
for row in range(3, len(radio_answer_elements)):
    if "Ticked" in radio_answer_elements[row].get_text().strip():
        radio_answers.append(row-2)
#print(radio_answers)
radio_possible_answers={}
radio_possible_answers[1]="Individual"
radio_possible_answers[2]="Organisation"
radio_possible_answers[3]="Publish response with name"
radio_possible_answers[4]="Publish response only (without name)"
radio_possible_answers[5]="Do not publish response"
for i in radio_answers:
    answers.append(radio_possible_answers[i])
print(answers)

['Fiona Smith', 'Individual', 'Publish response with name']


### 4 & 5. Views

In [79]:
answers_elements = soup.find_all('div', class_= "cs-fake-textarea-input cs-fake-textarea-input-height-10")
#print(answers_elements)

answers_tmp=[]
for row in answers_elements:
    answers.append(row.get_text().strip())

print(answers)

['Fiona Smith', 'Individual', 'Publish response with name', 'I have confidence in the polling arrangements and therefore am happy for them to contiinue . However, more scrutiny within the polling stations  woul alleviate the fears of others. Also more checks should be done that people voting are actually resident and not using family addresses.', 'The  improvements seem sensible.']


In [80]:
print(questions)

['What is your name?', 'Are you responding as an individual or an organisation?', 'The Scottish Government would like your permission to publish your consultation response Please indicate your publishing preference:', 'What are your views on the proposed arrangements for managing the referendum?', 'What are your views on the proposed technical changes to polling and count arrangements?']


In [81]:
header=["id"] + questions
answers=[respond_id] + answers
answers

['633076343',
 'Fiona Smith',
 'Individual',
 'Publish response with name',
 'I have confidence in the polling arrangements and therefore am happy for them to contiinue . However, more scrutiny within the polling stations  woul alleviate the fears of others. Also more checks should be done that people voting are actually resident and not using family addresses.',
 'The  improvements seem sensible.']

## Creating a dataframe with the questions and answers

In [82]:
# Create the DataFrame
df = pd.DataFrame([answers], columns=header)

# Print the DataFrame
df

Unnamed: 0,id,What is your name?,Are you responding as an individual or an organisation?,The Scottish Government would like your permission to publish your consultation response Please indicate your publishing preference:,What are your views on the proposed arrangements for managing the referendum?,What are your views on the proposed technical changes to polling and count arrangements?
0,633076343,Fiona Smith,Individual,Publish response with name,I have confidence in the polling arrangements ...,The improvements seem sensible.
