In [1]:
# Run this cell to import required libraries

from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt

## Scraping Student-Clubs site

In [2]:
data_url = "https://sites.duke.edu/prattgsps/student-clubs/"
page = requests.get(data_url)
soup = BeautifulSoup(page.content, 'html.parser')

In [3]:
# Find all elements with the class "elementor-widget-container"
elements = soup.find_all(class_="elementor-widget-container")

# # Loop through and print the elements
# i = 0
# for element in elements:
#     print(i)
#     print(element)
#     print("__________________________________")
#     i += 1
student_clubs_html = elements[6].find_all('p')
activities = {}
for student_club in student_clubs_html:
    text = student_club.get_text()
    activities[text.split(" Club")[0]] = text
activities

{'Badminton': 'Badminton Club offers a weekly opportunity for students to play badminton and network with others.\xa0 Whether you are new to the sport or highly skilled, everyone is welcome. All equipment is provided.',
 'Bean Together Coffee': 'Bean Together Coffee Club fosters wellness and community-building by hosting coffee events. By exploring coffee’s many flavors and permutations as a group, and by structuring our events around poetry recitation, discussion topics, and other community-building techniques, we will encourage attendees to develop new friendships and improve their mental well-being.',
 'Dance': 'Dance Club unites the members of the engineering community through different types of dance from a wide range of cultures and backgrounds.',
 'Diversity and Culture': 'Diversity and Culture Club seeks to connect students, faculty, and staff from all different backgrounds and engineering concentrations with exciting social events celebrating various cultures.\xa0 The club als

In [4]:
activities_df = pd.DataFrame(list(activities.items()), columns=['Name', 'Description'])

In [5]:
activities_df["Type"] = "Club"
activities_df["Reference"] = "https://sites.duke.edu/prattgsps/student-clubs/"

In [6]:
activities_df

Unnamed: 0,Name,Description,Type,Reference
0,Badminton,Badminton Club offers a weekly opportunity for...,Club,https://sites.duke.edu/prattgsps/student-clubs/
1,Bean Together Coffee,Bean Together Coffee Club fosters wellness and...,Club,https://sites.duke.edu/prattgsps/student-clubs/
2,Dance,Dance Club unites the members of the engineeri...,Club,https://sites.duke.edu/prattgsps/student-clubs/
3,Diversity and Culture,Diversity and Culture Club seeks to connect st...,Club,https://sites.duke.edu/prattgsps/student-clubs/
4,Media,Media Club engages our students interested in ...,Club,https://sites.duke.edu/prattgsps/student-clubs/
5,Sports,Sports Club provides a safe and inclusive comm...,Club,https://sites.duke.edu/prattgsps/student-clubs/
6,Sustainability,Sustainability Club aims to inspire and engage...,Club,https://sites.duke.edu/prattgsps/student-clubs/


## Scraping Student Advisory Board

In [7]:
data_url = "https://sites.duke.edu/prattgsps/engineering-masters-programs-student-advisory-board/"
page = requests.get(data_url)
soup = BeautifulSoup(page.content, 'html.parser')

In [8]:
# Find all elements with the class "elementor-widget-container"
elements = soup.find_all(class_="elementor-widget-container")

# # Loop through and print the elements
# i = 0
# for element in elements:
#     print(i)
#     print(element)
#     print("__________________________________")
#     i += 1
desc = elements[1].find('p')
description = desc.get_text()


In [9]:
# Create a dictionary for the advisory board
advisory_board = {
    "Name": ["Engineering Master’s Programs Student Advisory Board"],
    "Description": [description],
    "Type": ["Board"],
    "Reference": ["https://sites.duke.edu/prattgsps/engineering-masters-programs-student-advisory-board/"]
}

# Convert the dictionary to a DataFrame
advisory_board_df = pd.DataFrame(advisory_board)

# Print the DataFrame
advisory_board_df

Unnamed: 0,Name,Description,Type,Reference
0,Engineering Master’s Programs Student Advisory...,The Engineering Master’s Programs Student Advi...,Board,https://sites.duke.edu/prattgsps/engineering-m...


In [10]:
activities_df = pd.concat([activities_df, advisory_board_df], ignore_index = True)

## Commities

In [11]:
data_url = "https://gpsg.duke.edu/committees/"
page = requests.get(data_url)
soup = BeautifulSoup(page.content, 'html.parser')

In [12]:
elements = soup.find_all(class_="entry-content clear")
elements[0].find_all('h3')

[<h3>Standing Committees</h3>,
 <h3>Climate Crisis Committee (CCC)</h3>,
 <h3>Diversity, Equity, and Inclusion Committee (DEI)</h3>,
 <h3>GPSG CAN</h3>,
 <h3>Doctoral Student Affairs Committee (DSA)</h3>,
 <h3>External Advocacy Committee (EAC)</h3>,
 <h3>Parking and Transportation Committee (P&amp;T)</h3>,
 <h3>Rules Committee</h3>,
 <h3>International Student Affairs Committee (ISA)</h3>,
 <h3>Events Programming Committee</h3>,
 <h3>GPSG Community Pantry</h3>,
 <h3>GPSG Assembly</h3>]

In [13]:
# Initialize a dictionary to store the committee information
committee_info = {}

# Find all the <h3> tags that contain committee names
committee_names = elements[0].find_all('h3')
# Loop through the <h3> tags to extract committee information
for h3 in committee_names[1:]:
    committee_name = h3.text.strip()
    # Find the <p> tag immediately following the <h3> tag
    p_tag = h3.find_next('p')
    if p_tag:
        committee_description = p_tag.text.strip()
        # Add the committee name and description to the dictionary
        committee_info[committee_name] = committee_description

# Print the dictionary
print(committee_info)

{'Climate Crisis Committee (CCC)': 'If you want to see a truly green Duke University, join GPSG’s Climate Crisis Committee! We work on issues all across campus like divesting our endowment from fossil fuels, improving mental health resources to cope with climate change, advancing environmental justice commitments, and reducing waste from graduate and professional programs. Join us to help us build a more sustainable and equitable future!', 'Diversity, Equity, and Inclusion Committee (DEI)': 'The GPSG DEI Committee is tasked with identifying ways to continuously promote diversity, equity, and inclusion on campus. One major way this is accomplished is through connecting with DEI leaders across campus, be they students, faculty, or staff. Members of the DEI Committee work to impart meaningful change on campus through conversations with administration, passing key resolutions through the GPSG assembly, and collaboration with other student groups both within and without the GPSG body. As is

In [14]:
committees_df = pd.DataFrame(list(committee_info.items()), columns=['Name', 'Description'])
committees_df["Type"] = "Committee"
committees_df["Reference"] = "https://gpsg.duke.edu/committees/"
committees_df

Unnamed: 0,Name,Description,Type,Reference
0,Climate Crisis Committee (CCC),If you want to see a truly green Duke Universi...,Committee,https://gpsg.duke.edu/committees/
1,"Diversity, Equity, and Inclusion Committee (DEI)",The GPSG DEI Committee is tasked with identify...,Committee,https://gpsg.duke.edu/committees/
2,GPSG CAN,GPSG CAN is the Duke University graduate and p...,Committee,https://gpsg.duke.edu/committees/
3,Doctoral Student Affairs Committee (DSA),The Doctoral Student Affairs Committee is task...,Committee,https://gpsg.duke.edu/committees/
4,External Advocacy Committee (EAC),The External Advocacy Committee is tasked with...,Committee,https://gpsg.duke.edu/committees/
5,Parking and Transportation Committee (P&T),The Parking and Transportation Committee is ta...,Committee,https://gpsg.duke.edu/committees/
6,Rules Committee,The Rules Committee is tasked with maintaining...,Committee,https://gpsg.duke.edu/committees/
7,International Student Affairs Committee (ISA),The International Student Affairs Committee is...,Committee,https://gpsg.duke.edu/committees/
8,Events Programming Committee,The GPSG Director of Events Programming is cha...,Committee,https://gpsg.duke.edu/committees/
9,GPSG Community Pantry,"This is not a standing committee, but the Comm...",Committee,https://gpsg.duke.edu/committees/


In [15]:
activities_df = pd.concat([activities_df, committees_df], ignore_index = True)
activities_df

Unnamed: 0,Name,Description,Type,Reference
0,Badminton,Badminton Club offers a weekly opportunity for...,Club,https://sites.duke.edu/prattgsps/student-clubs/
1,Bean Together Coffee,Bean Together Coffee Club fosters wellness and...,Club,https://sites.duke.edu/prattgsps/student-clubs/
2,Dance,Dance Club unites the members of the engineeri...,Club,https://sites.duke.edu/prattgsps/student-clubs/
3,Diversity and Culture,Diversity and Culture Club seeks to connect st...,Club,https://sites.duke.edu/prattgsps/student-clubs/
4,Media,Media Club engages our students interested in ...,Club,https://sites.duke.edu/prattgsps/student-clubs/
5,Sports,Sports Club provides a safe and inclusive comm...,Club,https://sites.duke.edu/prattgsps/student-clubs/
6,Sustainability,Sustainability Club aims to inspire and engage...,Club,https://sites.duke.edu/prattgsps/student-clubs/
7,Engineering Master’s Programs Student Advisory...,The Engineering Master’s Programs Student Advi...,Board,https://sites.duke.edu/prattgsps/engineering-m...
8,Climate Crisis Committee (CCC),If you want to see a truly green Duke Universi...,Committee,https://gpsg.duke.edu/committees/
9,"Diversity, Equity, and Inclusion Committee (DEI)",The GPSG DEI Committee is tasked with identify...,Committee,https://gpsg.duke.edu/committees/


## More Resources

In [16]:
add_resources = [{"Name": "Lawyer Assistance Program", "Description": "The GPSG Lawyer Assistance Program provides students with legal counseling, risk reduction, and mitigation of legal issues.", "Type": "Resource", "Reference": "https://gpsg.duke.edu/resources-for-students/lawyer-assistance-program/"},
                    {"Name": "Funding for Student Groups", "Description": "The Duke Graduate and Professional Student Government encourages students to form groups around social, academic, and cultural similarities. To support these groups, the GPSG allocates a large portion of its budget to funding events and materials for these groups.", "Type": "Resource", "Reference": "https://gpsg.duke.edu/resources-for-students/resources-for-student-groups/"}]

In [17]:
add_resources_df = pd.DataFrame(add_resources)
add_resources_df

Unnamed: 0,Name,Description,Type,Reference
0,Lawyer Assistance Program,The GPSG Lawyer Assistance Program provides st...,Resource,https://gpsg.duke.edu/resources-for-students/l...
1,Funding for Student Groups,The Duke Graduate and Professional Student Gov...,Resource,https://gpsg.duke.edu/resources-for-students/r...


In [18]:
activities_df = pd.concat([activities_df, add_resources_df], ignore_index = True)
activities_df

Unnamed: 0,Name,Description,Type,Reference
0,Badminton,Badminton Club offers a weekly opportunity for...,Club,https://sites.duke.edu/prattgsps/student-clubs/
1,Bean Together Coffee,Bean Together Coffee Club fosters wellness and...,Club,https://sites.duke.edu/prattgsps/student-clubs/
2,Dance,Dance Club unites the members of the engineeri...,Club,https://sites.duke.edu/prattgsps/student-clubs/
3,Diversity and Culture,Diversity and Culture Club seeks to connect st...,Club,https://sites.duke.edu/prattgsps/student-clubs/
4,Media,Media Club engages our students interested in ...,Club,https://sites.duke.edu/prattgsps/student-clubs/
5,Sports,Sports Club provides a safe and inclusive comm...,Club,https://sites.duke.edu/prattgsps/student-clubs/
6,Sustainability,Sustainability Club aims to inspire and engage...,Club,https://sites.duke.edu/prattgsps/student-clubs/
7,Engineering Master’s Programs Student Advisory...,The Engineering Master’s Programs Student Advi...,Board,https://sites.duke.edu/prattgsps/engineering-m...
8,Climate Crisis Committee (CCC),If you want to see a truly green Duke Universi...,Committee,https://gpsg.duke.edu/committees/
9,"Diversity, Equity, and Inclusion Committee (DEI)",The GPSG DEI Committee is tasked with identify...,Committee,https://gpsg.duke.edu/committees/


In [24]:
print(activities_df[activities_df['Name'] == 'GPSG CAN']['Description'])

10    GPSG CAN is the Duke University graduate and p...
Name: Description, dtype: object


In [28]:
# for i in activities_df['Description']:
#     print(i)
#     print("____________________")

In [30]:
CAPS_resources = [{"Name": "Individual Counseling & Psychiatric Services", 
                  "Description": "Individual counseling provides students the opportunity to meet with a provider and collaboratively work to determine and agree upon goals of treatment in a one-on-one setting. It's part of Counseling and Psychological Services (CAPS)", 
                  "Type": "Service", 
                  "Reference": "https://students.duke.edu/wellness/caps/individual/"
                 },
                 {"Name": "Group Counseling", 
                  "Description": "Our Group Services include group therapy and discussion groups with a variety of goals that aim to promote wellbeing. Research demonstrates that group therapy is equally effective as individual therapy and is often the treatment of choice for student issues. It's part of Counseling and Psychological Services (CAPS)", 
                  "Type": "Service", 
                  "Reference": "https://students.duke.edu/wellness/caps/group/"
                 },
                 {"Name": "Workshops & Discussions", 
                  "Description": "We provide a wide range of interactive, skill-building workshops on various mental health topics for the entire Duke community. It's part of Counseling and Psychological Services (CAPS)", 
                  "Type": "Service", 
                  "Reference": "https://students.duke.edu/wellness/caps/"
                 },
                 {"Name": "Referrals to Community Providers", 
                  "Description": "Our primary goal is to ensure students have access to the best possible care based on their unique needs and sometimes this means working with a professional outside of Counseling and Psychological Services (CAPS). To start, you can meet with our referral coordinator or browse our list of community providers.", 
                  "Type": "Service", 
                  "Reference": "https://duke.miresource.com/"
                 }
                ]

In [31]:
CAPS_resources_df = pd.DataFrame(CAPS_resources)
CAPS_resources_df

Unnamed: 0,Name,Description,Type,Reference
0,Individual Counseling & Psychiatric Services,Individual counseling provides students the op...,Service,https://students.duke.edu/wellness/caps/indivi...
1,Group Counseling,Our Group Services include group therapy and d...,Service,https://students.duke.edu/wellness/caps/group/
2,Workshops & Discussions,"We provide a wide range of interactive, skill-...",Service,https://students.duke.edu/wellness/caps/
3,Referrals to Community Providers,Our primary goal is to ensure students have ac...,Service,https://duke.miresource.com/


In [32]:
activities_df = pd.concat([activities_df, CAPS_resources_df], ignore_index = True)
activities_df

Unnamed: 0,Name,Description,Type,Reference
0,Badminton,Badminton Club offers a weekly opportunity for...,Club,https://sites.duke.edu/prattgsps/student-clubs/
1,Bean Together Coffee,Bean Together Coffee Club fosters wellness and...,Club,https://sites.duke.edu/prattgsps/student-clubs/
2,Dance,Dance Club unites the members of the engineeri...,Club,https://sites.duke.edu/prattgsps/student-clubs/
3,Diversity and Culture,Diversity and Culture Club seeks to connect st...,Club,https://sites.duke.edu/prattgsps/student-clubs/
4,Media,Media Club engages our students interested in ...,Club,https://sites.duke.edu/prattgsps/student-clubs/
5,Sports,Sports Club provides a safe and inclusive comm...,Club,https://sites.duke.edu/prattgsps/student-clubs/
6,Sustainability,Sustainability Club aims to inspire and engage...,Club,https://sites.duke.edu/prattgsps/student-clubs/
7,Engineering Master’s Programs Student Advisory...,The Engineering Master’s Programs Student Advi...,Board,https://sites.duke.edu/prattgsps/engineering-m...
8,Climate Crisis Committee (CCC),If you want to see a truly green Duke Universi...,Committee,https://gpsg.duke.edu/committees/
9,"Diversity, Equity, and Inclusion Committee (DEI)",The GPSG DEI Committee is tasked with identify...,Committee,https://gpsg.duke.edu/committees/


In [46]:
duke_groups_df = pd.read_csv("data_groups.csv", encoding='latin-1')
duke_groups_df

Unnamed: 0,title,link,mission
0,Academic Guides,https://dukegroups.com/AcademicGuides/,We are members of the undergraduate community'...
1,Academic Resource Center,https://arc.duke.edu/,"We know you are already smart, but why stop th..."
2,Acts2 at Duke,http://www.acts2oncampus.org/,Were a campus fellowship committed to followi...
3,Adopt A Grandparent,https://sites.duke.edu/dukeaag,Adopt a Grandparent is a student community ser...
4,AgapÃ© Campus Christian Fellowship,http://www.agapeharvestchurch.org/,To provide undergrad and graduate students at ...
...,...,...,...
578,The Pitchforks,http://www.dukepitchforks.com/,"The Pitchforks are first and foremost a TTBB, ..."
579,The Poetea Club,https://dukegroups.com/Poetea/,
580,The UNICEF Club at Duke University,https://dukegroups.com/UNICEF/,The mission of this organization will be to ab...
581,The Wild Ones,https://www.instagram.com/duke_wild_ones/,The mission of The Wild Ones is to provide und...


In [80]:
duke_groups_df[duke_groups_df.mission.isna()]

Unnamed: 0,title,link,mission
24,Bassett House Council,https://dukegroups.com/Bassett/,
26,Bell Tower House Council,https://dukegroups.com/Bell/,
32,Black Pre-Law Society at Duke,https://dukegroups.com/DBPLS/,
35,Blackwell House Council,https://dukegroups.com/Blackwell/,
36,Blue Angels Greek Dance,https://dukegroups.com/Angeloi/,
...,...,...,...
570,The Flow,https://theflowduke.com/,
572,The Fouling Community LLC,https://dukegroups.com/foulingllc/,
575,The Lyceum,https://dukegroups.com/Lyceum/,
579,The Poetea Club,https://dukegroups.com/Poetea/,


In [84]:
# Assuming duke_groups_df is your DataFrame
duke_groups_starting_df = duke_groups_df[duke_groups_df['link'].str.startswith('https://dukegroups.com/')]
duke_groups_starting_df = duke_groups_starting_df[duke_groups_starting_df.mission.isna()]
duke_groups_starting_df

Unnamed: 0,title,link,mission
24,Bassett House Council,https://dukegroups.com/Bassett/,
26,Bell Tower House Council,https://dukegroups.com/Bell/,
32,Black Pre-Law Society at Duke,https://dukegroups.com/DBPLS/,
35,Blackwell House Council,https://dukegroups.com/Blackwell/,
36,Blue Angels Greek Dance,https://dukegroups.com/Angeloi/,
...,...,...,...
564,The Chronicle,https://dukegroups.com/TheChronicle/,
572,The Fouling Community LLC,https://dukegroups.com/foulingllc/,
575,The Lyceum,https://dukegroups.com/Lyceum/,
579,The Poetea Club,https://dukegroups.com/Poetea/,


In [98]:
def mission_from_link(x):
    print(x)
    test_url = x
    test_page = requests.get(test_url)
    test_soup = BeautifulSoup(test_page.content, 'html.parser')
    elements = test_soup.find_all(class_="col-sm-10 col-md-8")
    if elements:
        mission_statement = elements[0].find('p')
        text = mission_statement.get_text().strip().replaceAll("[\\t\\n\\r]+"," ")
        if text == 'Our mission is (fill in your mission here...)':
            return "To know about it, reach out to people from the reference link."
        return text
    else:
        return "To know about it, reach out to people from the reference link."


duke_groups_starting_df['mission'] = duke_groups_starting_df['link'].apply(mission_from_link)
duke_groups_starting_df

https://dukegroups.com/Bassett/
https://dukegroups.com/Bell/
https://dukegroups.com/DBPLS/
https://dukegroups.com/Blackwell/
https://dukegroups.com/Angeloi/
https://dukegroups.com/boostbeyond/


AttributeError: 'str' object has no attribute 'replaceAll'

In [None]:
duke_groups_starting_df

In [None]:
set(duke_groups_starting_df.mission)