In [2]:
# Import dependancies
from bs4 import BeautifulSoup
import requests
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import pymongo

In [3]:
# Setup Mongo
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)
db = client.colony_square_db
collection = db.event_list

In [4]:
# Set up Chrome
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 93.0.4577
Get LATEST driver version for 93.0.4577
Get LATEST driver version for 93.0.4577
Trying to download new driver from https://chromedriver.storage.googleapis.com/93.0.4577.63/chromedriver_win32.zip
Driver has been saved in cache [C:\Users\jerom\.wdm\drivers\chromedriver\win32\93.0.4577.63]


In [5]:
# Get Colony Square Events URL
url = "https://colonysquare.com/kiosk/events/"
response = requests.get(url)

In [6]:
# Parse response
soup = BeautifulSoup(response.text, 'html.parser')

In [7]:
# Create arrays for use in dataframe
event_name = []
event_desc = []
event_date = []
event_frequency = []
event_link = []

In [8]:
# Find first 10 event divs
# Scope of project is to use only first 10 events for each venue
results = soup.find_all('div', class_='event')[:10]

In [13]:
# Loop through results
for result in results:
    
    # Get event title
    event_title = result.find('div', class_='event-title').text
    
    # Retrieve event link
    # Then, go to event page and pull first paragraph to use as description
    event_page = result.find('a', class_='nav-link')['href']
    event_response = requests.get(f'https://colonysquare.com/{event_page}')
    soup2 = BeautifulSoup(event_response.text, 'html')
    ep_results = soup2.find('p').text
    
    # Get event date
    event_date_f = result.find('div', class_='event-date').text
    
    # Finds where 
    try:
        index = event_date_f.find("September")
        event_date_clean = event_date_f[index:] 
    
    except:
        event_date_clean = event_date_f
    
    # Get event occurance
    # Since we are only using the first 10 events, they are
    # Guaranteed to be within the month of September. 
    try:
        index2 = event_date_f.find("September")
        event_freq = event_date_f[:index]
        event_freq = event_freq.replace("from", "")
        
    except:
        event_freq = "Daily/One Day Event"
    
    # Append variables to arrays
    event_name.append(event_title)
    event_link.append(f'https://colonysquare.com/{event_page}')
    event_desc.append(ep_results)
    event_date.append(event_date_clean)
    event_frequency.append(event_freq)
    
    # Create post to Mongo
    post = {
        "event_name": event_title,
        "event_description": ep_results,
        "event_date": event_date_clean,
        "event_frequency": event_freq,
        "event_link": f'https://colonysquare.com/{event_page}'
    }
    
    # Post to Mongo
    collection.insert_one(post)

In [14]:
# Create df using created arrays
colony_square_df = pd.DataFrame({
    "event_name": event_name,
    "event_description": event_desc,
    "event_date": event_date,
    "event_frequency": event_frequency,
    "event_link": event_link
})


In [15]:
colony_square_df

Unnamed: 0,event_name,event_description,event_date,event_frequency,event_link
0,National Yoga Month,"Throughout the month of September, we're encou...",September 11 to September 30,Daily,https://colonysquare.com//kiosk/events/details...
1,Game Day On The Square,It’s time to put on your game faces! Kick back...,September 11 to October 31,Daily,https://colonysquare.com//kiosk/events/details...
2,What's Happening in Politan Row,Enjoy live music every week while you explore ...,September 11 to October 30,Mondays and Saturdays,https://colonysquare.com//kiosk/events/details...
3,Drag Bingo at Politan Row,Celebrate Sunday Funday every weekend at Polit...,September 12 to October 31,Sundays,https://colonysquare.com//kiosk/events/details...
4,National Dance Week,Celebrate National Dance Week at Colony Square...,September 13,"Monday,",https://colonysquare.com//kiosk/events/details...
5,Laughing Hour On The Square,"Laughing is good for the soul, so take a load ...",September 13 to October 4,Mondays,https://colonysquare.com//kiosk/events/details...
6,Colony Kids,Enjoy Colony Kids the last Tuesday of the mont...,September 14 to October 26,Tuesdays,https://colonysquare.com//kiosk/events/details...
7,Yoga On The Square,Discover your inner zen + take your practice t...,September 15 to October 27,Wednesdays,https://colonysquare.com//kiosk/events/details...
8,CREW Atlanta Empowerment Speaker Series,Colony Square is a viewing location for the Se...,September 16,"Thursday,",https://colonysquare.com//kiosk/events/details...
9,Movies On The Square,Enjoy Movies On The Square the first Thursday ...,September 16 to October 7,Thursdays,https://colonysquare.com//kiosk/events/details...


In [16]:
colony_square_df.to_csv("colony_square.csv")

In [None]:
browser.quit()