# Today in ATX

Scrape the Austin 512 site (do512.com) for events, venues, dates, etc.
Put all data into a SQL database for reference.

In [1]:
# Import Libraries
import os
from datetime import datetime as dt
from datetime import timedelta as td
from getpass import getpass
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import pymongo
import pymysql
import requests
from bs4 import BeautifulSoup
from splinter import Browser
from sqlalchemy import create_engine

pymysql.install_as_MySQLdb()

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Launch the Chrome browser that splinter drives for all scraping below.
# The chromedriver location is machine-specific, so it can be overridden with
# the CHROMEDRIVER_PATH environment variable; the original macOS path is kept
# as the fallback so existing setups keep working unchanged.
executable_path = {
    'executable_path': os.environ.get('CHROMEDRIVER_PATH',
                                      '/Users/prettyvo/Downloads/chromedriver2')
}
browser = Browser('chrome', **executable_path, headless=False)

## Austin 512

Scrape the Austin 512 site for events, locations, and start times. Save all information into a dataframe.



In [3]:
# create function to scrape desired data
def austin_512(url, date_string, max_pages=5):
    """Scrape one day's event listings and return them as a DataFrame.

    Opens ``url`` in the shared module-level ``browser``, then walks the
    listing pages by following the 'Next Page' link, collecting the event
    title, venue name and start time from each page.

    Parameters
    ----------
    url : str
        Listing page to open first.
    date_string : str
        Value stored in the ``Date`` column of every returned row.
    max_pages : int, optional
        Upper bound on the number of listing pages read (default 5).

    Returns
    -------
    pandas.DataFrame
        Columns ``Date``, ``Event``, ``Venue`` and ``Start_time``.

    Raises
    ------
    ValueError
        Raised by the DataFrame constructor when the page yields different
        numbers of titles, venues and start times.
    """
    titles = []
    venues = []
    times_start = []
    browser.visit(url)

    for _ in range(max_pages):
        # Parse the browser's current page on every pass. Parsing only once
        # before the loop would re-collect the first page on each iteration.
        soup = BeautifulSoup(browser.html, 'html.parser')
        titles += soup.find_all('span', class_='ds-listing-event-title-text')
        venues += soup.find_all('div', class_='ds-venue-name')
        times_start += soup.find_all('div', class_='ds-event-time dtstart')
        try:
            browser.click_link_by_partial_text('Next Page')
        except Exception:
            # No 'Next Page' link could be clicked: treat it as the last page
            # and stop paging instead of silently looping on.
            break

    titles_list = [title.text.replace('\n', "") for title in titles]
    venues_list = [venue.text.replace('\n', "") for venue in venues]
    # keep only the first space-separated token of the time text
    times_list = [time.text.replace('\n', "").strip().split(' ')[0] for time in times_start]
    date_list = [date_string] * len(titles)

    austin = pd.DataFrame({'Date': date_list,
                           'Event': titles_list,
                           'Venue': venues_list,
                           'Start_time': times_list})

    return austin

In [4]:
# call function to pull event information for 1 week (today plus six days)
# The per-day frames are gathered in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0 and re-copies the frame per call.
start_day = dt.today()  # read the clock once so every offset shares one base date
daily_frames = []
for x in range(0, 7):
    date = (start_day + td(days=x)).strftime('%Y/%m/%d')
    url = 'https://do512.com/events/' + date
    daily_frames.append(austin_512(url, date))

# create data frame to hold all data (each day's own row index is kept as-is)
austin_df = pd.concat(daily_frames)

In [5]:
# preview the first rows of the scraped events
austin_df.head()

Unnamed: 0,Date,Event,Venue,Start_time
0,2019/04/17,"Quinn XCII: From Tour With Love w/ Ashe, Chris...",Stubb's,6:00PM
1,2019/04/17,Over the Rainbow Comedy,Barrel O'Fun,8:00PM
2,2019/04/17,"Rattlesnake Milk, Pink Mexico, & Young Mammals",Hotel Vegas,9:00PM
3,2019/04/17,Happy Hour 4-6pm: $2 Tall B...,Cheer Up Charlies,4:00PM
4,2019/04/17,The Wagoneers,The Saxon Pub,6:00PM


In [6]:
# list each venue once, in order of first appearance (Series.unique, no groupby)
# The column label carries no trailing space, so it can be selected by the
# name shown in the rendered table.
distinct_venues = pd.DataFrame({'Unique_Venues': austin_df['Venue'].unique()})
distinct_venues.head()

Unnamed: 0,Unique_Venues
0,Stubb's
1,Barrel O'Fun
2,Hotel Vegas
3,Cheer Up Charlies
4,The Saxon Pub


In [22]:
# create engine to connect to local SQL database
# The password is not stored in the notebook: it is read from the
# AUSTIN_DB_PASSWORD environment variable, or prompted for when that is unset.
# quote_plus percent-encodes characters such as '@' that would otherwise be
# read as delimiters when the connection URL is parsed.
db_password = os.environ.get('AUSTIN_DB_PASSWORD') or getpass('MySQL password for root: ')
rds_connection_string = f"root:{quote_plus(db_password)}@127.0.0.1/austin_512_db"
engine = create_engine(f'mysql://{rds_connection_string}')

In [23]:
# connect and transfer data to sql table
# No table listing is requested first: the result of Engine.table_names() was
# never displayed or used, and that method is deprecated in SQLAlchemy 1.4.
# if_exists='append' adds rows to the existing table, so re-running this cell
# inserts the same events again.
austin_df.to_sql(name='austin_events', con=engine, if_exists='append', index=False)

In [24]:
# read the table back and preview it to verify the rows were written
pd.read_sql_query(sql='select * from austin_events', con=engine).head(n=5)

Unnamed: 0,id,Date,event,venue,start_time
0,1,2019/04/17,"Quinn XCII: From Tour With Love w/ Ashe, Chris...",Stubb's,6:00PM
1,2,2019/04/17,Over the Rainbow Comedy,Barrel O'Fun,8:00PM
2,3,2019/04/17,"Rattlesnake Milk, Pink Mexico, & Young Mammals",Hotel Vegas,9:00PM
3,4,2019/04/17,Happy Hour 4-6pm: $2 Tall B...,Cheer Up Charlies,4:00PM
4,5,2019/04/17,The Wagoneers,The Saxon Pub,6:00PM


## Event Parking

Scrape the ParkMe site for parking nearest to each event venue. Collect all the data and save it into a dataframe.

In [7]:
# connect to the browser
# Loads the ParkMe map for one sample venue and parses the resulting page.
# `html` and `soup` are notebook-level names; find_parking() below visits and
# parses its own page for every venue rather than reading these.
browser.visit("https://www.parkme.com/map#Baker Street Pub & Grill")
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [8]:
# create function to scrape all desired data
def find_parking(venue, max_results=1):
    """Look up parking listed on ParkMe for ``venue``.

    Opens the ParkMe map page for the venue in the shared module-level
    ``browser`` and pairs lot names with reserve-button labels (used as the
    price) by their position on the page.

    Parameters
    ----------
    venue : str
        Venue name appended to the ParkMe map URL fragment.
    max_results : int, optional
        Maximum number of listings to return. The default of 1 returns only
        the first listing.

    Returns
    -------
    list of dict
        One record with keys ``'Venue'``, ``'Location'`` and ``'Price'`` per
        listing. Empty when the page yields no lot name or no price.
    """
    if max_results < 1:
        return []

    browser.visit("https://www.parkme.com/map#" + venue)
    soup = BeautifulSoup(browser.html, 'html.parser')
    lot_tags = soup.find_all('div', class_='fle_lot_name', limit=max_results)
    price_tags = soup.find_all(
        'a',
        class_='left btn btn-primary btn-small fle_reserve compare-res-btn',
        limit=max_results)

    # NOTE(review): the recorded output lists the same lot for every venue, so
    # the page read here may not yet reflect the requested venue - confirm.

    # zip stops at the shorter list, so a missing name or price yields fewer
    # (possibly zero) records instead of an IndexError that must be swallowed.
    return [
        {'Venue': venue, 'Location': lot.text, 'Price': price.text}
        for lot, price in zip(lot_tags, price_tags)
    ]

In [14]:
# look up parking once per distinct venue; results stay in venue order
unique_venues = austin_df['Venue'].unique().tolist()
parking = list(map(find_parking, unique_venues))


In [16]:
# pass all the data into a dataframe
# Each entry of `parking` is a (possibly empty) list of record dicts, so the
# records are flattened and the frame is built in one step. DataFrame.append
# was removed in pandas 2.0. Naming the columns explicitly keeps the frame
# well-formed even when no parking was found at all.
parking_records = [record for venue_records in parking for record in venue_records]
parking_temp = pd.DataFrame(parking_records, columns=['Venue', 'Location', 'Price'])

In [34]:
# renumber the rows 0..n-1; drop=True discards the old index instead of
# materialising it as an 'index' column that then has to be deleted
parking_df = parking_temp.reset_index(drop=True)
parking_df.head()

Unnamed: 0,Location,Price,Venue
0,G 31 Parking,$0.00,Stubb's
1,G 31 Parking,$0.00,Barrel O'Fun
2,G 31 Parking,$0.00,Hotel Vegas
3,G 31 Parking,$0.00,Cheer Up Charlies
4,G 31 Parking,$0.00,The Saxon Pub


In [None]:
# create engine to connect to local SQL database
# The password is not stored in the notebook: it is read from the
# AUSTIN_DB_PASSWORD environment variable, or prompted for when that is unset.
# quote_plus percent-encodes characters such as '@' that would otherwise be
# read as delimiters when the connection URL is parsed.
db_password = os.environ.get('AUSTIN_DB_PASSWORD') or getpass('MySQL password for root: ')
rds_connection_string = f"root:{quote_plus(db_password)}@127.0.0.1/austin_512_db"
engine = create_engine(f'mysql://{rds_connection_string}')

In [36]:
# connect and transfer data to sql table
# No table listing is requested first: the result of Engine.table_names() was
# never displayed or used, and that method is deprecated in SQLAlchemy 1.4.
# if_exists='append' adds rows to the existing table, so re-running this cell
# inserts the same parking rows again.
parking_df.to_sql(name='austin_events_parking', con=engine, if_exists='append', index=False)

In [37]:
# read the table back and preview it to verify the rows were written
pd.read_sql_query(sql='select * from austin_events_parking', con=engine).head(n=5)

Unnamed: 0,id,Location,Price,Venue
0,1,G 31 Parking,$0.00,Stubb's
1,2,G 31 Parking,$0.00,Barrel O'Fun
2,3,G 31 Parking,$0.00,Hotel Vegas
3,4,G 31 Parking,$0.00,Cheer Up Charlies
4,5,G 31 Parking,$0.00,The Saxon Pub


## Austin Weather

Read in the HTML table of the ten-day forecast to retrieve weather data by day.

In [19]:
# read every HTML table on the ten-day forecast page; the first one is used
austin_weather = pd.read_html("https://weather.com/weather/tenday/l/USTX0057:1:US", header=0)
weather_df = austin_weather[0]

# Each parsed header is moved one column to the right: the label parsed as
# 'Description' becomes 'Date', 'High / Low' becomes 'Description', and so on,
# with the unnamed last column becoming 'Humidity'. The leading 'Day' column
# is renamed to 'dummy' and then dropped.
weather_df.rename(
    columns={
        'Day': 'dummy',
        'Description': 'Date',
        'High / Low': 'Description',
        'Precip': 'High/Low',
        'Wind': 'Precip',
        'Humidity': 'Wind',
        'Unnamed: 6': 'Humidity',
    },
    inplace=True,
)
weather_df.drop(columns='dummy', inplace=True)

In [20]:
# add an empty 'New Date' placeholder column (the next weather cell deletes it
# again), then preview the frame
weather_df['New Date'] = ""
weather_df.head()

Unnamed: 0,Date,Description,High/Low,Precip,Wind,Humidity,New Date
0,Today APR 17,Scattered Strong Storms,84°61°,50%,S 12 mph,72%,
1,ThuAPR 18,Sunny,76°55°,10%,NW 19 mph,58%,
2,FriAPR 19,Sunny,78°53°,0%,NNW 10 mph,38%,
3,Sat APR 20,Mostly Sunny,84°62°,0%,S 12 mph,47%,
4,Sun APR 21,Partly Cloudy,85°66°,10%,S 16 mph,57%,


In [40]:
# Keep one week of forecast rows and stamp each with its calendar date in the
# same YYYY/MM/DD format used for the event dates.
# - DataFrame.set_value was removed in pandas 1.0, so the 'Dates' column is
#   assigned in one step instead of cell by cell.
# - The scraped 'Date' label and the 'New Date' placeholder are both dropped so
#   the frame has the columns shown in the recorded output of this cell;
#   errors='ignore' keeps the cell re-runnable once they are gone.
# - .copy() detaches the 7-row slice from the scraped frame before it is edited.
week_start = dt.today()  # read the clock once so every offset shares one base date
weather_df = (
    weather_df
    .head(7)
    .drop(columns=['Date', 'New Date'], errors='ignore')
    .rename(columns={'High/Low': 'High_Low'})
    .copy()
)
weather_df['Dates'] = [
    (week_start + td(days=y)).strftime('%Y/%m/%d') for y in range(len(weather_df))
]
weather_df

Unnamed: 0,Description,High_Low,Precip,Wind,Humidity,Dates
0,Scattered Strong Storms,84°61°,50%,S 12 mph,72%,2019/04/17
1,Sunny,76°55°,10%,NW 19 mph,58%,2019/04/18
2,Sunny,78°53°,0%,NNW 10 mph,38%,2019/04/19
3,Mostly Sunny,84°62°,0%,S 12 mph,47%,2019/04/20
4,Partly Cloudy,85°66°,10%,S 16 mph,57%,2019/04/21
5,AM Thunderstorms,82°67°,40%,SSE 7 mph,67%,2019/04/22
6,Scattered Thunderstorms,78°66°,50%,ESE 7 mph,76%,2019/04/23


In [41]:
# connect and transfer data to sql table
# No table listing is requested first: the result of Engine.table_names() was
# never displayed or used, and that method is deprecated in SQLAlchemy 1.4.
# if_exists='append' adds rows to the existing table, so re-running this cell
# inserts the same forecast rows again.
weather_df.to_sql(name='austin_events_weather', con=engine, if_exists='append', index=False)

In [42]:
# read the table back and preview it to verify the rows were written
pd.read_sql_query(sql='select * from austin_events_weather', con=engine).head(n=5)

Unnamed: 0,id,Description,High_Low,Precip,Wind,Humidity,Dates
0,1,Scattered Strong Storms,84°61°,50%,S 12 mph,72%,2019/04/17
1,2,Sunny,76°55°,10%,NW 19 mph,58%,2019/04/18
2,3,Sunny,78°53°,0%,NNW 10 mph,38%,2019/04/19
3,4,Mostly Sunny,84°62°,0%,S 12 mph,47%,2019/04/20
4,5,Partly Cloudy,85°66°,10%,S 16 mph,57%,2019/04/21
