# Part 1: Extract DATA from website

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

In [5]:
url = 'https://www.irvinespectrumcenter.com/shopping/stores?filter=all'

response = requests.get(url)

soup = BeautifulSoup(response.text)

In [6]:
rows=[]
companies = soup.find('div',{'class':'directory__listings'}).find_all('div',{'class':'directory__listings__column'})[1:]

for company in companies:
    row={}
    row['Name']      =  company['data-name']
    row['Features']  =  company['data-listing-row'].split(' ')
    row['Phone']     =  company.find('a').find('div',{'class':'directory__listing__phone'}).text
    row['Suite']     =  company['data-suite']
    row['Logo_url']  =  company.find('a').find('img')['src']
    row['Category1'] =  company.find('a')['href'].split('/')[1]
    row['Category2'] =  company.find('a')['href'].split('/')[2]
    row['Status']    =  company['data-status']
    row['Deal']      =  company['data-deals']

    rows.append(row)


In [7]:
data = pd.DataFrame(rows) # build a data frame from the list of dictionaries

pd.set_option('display.max_rows', None)

file_name = 'irvinespectrumcenter ' + datetime.now().strftime("%d-%m-%Y %H%M%S") + ".csv" # folder location with file name

data.to_csv(file_name) # save data frame as csv in file location

data # display data frame

Unnamed: 0,Name,Features,Phone,Suite,Logo_url,Category1,Category2,Status,Deal
0,85C Bakery Cafe,"[specialty-food, curbside-pickup, delivery, di...",(949) 727-0985,915,https://images.ctfassets.net/l16b5usqno44/umb-...,dining,restaurants,,False
1,adidas,"[children-s-apparel, men-s-apparel, shoes-hand...",(949) 753-0305,650,https://images.ctfassets.net/l16b5usqno44/umb-...,shopping,stores,,False
2,aerie,"[women-s-apparel, curbside-pickup, in-store-sh...",(949) 255-9186,721,https://images.ctfassets.net/l16b5usqno44/umb-...,shopping,stores,,False
3,Afters Ice Cream,"[specialty-food, delivery, dine-in, outdoor-se...",,918,https://images.ctfassets.net/l16b5usqno44/umb-...,dining,restaurants,,False
4,Alley,"[specialty-food, delivery, dine-in, outdoor-se...",(949) 932-0634,507,https://images.ctfassets.net/l16b5usqno44/umb-...,dining,restaurants,,False
...,...,...,...,...,...,...,...,...,...
171,World of Hearing,"[electronics-home-entertainment, medical-denta...",,821,https://images.ctfassets.net/l16b5usqno44/umb-...,shopping,stores,Coming Soon,False
172,Yankee Candle,"[books-cards-gifts, in-store-shopping, deals]",(949) 450-0725,735,https://images.ctfassets.net/l16b5usqno44/umb-...,shopping,stores,,True
173,Yard House,"[restaurants-full-service-dining, curbside-pic...",(949) 753-9373,620,https://images.ctfassets.net/l16b5usqno44/umb-...,dining,restaurants,,False
174,Yogurtland,"[specialty-food, curbside-pickup, dine-in, out...",(949) 536-5303,733,https://images.ctfassets.net/l16b5usqno44/umb-...,dining,restaurants,,False


# Part 2: Visuals
### Questions to answer:

* How many Stores
* How many Stores by Category
* Sores amount by Suit location
* Whats the ratio Deals to Category in both categories
* which companies have deal
* which companies are newly open

In [9]:
import plotly.express as px
import plotly.graph_objects as go

In [128]:
data_removed_duplicates = data[~data.Name.duplicated()]

Stores = data_removed_duplicates.Name.count()

print(f'''
    QUESTION 1: \n
    Amount of stores: {Stores}
''')


    QUESTION 1: 

    Amount of stores: 175



In [134]:
data_Stores_by_Category = data[['Category1']].value_counts().reset_index()
data_Stores_by_Category


Unnamed: 0,Category1,0
0,shopping,118
1,dining,58


In [129]:
data.columns

Index(['Name', 'Features', 'Phone', 'Suite', 'Logo_url', 'Category1',
       'Category2', 'Status', 'Deal', 'Suite_Group'],
      dtype='object')

In [76]:
# Difference of restaurants and stores

data_category_Status = data[['Category1','Status']].value_counts().reset_index()

data_category_Status.Status.replace('','Open', inplace=True)

fig =px.bar(
    data_category_Status,
    orientation='h',
    y='Category1',
    x=0,
    color="Status",
    text_auto=True)

fig.update_layout({
'plot_bgcolor': 'rgba(0, 0, 0, 0)'
# 'paper_bgcolor': 'rgba(0, 0, 0, 0)'
})

fig.show()



In [77]:
data_category_Status

Unnamed: 0,Category1,Status,0
0,shopping,Open,93
1,dining,Open,53
2,shopping,Now Open,18
3,shopping,Coming Soon,7
4,dining,Now Open,4
5,dining,Coming Soon,1


In [79]:
data_category_Status = data[['Category1','Deal']].value_counts().reset_index()

fig =px.bar(
    data_category_Status,
    orientation='h',
    y='Category1',
    x=0,
    color="Deal",
    text_auto=True)

fig.update_layout({
'plot_bgcolor': 'rgba(0, 0, 0, 0)'
# 'paper_bgcolor': 'rgba(0, 0, 0, 0)'
})

fig.show()

In [106]:
# Questions to answer:

# How many Stores
# How many Stores by Category
# Sores amount by Suit location
# Whats the ratio Deals to Category in both categories
# which companies have deal
# which companies are newly open

In [107]:
data['Suite_Group'] = data.Suite.apply(lambda x: 'Group '+x[0])

data_suite = data[['Suite_Group','Deal']].value_counts().reset_index()

px.bar(
    data_suite,
    y=0,
    x='Suite_Group',
    color="Deal",
    text_auto=True
).update_layout({'plot_bgcolor': 'rgba(0, 0, 0, 0)'})


Unnamed: 0,Suite_Group,Deal,0
0,7,False,50
1,8,False,39
2,6,False,38
3,5,False,18
4,7,True,9
5,9,False,8
6,6,True,7
7,8,True,5
8,5,True,2
