## Shopping Plan

### Import data for current suppliers

In [316]:
import pandas as pd
import requests 
import numpy as np
from bs4 import BeautifulSoup


In [334]:
current = pd.read_csv('suppliers.csv')
current

Unnamed: 0,supplier
0,Ivystone
1,Meissenburg Designs
2,Snark City
3,Chronicle Books
4,Workman Publishing
5,Mary Square
6,Ellembee
7,Walton Wood Farm
8,Sapling Press
9,Knock Knock


### Scape market website to find where these suppliers will be at market

In [315]:
#My inital try was with beautiful soup, but this did not return all the data on the webpage
bnum = 1
floornum = 20
url = 'https://www.americasmart.com/browse/#/exhibitor?market=23&building=' + str(bnum) + '&floor=' + str(floornum) 
response = requests.get(url)
print(url)

if response.ok == True:
    data = response.text
    #print (data)
    soup = BeautifulSoup(data, 'lxml')
    #print (soup.prettify())
    booths = soup.select('body')
    #print (booths)
    text = soup.find_all(text=True)
    #print(text)

https://www.americasmart.com/browse/#/exhibitor?market=23&building=1&floor=20


In [171]:
#found the correct url of the AJAX request using inspect tool in google chrome
#instructions under"javascript heavy websites" on 
#https://blog.hartleybrody.com/web-scraping-cheat-sheet/#useful-libraries
url = 'https://wem.americasmart.com/api/v1.2/Search/LinesAndPhotosByMarket?status=ACTIVE_AND_UPCOMING&marketID=23'
r = requests.get(url)
info = r.json()
print(info)

[{'showroomName': 'Moby Dick Specialties', 'booths': [{'floorNum': 10, 'isPerm': True, 'building': 2.0, 'lateOptIn': None, 'meridianUID': '1029', 'showMarketLateOptInDates': [{'showLateOptInDatesID': 14, 'showLateOptInDate': '2020-01-16T05:00:00Z'}], 'title': '1029', 'boothID': 269037}], 'logo': None, 'productLines': [{'exhibLineID': 269271, 'description': 'Moby Dick Specialties'}], 'exhibitorID': 2187}, {'showroomName': 'Abbey & CA Gift', 'booths': [{'floorNum': 13, 'isPerm': True, 'building': 2.0, 'lateOptIn': None, 'meridianUID': '1335A', 'showMarketLateOptInDates': [{'showLateOptInDatesID': 14, 'showLateOptInDate': '2020-01-16T05:00:00Z'}], 'title': '1335A', 'boothID': 249415}], 'logo': '//wem.americasmart.com/convdata/amc/images/ExhibitorLogos/44FEE1FF-5056-86CF-980D4A62EACE696B.jpg', 'productLines': [{'exhibLineID': 294565, 'description': 'Cathedral Art Metal Co.'}, {'exhibLineID': 300361, 'description': 'Amazing Woman Collection'}, {'exhibLineID': 300362, 'description': 'Say it 

In [294]:
#narrowing down all suppliers in one show room
for j in range(0,len(info[1]['productLines'])):
    print(info[1]['productLines'][j]['description'])

#played around with indexing to find the data I needed
info[1]['booths'][0]['title']


Cathedral Art Metal Co.
Amazing Woman Collection
Say it with Sass
Advent Collection
Grace Outpoured Coaster Mugs
Professions
Car Charms & Visor Clips
Abbey & CA Gift


'1335A'

In [295]:
allbooths = []

for i in range(0, len(info)):
    loc = info[i] #select the item in the list provided by the AJAX query
    for j in range(0,len(info[i]['productLines'])):  #some booths have multiple lines, which is why we need 2 for loops
        
        booth = loc['showroomName'] 
        boothid = loc['booths'][0]['title']
        line = info[i]['productLines'][j]['description']
        bldg = int(loc['booths'][0]['building']) 
        floor = loc['booths'][0]['floorNum']

        sublist1 = [line,bldg,floor,booth,boothid] #want there to be a separate record if this line is in a booth with another title
        sublist2 = [booth,bldg,floor,booth,boothid] #also want to capture lines that have their own booth
        allbooths.append(sublist1)
        allbooths.append(sublist2)

df = pd.DataFrame(allbooths,columns=['supplier','building','floor','booth','id'])


In [296]:
df.head()

Unnamed: 0,supplier,building,floor,booth,id
0,Moby Dick Specialties,2,10,Moby Dick Specialties,1029
1,Moby Dick Specialties,2,10,Moby Dick Specialties,1029
2,Cathedral Art Metal Co.,2,13,Abbey & CA Gift,1335A
3,Abbey & CA Gift,2,13,Abbey & CA Gift,1335A
4,Amazing Woman Collection,2,13,Abbey & CA Gift,1335A


In [297]:
df2 = df.drop_duplicates()

In [298]:
#sanity check on data by searching for a value I know is in the csv file
df2[df2['supplier']=="Snark City"]

Unnamed: 0,supplier,building,floor,booth,id
1766,Snark City,2,17,Just Got 2 Have It!,1709


In [299]:
#merge the two datasets together to combine the desired suppliers with their locations
merge = current.merge(df2)
merge

Unnamed: 0,supplier,building,floor,booth,id
0,Ivystone,2,18,Ivystone,1801
1,Meissenburg Designs,2,7,Meissenburg Designs,787B
2,Snark City,2,17,Just Got 2 Have It!,1709
3,Chronicle Books,2,16,"Simblist Group, The",1621
4,Workman Publishing,2,17,Anne McGilvray & Company,1718
5,Mary Square,2,18,OneCoast,1800
6,Mary Square,2,6,Mary Square,621
7,Walton Wood Farm,2,15,Road Runners,1500
8,Sapling Press,2,16,daniel richards,1634
9,Knock Knock,2,17,Just Got 2 Have It!,1709


In [300]:
#find any values that are in csv but not in the webscraped data--have to look at the rows after row 35
s1 = merge.supplier
s2 = current.supplier
s3 = s1.append(s2)
print(s3.drop_duplicates())

0                   Ivystone
1        Meissenburg Designs
2                 Snark City
3            Chronicle Books
4         Workman Publishing
5                Mary Square
7           Walton Wood Farm
8              Sapling Press
9                Knock Knock
10                 Capabunga
11                 Fish Kiss
12           Barefoot Dreams
13            Creative Co-Op
14               Adams & Co.
15              Spunky Fluff
16              Design Ideas
18        Eric & Christopher
19              Karma Living
20             Sweet Gumball
21               Stash Style
22            Ella B Candles
23    Cedar Mountain Studios
24                    Joules
28     Cheryl Stevens Studio
29               Half United
30                   Baizaar
31                   Gorjana
32           Carina Paper Co
33      Pretty Alright Goods
34           Cotn Collection
35              Reeves & Co.
6                   Ellembee
16      Anne McGilvray & Co.
23                     julio
24            

In [312]:
#found typing errors in csv file
#import new, edited csv file
current2 = pd.read_csv('suppliers_v2.csv')
merge2 = current2.merge(df2)

In [313]:
#check all values are in both
#only one I couldn't correct was WHD
s1 = merge2.supplier
s2 = current2.supplier
s3 = s1.append(s2)
print(s3.drop_duplicates())

0                     Ivystone
1          Meissenburg Designs
2                   Snark City
3              Chronicle Books
4           Workman Publishing
5                  Mary Square
7                Ellembee Home
8             Walton Wood Farm
9                Sapling Press
10                 Knock Knock
11                   Capabunga
12                   Fish Kiss
13             Barefoot Dreams
14              Creative Co-Op
15                 Adams & Co.
16                Spunky Fluff
17    Anne McGilvray & Company
18                Design Ideas
20          Eric & Christopher
21                Karma Living
22               Sweet Gumball
23                 Stash Style
24              Ella B Candles
25               Julio Designs
27            Torched Products
28            Socksmith Design
29      Cedar Mountain Studios
30                      Joules
34       Venture Imports, LLC 
35       Cheryl Stevens Studio
36                 Half United
37                     Baizaar
38      

In [329]:
merge2.sort_values(by=['building','floor'],inplace = True)
merge2

Unnamed: 0,supplier,building,floor,booth,id
20,Eric & Christopher,1,9,Eric & Christopher,C19
21,Karma Living,1,9,Karma Living,C15
23,Stash Style,1,10,Stash Style,E6
22,Sweet Gumball,1,11,Sweet Gumball,E14
12,Fish Kiss,2,1,Fish Kiss,231
6,Mary Square,2,6,Mary Square,621
15,Adams & Co.,2,6,Adams & Co.,637A
1,Meissenburg Designs,2,7,Meissenburg Designs,787B
16,Spunky Fluff,2,7,Prairie Dance / Spunky Fluff,796
19,Design Ideas,2,8,Design Ideas,812B


In [332]:
# reset the index and export
merge2.set_index(np.arange(0,46),inplace=True)
merge2.to_csv('Market_Plan.csv')