In [265]:
import requests
from bs4 import BeautifulSoup
import ast
import pandas as pd

In [266]:
#Page listing the Schmalz international sales partners; the partner data is embedded in its HTML
website = 'https://www.schmalz.com/en/services/consultation/select-customer-adviser/international-sales-partner/'
#A timeout keeps the cell from hanging forever on a stalled connection
response = requests.get(website, timeout=30)
#Stop here on a 4xx/5xx answer instead of silently parsing an error page
response.raise_for_status()
data = response.text
soup = BeautifulSoup(data,'lxml')

In [267]:
#Locate the Google Maps container; its 'data-markers' attribute holds one list literal with a dictionary per sales partner or subsidiary
map_div = soup.find('div',id='map')
if map_div is None:
    raise ValueError('No <div id="map"> element found - the page layout may have changed')
markers_attr = map_div['data-markers']
#Same value as before (attribute without its starting and ending [ ]), kept in case a later cell still reads it
gmap_string = markers_attr[1:-1]
#Parse the whole list in one pass: no splitting on '},' and no hard-coded fix-up of the last entry,
#so this keeps working when the number of partners changes or a value happens to contain '},'
list_partners = ast.literal_eval(markers_attr)

In [268]:
def get_index_substring(t_strings,substring):
    """Return the position of the first element of t_strings that contains substring.

    Raises StopIteration when no element contains substring.
    """
    for position, candidate in enumerate(t_strings):
        if substring in candidate:
            return position
    #Mirror next() on an exhausted iterator: nothing matched
    raise StopIteration


def info_to_dict(partner_dict):
    """Replace the 'html' entry of a partner dictionary with structured fields.

    The HTML snippet is flattened to text and split on commas. From the
    resulting list the first entry containing 'T:' becomes 'Telephone', the
    first containing 'www.' becomes 'Website' and the first containing '@'
    becomes 'Email'; a field is simply left out when no entry matches.
    'Address' is the comma-joined run of entries after the first one, up to
    (and excluding) the entry just before the telephone entry.

    partner_dict is modified in place (the 'html' key is removed) and the
    same object is returned.

    Raises KeyError if partner_dict has no 'html' key.
    """
    temp_soup = BeautifulSoup(partner_dict['html'],'lxml')
    list_info = temp_soup.get_text(separator = ',').split(',')

    #(output key, substring identifying the entry), in the order the keys are added
    contact_markers = (('Telephone','T:'),('Website','www.'),('Email','@'))
    found_at = {}
    for field, marker in contact_markers:
        try:
            found_at[field] = get_index_substring(list_info,marker)
        except StopIteration:
            #No entry carries this marker: the field is optional, skip it
            continue
        partner_dict[field] = list_info[found_at[field]]

    if 'Telephone' in found_at:
        #The entry right before the telephone one is left out as well
        #(presumably a blank/separator entry in the popup - TODO confirm)
        address_end = found_at['Telephone'] - 1
    elif found_at:
        #Without a telephone entry, end the address at the first other contact entry found
        address_end = min(found_at.values())
    else:
        #No contact entry at all: everything after the first entry is treated as address
        address_end = len(list_info)
    partner_dict['Address'] = ','.join(list_info[1:address_end])
    del partner_dict['html']
    return partner_dict

In [269]:
#Run every partner dictionary through info_to_dict and keep the returned dictionaries
list_partners = list(map(info_to_dict, list_partners))

In [270]:
#Create a DataFrame of the partners and use Python Geopy to get Country from Latitude and Longitude Column
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

#Nominatim must be given an identifying user agent (recent geopy versions refuse the default one)
geolocator = Nominatim(user_agent='schmalz-sales-partner-scraper')
#The public Nominatim service allows at most one request per second; RateLimiter spaces the calls out
reverse_geocode = RateLimiter(geolocator.reverse, min_delay_seconds=1)
df = pd.DataFrame(list_partners)


def country_get(x):
    """Return the country reported by Nominatim for a row holding 'lat' and 'lng'.

    Returns None when the reverse lookup yields no result or the result
    carries no country. The name comes back as Nominatim reports it, which
    may be in the local language.
    """
    #Address the coordinates by label rather than by position in the row
    location = reverse_geocode(str(x['lat'])+','+str(x['lng']))
    if location is None:
        return None
    return location.raw.get('address', {}).get('country')


df['Country'] = df[['lat','lng']].apply(country_get,axis=1)

In [271]:
#Preview the first five partners to sanity-check the extracted columns
df.head(n=5)

Unnamed: 0,Address,Email,Telephone,Website,lat,lng,name,type,Country
0,"Street 3, New Borg el Arab 21934, Egypt",sales@unitelegypt.com,T: +20(0)3 459 2070,www.unitelegypt.com,30.8854,29.577,Unitel Co. Material Handling Division,dealer,مصر
1,"M. Moreno 6546, B1875BLR Wilde, Buenos Aires, ...",afernandez@micro.com.ar,T: +54 911 5062-7531,www.microautomacion.com,-34.7062,-58.3127,Automación Micromecánica S.A.I.C.,dealer,Argentina
2,"A Schmalz Company,25 Turbo Drive, AUS-3153 Bay...",sales@millsom.com.au,T: +61 3 9720 7966,www.millsomhoists.com.au,-37.8255,145.275,Millsom Hoists PTY. LTD<br \/>A Schmalz Company,dealer,Australia
3,"Ulica Toma Zupana 16, SI-4202 Naklo",andraz.potocnik@mb-naklo.si,T: +386 42771713,www.mb-naklo.si,46.2712,14.3171,MB-NAKLO d.o.o.,dealer,Slovenija
4,"Miklavska c53c, Spodnje Hoce, 2311 Hoce",saso.vrecar@zlatorogoprema.si,T: +386 2 6130875,www.zlatorogoprema.si,46.498,15.643,Zlatorog Oprema d.o.o.,dealer,Slovenija


In [272]:
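#Optional CSV export, left disabled: the path below is specific to the original author's Windows machine, adjust it before uncommenting
#df.to_csv(r'C:\Users\Stephane\Desktop\SC_distrib.csv',index=False)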

# PIAB Distribution List 

In [273]:
#URL of the PIAB 'where to buy' page used for the PIAB distribution list section
piab_url = 'https://www.piab.com/en-US/where-to-buy/'