In [2]:
from dstaster import *

import folium # This notebook needs folium installed in order to display maps

In [3]:
# Table of cultural sites: one row per site with its borough, type, name and
# latitude/longitude columns.
cultural_csv = 'datasets/inner_london_cultural.csv'
cultural = pd.read_csv(cultural_csv)
cultural

Unnamed: 0.1,Unnamed: 0,borough,type,site_name,borough_name,latitude,longitude
0,0,City of London,Archives,Baring Archive,City and County of th,51.515517,-0.089226
1,1,City of London,Archives,LexisNexis Butterworths,City and County of th,51.516313,-0.111522
2,2,City of London,Archives,Employment Policy Institute,City and County of th,51.515068,-0.075887
3,3,City of London,Archives,Prudential Group Archives,City and County of th,51.510391,-0.089040
4,4,City of London,Archives,Chartered Insitute of Taxation,City and County of th,51.515205,-0.110328
...,...,...,...,...,...,...,...
2990,888,Camden,Theatres,Pentameters Theatre,Camden,51.556282,-0.180041
2991,889,Camden,Theatres,Platform Theatre,Camden,51.537431,-0.125196
2992,890,Camden,Theatres,The Place,Camden,51.527811,-0.130565
2993,891,Camden,Theatres,Kings Place,Camden,51.534748,-0.123577


<h2>Plot on map</h2>

In [4]:
def positions(text):
    """Iterate over the coordinates of every site of one type.

    Parameters
    ----------
    text : value compared for equality against the ``type`` column of the
        notebook-level ``cultural`` DataFrame (e.g. ``"Jewellery design"``).

    Returns
    -------
    The ``DataFrame.iterrows()`` iterator over the matching rows: pairs of
    ``(row label, Series)`` where the Series holds ``latitude`` then
    ``longitude``.
    """
    matching_sites = cultural[cultural['type'] == text]
    return matching_sites[['latitude', 'longitude']].iterrows()

In [54]:
# Centre of the map view, as [latitude, longitude].
pos = [51.517, -0.12]

mp = folium.Map(location=pos, tiles='Stamen Toner', zoom_start=14, zoom_control=False)

# Marker colour for each site type. Types are drawn in this order, so the
# later type's circles are painted over the earlier one's.
# NOTE(review): `colors` is not defined in this notebook; presumably it is the
# palette exported by the star import at the top — confirm.
type_colors = {
    "Commercial galleries": 'blue',
    "Jewellery design": 'salmon',
}

for site_type, color_name in type_colors.items():
    # The loop variable must not be named `pos`: that would overwrite the map
    # centre above with the last plotted row, and any later cell that reads
    # `pos` would silently pick up that row instead.
    for _, coords in positions(site_type):
        folium.Circle(
            radius=10,
            location=coords.values,
            color=colors[color_name],
            fill=True,
            fill_opacity=1.0
        ).add_to(mp)

mp

In [52]:
from itertools import chain 

# Two points, as (latitude, longitude), that define the separating line.
a = np.array([51.512824, -0.117294])
b = np.array([51.527226, -0.130642])
diff = b-a
print(diff)
# `diff` rotated by 90 degrees, i.e. a normal to the line through `a` and `b`.
orth = np.array([-diff[1], diff[0]])
print(orth)

# The map centre is spelled out here rather than read from a leftover `pos`
# variable, so this cell also works on a fresh kernel and does not depend on
# which cell happened to run before it.
map_center = [51.517, -0.12]
mp = folium.Map(location=map_center, tiles='Stamen Toner', zoom_start=14, zoom_control=False)


def line_side(point):
    """Return the dot product of ``point - a`` with the line normal ``orth``.

    The sign tells which side of the line through ``a`` and ``b`` the point
    lies on. Latitude/longitude are treated as plain planar coordinates.
    """
    return np.dot(point - a, orth)


# Colour every site by the side of the line it falls on (the prediction),
# not by its actual type.
for _, coords in chain(positions("Commercial galleries"), positions("Jewellery design")):
    if line_side(coords.values) < 0:
        predicted = 'blue' # Gallery
    else:
        predicted = 'salmon' # Jewellery
    folium.Circle(
        radius=10,
        location=coords.values,
        color=colors[predicted],
        fill=True,
        fill_opacity=1.0
    ).add_to(mp)


# Draw the line extended by its own length beyond both end points: a wide
# white stroke first, then a narrower green one on top, giving an outline.
line_ends = [a-diff, b+diff]
folium.PolyLine(line_ends, color='white', weight=10, opacity=1).add_to(mp)
folium.PolyLine(line_ends, color='green', weight=6, opacity=1).add_to(mp)


# Share of each type that lies on "its" side of the line.
galleries_total = sum(cultural['type'] == "Commercial galleries")
galleries_positive = sum(
    1 for _, coords in positions("Commercial galleries")
    if line_side(coords.values) < 0
)
jewelleries_total = sum(cultural['type'] == "Jewellery design")
jewelleries_positive = sum(
    1 for _, coords in positions("Jewellery design")
    if line_side(coords.values) >= 0
)

# NOTE(review): `:2f` sets a minimum width of 2 and keeps the default six
# decimals; `:.2f` was probably intended. Left unchanged so the recorded
# output below stays valid.
print(f"Galleries {100*galleries_positive/galleries_total:2f}%")
print(f"Jewelleries {100*jewelleries_positive/jewelleries_total:2f}%")

mp

[ 0.014402 -0.013348]
[0.013348 0.014402]
Galleries 80.232558%
Jewelleries 52.816901%


In [51]:
from itertools import chain 

# Map centre for this view, as (latitude, longitude).
ny = np.array([40.771985, -73.959106])

# Same tile style as the maps above, one notch closer in (zoom 16); no markers
# are added to this map.
mp = folium.Map(
    location=ny,
    tiles='Stamen Toner',
    zoom_start=16,
    zoom_control=False,
)

mp

<h2>Data cleaning (done)</h2>

In [5]:
# Raw business records; the cells below reduce this to a handful of columns.
london_csv = 'businesses-in-london.csv'
london = pd.read_csv(london_csv)

In [16]:
# Drop the columns at positions 33..52 (up to 20 columns).
# NOTE: the selection is positional, so this cell is not idempotent — running
# it a second time removes whatever columns have moved into those positions.
positional_columns = london.columns[33:53]
london = london.drop(columns=positional_columns)

In [18]:
# Remove the date, accounts, returns and mortgages columns by name.
# Re-running raises KeyError because the columns are already gone.
filing_columns = [
    'DissolutionDate',
    'IncorporationDate',
    'Accounts.AccountRefDay',
    'Accounts.AccountRefMonth',
    'Accounts.NextDueDate',
    'Accounts.LastMadeUpDate',
    'Accounts.AccountCategory',
    'Returns.NextDueDate',
    'Returns.LastMadeUpDate',
    'Mortgages.NumMortCharges',
    'Mortgages.NumMortOutstanding',
    'Mortgages.NumMortPartSatisfied',
    'Mortgages.NumMortSatisfied',
]
london = london.drop(columns=filing_columns)

In [20]:
# Remove eight short-coded columns by name.
# NOTE(review): presumably geography/area codes joined from a postcode lookup — confirm.
area_code_columns = ['oa11', 'laua', 'ward', 'lsoa11', 'msoa11', 'wz11', 'calncv', 'stp']
london = london.drop(columns=area_code_columns)

In [23]:
# Remove the company number, the registered-address parts other than the
# post code, the limited-partnership counts, and the 'URI' and 'pcds' columns.
address_columns = [
    'CompanyNumber',
    'RegAddress.CareOf',
    'RegAddress.POBox',
    'RegAddress.AddressLine1',
    'RegAddress.AddressLine2',
    'RegAddress.PostTown',
    'RegAddress.County',
    'RegAddress.Country',
    'LimitedPartnerships.NumGenPartners',
    'LimitedPartnerships.NumLimPartners',
    'URI',
    'pcds',
]
london = london.drop(columns=address_columns)

In [25]:
# Remove the last two unwanted columns by name.
category_columns = ['CompanyCategory', 'CountryOfOrigin']
london = london.drop(columns=category_columns)

In [32]:
# Preview the first five rows of the reduced table.
london.head(n=5)

Unnamed: 0,CompanyName,RegAddress.PostCode,CompanyStatus,SICCode.SicText_1,SICCode.SicText_2,SICCode.SicText_3,SICCode.SicText_4,lat,long
0,!BIG IMPACT GRAPHICS LIMITED,EC1V9LT,Active,18129 - Printing n.e.c.,59112 - Video production activities,63120 - Web portals,74201 - Portrait photographic activities,51.52708,-0.079703
1,!L PRODUCTIONS LIMITED,E48EJ,Active,90030 - Artistic creation,,,,51.617737,-0.024699
2,!NKED LTD,SW98QS,Active,47710 - Retail sale of clothing in specialised...,,,,51.460017,-0.106516
3,!NVERTD DESIGNS LIMITED,W128DS,Active,58190 - Other publishing activities,,,,51.503989,-0.225428
4,!YOZO FASS LIMITED,SE41NQ,Active,90010 - Performing arts,,,,51.46223,-0.032256


In [39]:
# Shorten the remaining column names; columns not listed keep their names.
short_names = {
    'CompanyName': 'Name',
    'RegAddress.PostCode': 'PostCode',
    'SICCode.SicText_1': 'Text1',
    'SICCode.SicText_2': 'Text2',
    'SICCode.SicText_3': 'Text3',
    'SICCode.SicText_4': 'Text4',
    'lat': 'Lat',
    'long': 'Long',
}
london = london.rename(columns=short_names)

In [40]:
# Save the cleaned table. `index=False` keeps the default integer row index
# out of the file; writing it would add one more unnamed leading column on
# every save/load round trip (the table already carries an 'Unnamed: 0'
# column from an earlier round trip).
london.to_csv('london.csv', index=False)