In [1]:
import pandas as pd
import numpy as np

## Problem Statement
Enter users Latitude and Longitude and Display the nearest 5 Pubs on the map. Use Euclidean Distance to find the nearest pubs.


In [2]:
df=pd.read_csv("open_pubs.csv",header=None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.979340,Babergh
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,\N,\N,Babergh
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,\N,\N,Babergh


In [3]:
# Add column name according to the data dictionary
df.columns=["fsa_id","name","address","postcode","easting","northing","latitude","longitude","local_authority"]
df.head()

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.979340,Babergh
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,\N,\N,Babergh
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,\N,\N,Babergh


In [4]:
df.shape

(51331, 9)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51331 entries, 0 to 51330
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   fsa_id           51331 non-null  int64 
 1   name             51331 non-null  object
 2   address          51331 non-null  object
 3   postcode         51331 non-null  object
 4   easting          51331 non-null  int64 
 5   northing         51331 non-null  int64 
 6   latitude         51331 non-null  object
 7   longitude        51331 non-null  object
 8   local_authority  51331 non-null  object
dtypes: int64(3), object(6)
memory usage: 3.5+ MB


In [6]:
df.columns

Index(['fsa_id', 'name', 'address', 'postcode', 'easting', 'northing',
       'latitude', 'longitude', 'local_authority'],
      dtype='object')

## Data Cleaning

In [7]:
## Check for nullity
df.isnull().sum()

fsa_id             0
name               0
address            0
postcode           0
easting            0
northing           0
latitude           0
longitude          0
local_authority    0
dtype: int64

In [8]:
df.replace("\\N", np.nan, inplace=True)

In [9]:
df.head()

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.97934,Babergh
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,,,Babergh
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,,,Babergh


In [10]:
df.isna().sum()

fsa_id               0
name                 0
address              0
postcode             0
easting              0
northing             0
latitude           767
longitude          767
local_authority      0
dtype: int64

In [11]:
df["latitude"].median()

52.502652

In [12]:
df["longitude"].median()

-1.5846520000000002

In [13]:
## Replacing missing values from "latitude" and "longitude" column with their median values.

In [14]:
df["latitude"].replace(np.nan,df["latitude"].median(),inplace=True)

In [15]:
df["longitude"].replace(np.nan,df["longitude"].median(),inplace=True)

In [16]:
df.head()

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.97934,Babergh
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,52.502652,-1.584652,Babergh
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,52.502652,-1.584652,Babergh


In [17]:
df.dtypes

fsa_id              int64
name               object
address            object
postcode           object
easting             int64
northing            int64
latitude           object
longitude          object
local_authority    object
dtype: object

In [18]:
## Convert "latitude" and "longitude" into appropriate form of datatype

In [19]:
df["latitude"]=df["latitude"].astype(float)
df["longitude"]=df["longitude"].astype(float)


In [20]:
df.dtypes

fsa_id               int64
name                object
address             object
postcode            object
easting              int64
northing             int64
latitude           float64
longitude          float64
local_authority     object
dtype: object

In [21]:
##The column "name" is name of pub
df["name"].nunique()

36335

## 5 Nearest pub location by distance

In [22]:
## 1st I create a numpy array of my/users current longitude and latitude position

In [24]:
x = float(input("Enter the latitude: "))
y = float(input("Enter the longitude: "))

# Create a NumPy array with the entered coordinates
my_loc = np.array((x, y))

# Print the array to the console
print("array:", my_loc)

Enter the latitude: 50
Enter the longitude: 1.5
array: [50.   1.5]


In [25]:
df_array=np.array([df["latitude"],df["longitude"]]).T
df_array

array([[51.970379,  0.97934 ],
       [51.958698,  1.057832],
       [52.038595,  0.729915],
       ...,
       [53.044998, -2.996966],
       [53.053094, -2.959124],
       [53.076638, -3.050512]])

In [26]:
## Now Euclidean distance 
df["distance"]=np.sum((df_array-my_loc)**2, axis=1)
df.head()

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority,distance
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.97934,Babergh,4.15348
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh,4.03201
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh,4.7489
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,52.502652,-1.584652,Babergh,15.778345
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,52.502652,-1.584652,Babergh,15.778345


In [27]:
df["distance"].sort_values(ascending=True).head(5)

24343    1.112886
24314    1.140485
24324    1.208578
24288    1.219683
24356    1.227446
Name: distance, dtype: float64

In [28]:
df.sort_values(by="distance",ascending=True).head(5)

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority,distance
24343,287427,The Britannia,"Dungeness Road, Dungeness, Kent",TN29 9ND,609200,117035,50.914852,0.974708,Folkestone and Hythe,1.112886
24314,287174,Pilot Inn,"Coast Drive, Lydd On Sea, Kent",TN29 9NJ,609007,118577,50.928769,0.972863,Folkestone and Hythe,1.140485
24324,287238,Romney Sands Holiday Village,"The Parade, Greatstone, Kent",TN28 8RN,608202,121965,50.959476,0.963359,Folkestone and Hythe,1.208578
24288,286878,Heron Park Venue Limited,"The Heron, Herons Park, Dengemarsh Road, Lydd",TN29 9JH,604407,118762,50.932067,0.907603,Folkestone and Hythe,1.219683
24356,287477,The Jolly Fisherman Pub,"Jolly Fisherman Hotel, The Parade, Greatstone,...",TN28 8ST,608082,122979,50.968634,0.962232,Folkestone and Hythe,1.227446


In [29]:
df["distance"].min()                 ### This is the most nearest pub from my location

1.112885867168006