In [1]:
import pandas as pd
import numpy as np

## Problem Statement
Enter users Latitude and Longitude and Display the nearest 5 Pubs on the map. Use Euclidean Distance to find the nearest pubs.


In [2]:
df=pd.read_csv("open_pubs.csv",header=None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.979340,Babergh
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,\N,\N,Babergh
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,\N,\N,Babergh


In [3]:
# Add column name according to the data dictionary
df.columns=["fsa_id","name","address","postcode","easting","northing","latitude","longitude","local_authority"]
df.head()

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.979340,Babergh
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,\N,\N,Babergh
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,\N,\N,Babergh


In [4]:
df.shape

(51331, 9)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51331 entries, 0 to 51330
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   fsa_id           51331 non-null  int64 
 1   name             51331 non-null  object
 2   address          51331 non-null  object
 3   postcode         51331 non-null  object
 4   easting          51331 non-null  int64 
 5   northing         51331 non-null  int64 
 6   latitude         51331 non-null  object
 7   longitude        51331 non-null  object
 8   local_authority  51331 non-null  object
dtypes: int64(3), object(6)
memory usage: 3.5+ MB


In [6]:
df.columns

Index(['fsa_id', 'name', 'address', 'postcode', 'easting', 'northing',
       'latitude', 'longitude', 'local_authority'],
      dtype='object')

## Data Cleaning

In [7]:
## Check for nullity
df.isnull().sum()

fsa_id             0
name               0
address            0
postcode           0
easting            0
northing           0
latitude           0
longitude          0
local_authority    0
dtype: int64

In [8]:
df.replace("\\N", np.nan, inplace=True)

In [9]:
df.head()

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.97934,Babergh
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,,,Babergh
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,,,Babergh


In [10]:
df.isna().sum()

fsa_id               0
name                 0
address              0
postcode             0
easting              0
northing             0
latitude           767
longitude          767
local_authority      0
dtype: int64

In [11]:
df["latitude"].median()

52.502652

In [12]:
df["longitude"].median()

-1.5846520000000002

In [13]:
## Replacing missing values from "latitude" and "longitude" column with their median values.

In [14]:
df["latitude"].replace(np.nan,df["latitude"].median(),inplace=True)

In [15]:
df["longitude"].replace(np.nan,df["longitude"].median(),inplace=True)

In [16]:
df.head()

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.97934,Babergh
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,52.502652,-1.584652,Babergh
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,52.502652,-1.584652,Babergh


In [17]:
df.dtypes

fsa_id              int64
name               object
address            object
postcode           object
easting             int64
northing            int64
latitude           object
longitude          object
local_authority    object
dtype: object

In [18]:
## Convert "latitude" and "longitude" into appropriate form of datatype

In [19]:
df["latitude"]=df["latitude"].astype(float)
df["longitude"]=df["longitude"].astype(float)


In [20]:
df.dtypes

fsa_id               int64
name                object
address             object
postcode            object
easting              int64
northing             int64
latitude           float64
longitude          float64
local_authority     object
dtype: object

In [21]:
##The column "name" is name of pub
df["name"].nunique()

36335

In [22]:
cleaned_data=df
cleaned_data

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.979340,Babergh
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,52.502652,-1.584652,Babergh
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,52.502652,-1.584652,Babergh
...,...,...,...,...,...,...,...,...,...
51326,597119,Wrexham & District War Memorial Club Ltd,"Wrexham War Memorial Club Farndon Street, Wrex...",LL13 8DE,333909,350438,53.047100,-2.987319,Wrexham
51327,597127,Wrexham Lager Social Club,"1 - 3 Union Road, Wrexham, Wrexham",LL13 7SR,333028,350563,53.048123,-3.000485,Wrexham
51328,597130,Wrexham Rail Sports & Social Club,"Sports And Social Club, 44 Brook Street, Wrexh...",LL13 7LU,333259,350213,53.044998,-2.996966,Wrexham
51329,597131,Wrexham Rugby Club,"Wrexham Rugby Club Bryn Estyn Road, Wrexham, W...",LL13 9TY,335808,351078,53.053094,-2.959124,Wrexham


In [23]:
## Convert this cleaned data into csv file
df.to_csv('C:/Users/Pranshu/OneDrive/Desktop/ds_internship_2023_innomatics/Open_Pub_Data_app/cleaned_data.csv', index=False)

## 5 Nearest pub location by distance

In [24]:
## 1st I create a numpy array of my/users current longitude and latitude position

In [40]:
user_lat = float(input("Enter the latitude: "))
user_lon = float(input("Enter the longitude: "))

# Create a NumPy array with the entered coordinates
my_loc = np.array((user_lat, user_lon))

# Print the array to the console
print("array:", my_loc)

Enter the latitude: 50
Enter the longitude: 1.2
array: [50.   1.2]


In [41]:
df_array=np.array([df["latitude"],df["longitude"]]).T
df_array

array([[51.970379,  0.97934 ],
       [51.958698,  1.057832],
       [52.038595,  0.729915],
       ...,
       [53.044998, -2.996966],
       [53.053094, -2.959124],
       [53.076638, -3.050512]])

In [43]:
## Now Euclidean distance 
df['distance'] = ((df['latitude'] - user_lat) ** 2 + (df['longitude'] - user_lon) ** 2) ** 0.5
df.head()

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority,distance
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.97934,Babergh,1.982696
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh,1.963851
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh,2.092092
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,52.502652,-1.584652,Babergh,3.744002
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,52.502652,-1.584652,Babergh,3.744002


In [44]:
df["distance"].sort_values(ascending=True).head(5)

24343    0.942184
24314    0.956140
24288    0.976855
24324    0.988227
24348    0.994766
Name: distance, dtype: float64

In [45]:
df.sort_values(by="distance",ascending=True).head(5)

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority,distance
24343,287427,The Britannia,"Dungeness Road, Dungeness, Kent",TN29 9ND,609200,117035,50.914852,0.974708,Folkestone and Hythe,0.942184
24314,287174,Pilot Inn,"Coast Drive, Lydd On Sea, Kent",TN29 9NJ,609007,118577,50.928769,0.972863,Folkestone and Hythe,0.95614
24288,286878,Heron Park Venue Limited,"The Heron, Herons Park, Dengemarsh Road, Lydd",TN29 9JH,604407,118762,50.932067,0.907603,Folkestone and Hythe,0.976855
24324,287238,Romney Sands Holiday Village,"The Parade, Greatstone, Kent",TN28 8RN,608202,121965,50.959476,0.963359,Folkestone and Hythe,0.988227
24348,287455,The Dolphin Hotel,"11 South Street, Lydd, Kent",TN29 9DQ,604325,120846,50.950822,0.907601,Folkestone and Hythe,0.994766


In [46]:
df["distance"].min()                 ### This is the most nearest pub from my location

0.9421839879598921

Hence, all the 5 nearest location lies under the same local_Authority with distance=[0.942184,0.956140,0.976855,0.988227 and 0.994766].