In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import random
import os 
import sys
import warnings
import requests
# from requests.adapters import HTTPAdapter
# from requests.packages.urllib3.util.retry import Retry
import plotly.express as px
from datetime import datetime
from sklearn.manifold import MDS
import numpy.linalg as LA
# from sklearn.decomposition import PCA
# from sklearn.cluster import KMeans

## Data Preparation:

In [15]:
# Load the Indian-cities table from "in.csv", discard the columns this
# analysis does not use, and give the location columns title-cased names.
df = (
    pd.read_csv("in.csv")
    .reset_index(drop=True)
    .drop(columns=['country', 'iso2', 'capital', 'population_proper'])
    .rename(columns={"city": "City", "lat": "Latitude", "lng": "Longitude"})
)
df


Unnamed: 0,City,Latitude,Longitude,admin_name,population
0,Delhi,28.6100,77.2300,Delhi,32226000
1,Mumbai,19.0761,72.8775,Mahārāshtra,24973000
2,Kolkāta,22.5675,88.3700,West Bengal,18502000
3,Bangalore,12.9789,77.5917,Karnātaka,15386000
4,Chennai,13.0825,80.2750,Tamil Nādu,12395000
...,...,...,...,...,...
145,Bendravādi,12.3636,76.9137,Karnātaka,2684
146,Kodagihalli,12.9771,77.4651,Karnātaka,2585
147,Harna Buzurg,25.0981,87.0148,Bihār,2579
148,Mailanhalli,13.1863,77.6963,Karnātaka,2543


In [16]:
from geopy import distance

# Distance Matrix
# Pairwise distance (in km, via geopy's `distance.distance`) between every
# pair of cities in `df`. The matrix size is derived from the data rather
# than a hard-coded 150, so the cell keeps working if "in.csv" changes size.
coords = list(zip(df['Latitude'].to_numpy(), df['Longitude'].to_numpy()))
n_cities = len(coords)

Dist = np.zeros((n_cities, n_cities))
for i in range(n_cities):
    # The distance is symmetric and zero on the diagonal, so only the upper
    # triangle is computed and then mirrored. This halves the number of
    # geopy calls and makes the matrix exactly symmetric.
    for j in range(i + 1, n_cities):
        Dist[i, j] = Dist[j, i] = distance.distance(coords[i], coords[j]).km
Dist

array([[   0.        , 1144.95153567, 1303.04795267, ..., 1046.93018092,
        1708.40620445,  865.67109989],
       [1144.95153567,    0.        , 1657.3905591 , ..., 1602.73177095,
         830.76988684, 1460.13749669],
       [1303.04795267, 1657.3905591 ,    0.        , ...,  312.43400526,
        1534.13907742,  455.97696347],
       ...,
       [1046.93018092, 1602.73177095,  312.43400526, ...,    0.        ,
        1641.58724484,  183.81419177],
       [1708.40620445,  830.76988684, 1534.13907742, ..., 1641.58724484,
           0.        , 1578.65243791],
       [ 865.67109989, 1460.13749669,  455.97696347, ...,  183.81419177,
        1578.65243791,    0.        ]])

In [17]:
# Assigning zones
# Each state / union territory (the `admin_name` column) is assigned to one
# geographic zone; the zone is later used to colour the plot.

zones = {
    "North": ['Delhi', 'Himāchal Pradesh', 'Punjab', 'Uttar Pradesh',  'Haryāna', 'Jammu and Kashmīr', 'Chandīgarh'],
    "East": ['Bihār', 'Odisha', 'Jhārkhand', 'West Bengal'],
    "West": ['Rājasthān' , 'Gujarāt', 'Goa', 'Mahārāshtra'],
    "South": ['Andhra Pradesh', 'Telangāna', 'Karnātaka', 'Kerala', 'Tamil Nādu', 'Puducherry'],
    "Central": ['Madhya Pradesh', 'Chhattīsgarh'],
    "North East": ['Assam']
}

# Invert the mapping once (state -> zone) so every row is a single lookup
# instead of a scan over all zone lists. `setdefault` keeps the first zone
# (in dict order) should a state ever be listed under more than one zone.
state_to_zone = {}
for zone_name, states in zones.items():
    for state in states:
        state_to_zone.setdefault(state, zone_name)

# Fail loudly, naming the offending values, instead of an opaque
# "IndexError: list index out of range" when a state has no zone.
unmapped = df.loc[~df['admin_name'].isin(list(state_to_zone)), 'admin_name']
if not unmapped.empty:
    raise ValueError(
        f"No zone defined for admin_name value(s): {sorted(unmapped.astype(str).unique())}"
    )

df['Zone'] = df['admin_name'].map(state_to_zone)
df


Unnamed: 0,City,Latitude,Longitude,admin_name,population,Zone
0,Delhi,28.6100,77.2300,Delhi,32226000,North
1,Mumbai,19.0761,72.8775,Mahārāshtra,24973000,West
2,Kolkāta,22.5675,88.3700,West Bengal,18502000,East
3,Bangalore,12.9789,77.5917,Karnātaka,15386000,South
4,Chennai,13.0825,80.2750,Tamil Nādu,12395000,South
...,...,...,...,...,...,...
145,Bendravādi,12.3636,76.9137,Karnātaka,2684,South
146,Kodagihalli,12.9771,77.4651,Karnātaka,2585,South
147,Harna Buzurg,25.0981,87.0148,Bihār,2579,East
148,Mailanhalli,13.1863,77.6963,Karnātaka,2543,South


## MDS Implementation:

In [18]:
# MDS
# random_state=0              -> the algorithm is deterministic, i.e. it
#                                produces the same results on every run.
# dissimilarity='precomputed' -> the distance matrix is passed directly as
#                                the input to the algorithm.
# n_components=2              -> the output is a 2D array (one point per row).
mds = MDS(
    random_state=0,
    dissimilarity='precomputed',
    n_components=2,
)
fit_data = mds.fit_transform(Dist)
fit_data  # should contain an array of 2D points



array([[ -848.63359039,   371.12169091],
       [  183.85670482,   870.17939916],
       [ -177.63364635,  -746.0630776 ],
       [  881.9036325 ,   396.89524137],
       [  877.83367317,   105.05183402],
       [  397.81094898,   286.44030401],
       [  251.53124692,   770.96183211],
       [ -256.08976243,   872.08582404],
       [  -48.38427732,   860.2872392 ],
       [ -486.93005644,   -78.57867529],
       [ -644.39863727,     9.32873556],
       [ -665.65100345,   521.28542962],
       [ -601.06956939,    71.67583244],
       [ -454.45163917,  -151.18225419],
       [  -17.06185335,   212.77954802],
       [ -854.55603378,   352.23081783],
       [ -171.10030549,   814.13555359],
       [  370.18868391,  -225.51605028],
       [ -202.49055742,   540.09358624],
       [  171.06435666,   859.43905437],
       [ -254.87521743,   376.8379456 ],
       [  240.33304285,   775.93429578],
       [ -505.99623989,  -405.6213715 ],
       [ -115.96067696,  -106.81993529],
       [-1109.89

## Visualization:

In [19]:
# Assemble the table behind the bubble plot: the two MDS coordinates,
# joined (by row index) with each city's name, population and zone.
df_bubble = pd.DataFrame(fit_data, columns=['x', 'y']).assign(
    City=df['City'],
    Population=df['population'],
    Zone=df['Zone'],
)
df_bubble

Unnamed: 0,x,y,City,Population,Zone
0,-848.633590,371.121691,Delhi,32226000,North
1,183.856705,870.179399,Mumbai,24973000,West
2,-177.633646,-746.063078,Kolkāta,18502000,East
3,881.903633,396.895241,Bangalore,15386000,South
4,877.833673,105.051834,Chennai,12395000,South
...,...,...,...,...,...
145,947.280690,472.938642,Bendravādi,2684,South
146,881.646636,410.659435,Kodagihalli,2585,South
147,-454.109604,-599.434248,Harna Buzurg,2579,East
148,859.369753,384.824597,Mailanhalli,2543,South


In [20]:
# Create a bubble plot of the MDS embedding: one bubble per city, sized by
# population and coloured by zone; hovering shows the city name.
fig = px.scatter(
    df_bubble,
    x='x',
    y='y',
    size='Population',
    color='Zone',
    hover_data=['City'],
    title='MDS embedding of Indian cities from pairwise distances',
    labels={'x': 'MDS dimension 1 (km)', 'y': 'MDS dimension 2 (km)'},
)
# Lock the aspect ratio to 1:1. The embedding is only meaningful through
# inter-point distances, so stretching one axis would distort the map.
fig.update_yaxes(scaleanchor='x', scaleratio=1)
fig.show()