In [1]:
import json
import pandas as pd
import plotly.express as px
import numpy as np

# To keep the Jupyter notebook from getting heavy, figures can be rendered outside it:

# import plotly.io as pio
# pio.renderers.default = 'chrome'
# With this set, calling fig.show() opens the figure in a separate browser tab, which keeps the notebook itself light.

# Load the state boundaries. The context manager closes the file as soon as it
# has been parsed, and the explicit encoding avoids depending on the platform
# default when decoding the state names.
with open('states_india_geojson.geojson', 'r', encoding='utf-8') as geojson_file:
    indian_states_geojson = json.load(geojson_file)
# Inspection of the first feature; only the cell's last expression is rendered.
indian_states_geojson['features'][0]

# Census table, one row per state / union territory.
df = pd.read_csv('https://raw.githubusercontent.com/nikhilkumarsingh/choropleth-python-tutorial/master/india_census.csv')
df.head()

Unnamed: 0,Rank,State or union territory,Population,Population (%),Decadal growth(2001–2011),Rural population,Percent rural,Urban population,Percent urban,Area[16],Density[a],Sex ratio
0,1.0,Uttar Pradesh,199812341,,20.20%,155317278,,44495063,,"240,928 km2 (93,023 sq mi)","828/km2 (2,140/sq mi)",912
1,2.0,Maharashtra,112374333,,20.00%,61556074,,50818259,,"307,713 km2 (118,809 sq mi)",365/km2 (950/sq mi),929
2,3.0,Bihar,104099452,,25.40%,92341436,,11758016,,"94,163 km2 (36,357 sq mi)","1,102/km2 (2,850/sq mi)",918
3,4.0,West Bengal,91276115,,13.80%,62183113,,29093002,,"88,752 km2 (34,267 sq mi)","1,029/km2 (2,670/sq mi)",953
4,5.0,Madhya Pradesh,72626809,,16.30%,52557404,,20069405,,"308,245 km2 (119,014 sq mi)",236/km2 (610/sq mi),931


In [2]:
## Apply preprocessing
# - turn the textual density into an integer column
# - keep the part before the first "/" and strip the "," thousands separators,
#   e.g. "1,029/km2 (2,670/sq mi)" -> 1029

# Spot-check the parsing on a single row (label 3) before applying it to the column.
df.loc[3, 'Density[a]'].split('/')[0].replace(",", "")

# Vectorised string handling instead of a per-row Python lambda.
df['Density'] = (
    df['Density[a]']
    .str.split('/').str[0]               # text before the first "/"
    .str.replace(',', '', regex=False)   # drop thousands separators
    .astype('int64')
)
df['Density']



0       828
1       365
2      1102
3      1029
4       236
5       555
6       201
7       319
8       308
9       303
10      269
11      312
12      859
13      414
14      397
15      550
16      189
17      573
18      189
19      123
20      350
21      132
22      122
23      119
24      394
25       17
26       52
27       86
28    11297
29      297
30     2598
31     9252
32      970
33      970
34       46
35     2013
Name: Density, dtype: int64

In [3]:
## Mapping of geojson to df
# - plotly matches dataframe rows to geojson features through each feature's
#   `id`, so every feature needs an id and the dataframe needs a matching column


# Inspection of the first feature's structure (not rendered: only the cell's
# last expression is displayed).
indian_states_geojson['features'][0].keys()

indian_states_geojson['features'][0]['properties']

indian_states_geojson['features'][0]['properties']['state_code']

state_id_map = {}
for feature in indian_states_geojson['features']:
    # use the state code from the feature's properties as the feature id
    feature['id'] = feature['properties']['state_code']
    # remember which id belongs to which state name, to look ids up from the dataframe
    state_id_map[feature['properties']['st_nm']] = feature['id']

# Fail early, listing every state name that has no geojson feature, instead of
# stopping at the first missing key.
state_names = df['State or union territory']
unmatched = state_names[~state_names.isin(list(state_id_map))].unique().tolist()
if unmatched:
    raise KeyError(f"No geojson feature found for: {unmatched}")

# map each state name to its feature id
df['id'] = state_names.map(state_id_map)
df['id']

0      9
1     27
2     10
3     19
4     23
5     33
6      8
7     29
8     24
9     28
10    21
11     0
12    32
13    20
14    18
15     3
16    22
17     6
18     5
19     2
20    16
21    17
22    14
23    13
24    30
25    12
26    15
27    11
28     7
29     1
30    34
31     4
32    26
33    25
34    35
35    31
Name: id, dtype: int64

In [None]:

# `locations` is the name of the dataframe column whose values are matched
# against each geojson feature's id - here the 'id' column.
#
# First attempt (no scope): India comes out very small.
# fig = px.choropleth(df, locations='id', geojson=indian_states_geojson, color='Density')
# fig.show()
#
# Second attempt (scope='asia'): the other countries in Asia are still shown.
# fig = px.choropleth(df, locations='id', geojson=indian_states_geojson, color='Density', scope='asia')
# fig.show()

# Final version: pass the column name rather than the Series itself, as the
# note above describes.
fig = px.choropleth(df, locations='id', geojson=indian_states_geojson, color='Density', scope='asia')
# Fit the view to the plotted locations and hide the base map, so only India is shown.
fig.update_geos(fitbounds='locations', visible=False)
fig.show()

In [None]:




# The spread of the density values is too wide for a linear colour scale:
# they run from 17 up to 11297, so a few very dense territories dominate the
# colours and the previous map shows little variation between the other states.
#
# Taking log10 compresses the values into roughly 1.2 to 4.1; store that as a
# new column and colour the map by it.
df['DensityScale'] = np.log10(df['Density'])

fig = px.choropleth(
    df,
    locations='id',
    geojson=indian_states_geojson,
    color='DensityScale',
    scope='asia',
    # Make it clear that the colour bar is on a log scale, and keep the actual
    # density and the state name readable on hover.
    labels={'DensityScale': 'log10(density per km2)'},
    hover_name='State or union territory',
    hover_data=['Density'],
)
fig.update_geos(fitbounds='locations', visible=False)
fig.show()
# now we can see more colour variation between states