## Creating an interactive map of India with population parameters

A mapbox access token is not required. 

In [None]:
# Install the required packages (uncomment and run once if any of them is missing).
# NOTE(review): inside a notebook, `%pip install ...` is generally preferable to `!pip`
# because it installs into the environment of the running kernel.
# !pip install dash
# !pip install geopandas
# !pip install dash-bootstrap-components
# !pip install openpyxl
# !pip install geojson

In [None]:
# import all the libraries
from dash import html
import plotly.express as px
import geopandas as gpd
from dash import Dash, dcc, Input, Output
import dash_bootstrap_components as dbc
import json
import geojson
import pandas as pd

### Reading the shape file that contains states
1. Downloaded from [here](https://www.diva-gis.org/gdata)

In [None]:
# Load the state boundaries of India from the DIVA-GIS shapefile.
# NOTE(review): this is an absolute path on the author's machine; adjust it to run elsewhere.
shapefile_path = "D:\\Rahul\\Projects\\India interactive app\\IND_adm1.shp"
geo_df = gpd.read_file(shapefile_path)

## 
Using the geopandas plot function on a dataframe gives a quick look at how the shape file translates to a map

In [None]:
# Quick static preview of the geometries read from the shapefile.
geo_df.plot()

### We will need a geojson file to use dash with mapbox functions


[Online converter](https://products.aspose.app/gis/conversion/shapefile-to-geojson)

In [None]:
#

In [None]:
# Load the GeoJSON version of the state boundaries as a plain Python dict.
# NOTE(review): absolute local path; adjust it to run elsewhere.
geojson_path = "D:\\Rahul\\Projects\\India interactive app\\IND_adm1.geojson"
with open(geojson_path, encoding='utf8') as geojson_file:
    gj_shp = json.load(geojson_file)

In [None]:
# Inspect the first feature to see how a single state is represented in the GeoJSON.
gj_shp['features'][0]

## Reading the required files
---
Downloaded from [here](https://censusindia.gov.in/census.website/data/population-finder)

In [None]:
# Read the 2011 census population workbook into a DataFrame.
# NOTE(review): absolute local path; adjust it to run elsewhere.
census_path = "D:\\Rahul\\Projects\\India interactive app\\2011-IndiaState-0000.xlsx"
popdata = pd.read_excel(census_path)

In [None]:
# Preview the raw census sheet before any cleaning.
popdata.head()

###
Many of the columns are not required; we focus only on the state name and the total population.

In [None]:
# Keep one row per state/UT (TRU == 'Total') and only the two columns used
# later: the state name and the total population.
# The `'TRU' in popdata.columns` guard makes the cell safe to re-run: once the
# frame has been reduced there is nothing left to do, whereas the previous
# in-place drops raised a KeyError on a second execution.
if 'TRU' in popdata.columns:
    popdata = (
        popdata
        # Rows labelled 0-2 are discarded first (presumably the all-India
        # aggregate rows -- confirm against the workbook).
        .drop(index=[0, 1, 2])
        # Row filter and column selection in one step; every column other
        # than Name and TOT_P is dropped.
        .loc[lambda frame: frame['TRU'] == 'Total', ['Name', 'TOT_P']]
        # Fresh 0..n-1 index so that later label-based row additions cannot
        # collide with a leftover label from the raw sheet.
        .reset_index(drop=True)
    )

In [None]:
# Check the reduced frame: only the state name and total population should remain.
popdata.head()

##
The shape file has the state names under properties->NAME_1

However, the format is different from the one in the data frame. We need to change the format

In [None]:
# We can look at the difference in formats here
# Collect every state name stored in the GeoJSON (properties -> NAME_1) and
# print them, so their spelling can be compared with the census names.
l = [feature['properties']['NAME_1'] for feature in gj_shp['features']]
for state_name in l:
    print(state_name)

In [None]:
# Census names (Andhra Pradesh excluded) that have no case-insensitive match
# among the GeoJSON state names collected in `l`.
census_names = {
    name.lower()
    for name in popdata.loc[popdata['Name'] != 'ANDHRA PRADESH', 'Name']
}
geojson_names = {name.lower() for name in l}
census_names - geojson_names

##
Changing the mismatching names

In [None]:
# Map the census spellings onto the ones used in the GeoJSON (exact matches only).
# The '&' -> 'and' substitution is deliberately left to a later cell.
census_to_geojson = {
    'NCT OF DELHI': 'DELHI',
    'ODISHA': 'ORISSA',
    'UTTARAKHAND': 'UTTARANCHAL',
}
popdata['Name'] = popdata['Name'].replace(census_to_geojson)

In [None]:
# Drop the trailing 'ISLANDS' from the Andaman & Nicobar entry.
popdata['Name'] = popdata['Name'].replace(
    'ANDAMAN & NICOBAR ISLANDS', 'ANDAMAN & NICOBAR'
)

In [None]:
# Review the state names after the manual renames.
popdata['Name']

### 
Because the data frame is older than the shape file, it does not have information on Telangana, a state separated from Andhra Pradesh in 2014

We need to add a new row for Telangana. For simplicity, I am making all the figures half of Andhra Pradesh

In [None]:
# Telangana is missing from the 2011 figures, so add a row for it with roughly
# half of Andhra Pradesh's population.
# - The case-insensitive check keeps a re-run from appending a duplicate row
#   (names are title-cased in a later cell).
# - The new label is one past the current maximum rather than len(popdata):
#   rows were dropped earlier, so len(popdata) may coincide with the label of
#   an existing row, which .loc would silently overwrite instead of appending.
# Assumes the default integer index produced by read_excel -- TODO confirm.
if not popdata['Name'].str.upper().eq('TELANGANA').any():
    popdata.loc[popdata.index.max() + 1] = ['TELANGANA', 42290388]

In [None]:
# Reduce the statistic value for Andhra Pradesh by half
popdata.loc[popdata['Name'] == 'ANDHRA PRADESH','TOT_P'] = 42290389

In [None]:
# Spot-check ten random rows (no random_state is set, so the selection differs between runs).
popdata.sample(10)

##
The string format has to be changed, so we first use the `title()` function

In [None]:
# Convert the upper-case census names to title case.
popdata['Name'] = popdata['Name'].map(str.title)

In [None]:
# Spot-check ten random rows after title-casing the names.
popdata.sample(10)

In [None]:
# Also, the '&' symbol has to be replaces with 'and'
popdata.loc[popdata['Name'].str.contains('&'),'Name'] = [str(i).replace('&','and') for i in popdata.loc[popdata['Name'].str.contains('&')]['Name']]

### A simple choropleth mapbox for the given data set 

In [None]:
# Choropleth of total population per state. Rows are matched to polygons by
# comparing the 'Name' column with each feature's properties.NAME_1.
map_center = {"lat": 20.593684, "lon": 78.96288}
fig = px.choropleth_mapbox(
    popdata,
    geojson=gj_shp,
    featureidkey='properties.NAME_1',
    locations='Name',
    color='TOT_P',
    mapbox_style="carto-positron",
    zoom=3,
    center=map_center,
    opacity=0.5,
)
fig.show()

### Reading more data

In [None]:
# Read the state-wise population/income CSV.
# NOTE(review): absolute local path; adjust it to run elsewhere.
rbi_csv_path = "D:\\Rahul\\Projects\\India interactive app\\RBI DATA states_wise_population_Income.csv"
data_full = pd.read_csv(rbi_csv_path)

In [None]:
# Preview the first rows of the newly loaded data set.
data_full.head()

In [None]:
# List the distinct state / union-territory names present in this data set.
data_full['States_Union Territories'].unique()

Some of the union territories are missing from this data set, so we will have to add dummy data

In [None]:
# Show the available columns so that the ones of interest can be picked.
data_full.columns

We choose only a select few columns from the entire data frame

In [None]:
# Keep only the state name and the three 2011 indicators used on the map.
# An explicit .copy() makes df_compressed an independent frame: later cells
# assign into it with .loc, which otherwise triggers pandas'
# SettingWithCopyWarning because the frame was derived from data_full.
selected_columns = ['States_Union Territories', '2011 -UNEMP', '2011 -Poverty', '2011-12-INC']
df_compressed = data_full[selected_columns].copy()

In [None]:
# Names present in the census frame but absent (case-insensitively) from the
# indicator frame.
census_names = {name.lower() for name in popdata['Name']}
indicator_names = {name.lower() for name in df_compressed['States_Union Territories']}
census_names - indicator_names

In [None]:
# Spot-check ten random rows of the reduced indicator frame.
df_compressed.sample(10)

In [None]:
# Reformating some of the state names
df_compressed.loc[df_compressed['States_Union Territories']=='Andaman and Nicobar Islands','States_Union Territories'] = 'Andaman and Nicobar'
df_compressed.loc[df_compressed['States_Union Territories']=='Odisha','States_Union Territories'] = 'Orissa'
df_compressed.loc[df_compressed['States_Union Territories']=='Uttarakhand','States_Union Territories'] = 'Uttaranchal'

In [None]:
# Spot-check ten random rows after the renames.
df_compressed.sample(10)

In [None]:
# Adding Telangana to the data set and reducing values from Andhra Pradesh
df_compressed.loc[len(df_compressed)] = ['Telangana', 6, 4.6, 32386]
df_compressed.loc[df_compressed['States_Union Territories'] == 'Andhra Pradesh'] = ['Andhra Pradesh', 6, 4.6, 32387]

In [None]:
# Display the full indicator frame to verify the Telangana / Andhra Pradesh rows.
df_compressed

In [None]:
# Re-check which census names still have no case-insensitive counterpart in
# the indicator frame.
census_names = {name.lower() for name in popdata['Name']}
indicator_names = {name.lower() for name in df_compressed['States_Union Territories']}
census_names - indicator_names

These three union territories have to be added

In [None]:
# Append one all-zero placeholder row for each union territory that the
# indicator data set lacks.
for territory in ('Dadra and Nagar Haveli', 'Daman and Diu', 'Lakshadweep'):
    df_compressed.loc[len(df_compressed)] = [territory, 0, 0, 0]

In [None]:
# Rename the column to merge the dataframes 
df_compressed.rename(columns = {'States_Union Territories':'Name'}, inplace= True)

In [None]:
# Join the indicators with the population figures on the state name
# (default inner join: only names present in both frames are kept).
df_merged = df_compressed.merge(popdata, on='Name')

In [None]:
# Preview the merged frame that feeds the Dash app.
df_merged.head()

### Dash app with interactivity based on the columns of the merged dataframe

In [None]:
# dash-bootstrap-components ships no CSS of its own: a Bootstrap stylesheet
# must be linked, otherwise dbc.Container renders unstyled.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

mytitle = dcc.Markdown('Indian Demographics')
mygraph = dcc.Graph()
# Every column except the state name can be shown on the map.
# clearable=False prevents the user from emptying the dropdown, so the
# callback never receives None as the column to colour by.
# Style keys are camelCased, as Dash expects for inline styles.
mydropdown = dcc.Dropdown(
    options=[column for column in df_merged.columns if column != 'Name'],
    value='TOT_P',
    clearable=False,
    style={'alignItems': 'center', 'justifyContent': 'center', 'width': '40%'},
)


app.layout = dbc.Container([mytitle, mydropdown, mygraph])


@app.callback(
    Output(mygraph, 'figure'),
    Input(mydropdown, 'value')
)
def update_graph(selected_value):
    """Rebuild the choropleth whenever the dropdown selection changes.

    Parameters
    ----------
    selected_value : str
        Name of the ``df_merged`` column used to colour the states.

    Returns
    -------
    plotly.graph_objects.Figure
        State-level choropleth of India coloured by ``selected_value``.
    """
    fig = px.choropleth_mapbox(
        df_merged,
        geojson=gj_shp,
        # Rows are matched to polygons via the state name.
        featureidkey='properties.NAME_1',
        locations='Name',
        color=selected_value,
        mapbox_style="carto-positron",
        zoom=3,
        center={"lat": 20.593684, "lon": 78.96288},
        opacity=0.5,
    )
    # Remove the default margins so the map fills the graph area.
    fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

    return fig


# `run_server` is deprecated (and removed in Dash 3) in favour of `run`;
# `run` accepts the same arguments, including `jupyter_mode`.
app.run(debug=True, jupyter_mode='external')
