In [None]:
import json
import os
import re
from io import StringIO

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

## Scraping the Wikipedia page using the BeautifulSoup library

In [None]:
# Download the Wikipedia article; the timeout keeps the cell from hanging
# indefinitely and raise_for_status() stops an HTTP error page from being parsed.
res = requests.get("https://en.wikipedia.org/wiki/2020_coronavirus_pandemic_in_India", timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.content,'lxml')

In [None]:

# Pick the fifth <table> element of the page (presumably the day-by-day
# per-state table -- TODO confirm, the position depends on the page layout).
table = soup.find_all('table')[4]
# read_html returns a list of DataFrames. The markup is wrapped in StringIO
# because passing a literal HTML string is deprecated in recent pandas.
df = pd.read_html(StringIO(str(table)))


## converting it into pandas dataframe

In [None]:
# First table returned by read_html, transposed so that the original
# columns become rows.
parsed_table = pd.DataFrame(df[0])
day_to_day = parsed_table.transpose()
day_to_day.head()


## Data cleaning

In [None]:
# Move the transposed frame's index into regular column(s) and use a fresh
# integer row index.
covid_ind = day_to_day.reset_index()

In [None]:
# Preview the first rows after resetting the index.
covid_ind.head()

In [None]:
# Write the label 'State' into the cell at row position 0, column position 1.
# Row 0 is later promoted to the column header.
covid_ind.iloc[0,1] = 'State'

In [None]:
# Check that the 'State' label landed in the first row.
covid_ind.head()

Removing unnecessary columns.

In [None]:

# Drop the 'level_0' column and the columns labelled 67 and 66.
# NOTE(review): these labels are tied to the layout of the scraped table --
# confirm they still exist if the source page has changed.
covid_ind = covid_ind.drop(['level_0',67,66],axis=1)

In [None]:
# Promote the first row to column labels.
covid_ind.columns = covid_ind.iloc[0,:]
# Remove the rows labelled 0 (the row just used as header) and 39
# (presumably a footer/summary row -- TODO confirm).
covid_ind = covid_ind.drop([0,39])
covid_ind.head()

In [None]:
# Column dtypes and non-null counts before the missing values are filled.
covid_ind.info()

In [None]:
# Summary statistics of the frame as scraped.
covid_ind.describe()

## Filling missing values

In [None]:
# Replace every missing value with the string '0' (a string, not a number:
# the values are converted to integers in a later cleaning step).
covid_ind = covid_ind.fillna('0')

In [None]:
# Inspect a single cell: row label 34, column 'Apr-25'.
covid_ind.loc[34,'Apr-25']

In [None]:
# Replace, everywhere in the frame, the value currently stored in 'Deaths'
# at row 1 (and then the one at row 37) with the string '0'.
# NOTE(review): presumably these two cells hold placeholder text; if either
# held a genuine number, that number would be zeroed in every column -- confirm.
covid_ind = covid_ind.replace(covid_ind.loc[1,'Deaths'],'0')
covid_ind = covid_ind.replace(covid_ind.loc[37,'Deaths'],'0')

In [None]:
# Display the whole frame to check the replacements.
covid_ind

In [None]:
# Cast every column to pandas' 'string' dtype so that the regex-based
# cleaning function receives text values.
covid_ind = covid_ind.astype('string')

We have to convert the irregularly formatted values (numbers with bracketed annotations) into plain values.

In [None]:
def simplyfy(str1):
    """Strip bracketed annotations from a table cell and convert it to int if possible.

    Parameters
    ----------
    str1 : str
        Raw cell text, e.g. ``'1,234(56)[a]'``. Non-string values are
        returned unchanged instead of raising ``TypeError``.

    Returns
    -------
    int or str
        The integer value when the cleaned text is numeric (thousands
        separators are ignored); otherwise the cleaned, whitespace-stripped text.
    """
    if not isinstance(str1, str):
        return str1

    # Remove "(...)" groups first, then "[...]" groups. A combined
    # "(...)[...]" pass is unnecessary: nothing is left for it to match.
    cleaned = re.sub(r'\(.*?\)', '', str1)
    cleaned = re.sub(r'\[.*?\]', '', cleaned)
    # Removing an annotation can leave stray spaces around a name.
    cleaned = cleaned.strip()

    try:
        # Commas are dropped only for the numeric attempt, so text that
        # legitimately contains a comma is returned intact.
        return int(cleaned.replace(',', ''))
    except ValueError:
        return cleaned


Applying the function defined above to the whole DataFrame.

In [None]:
# Clean every column in turn with simplyfy.
for column_name in covid_ind.columns:
    cleaned_column = covid_ind[column_name].apply(simplyfy)
    covid_ind[column_name] = cleaned_column

In [None]:
# Preview the cleaned values.
covid_ind.head()

In [None]:
# Re-check the column dtypes after cleaning.
covid_ind.info()

In [None]:
# Plotting frame: all rows except the last five (presumably summary/footer
# rows -- TODO confirm), indexed by the 'State' column.
df_plot = covid_ind.iloc[:-5,:].set_index('State',drop=True)

In [None]:
# Display the frame that the plotting cells use.
df_plot

### Defining a function to plot a bar chart of the cases recorded each day for a required state

In [None]:
def state_plot(state_name='Gujarat'):
    """Show a bar chart of the per-date values recorded for one state.

    Parameters
    ----------
    state_name : str, default 'Gujarat'
        Index label of the row of ``df_plot`` to plot. An empty string
        falls back to 'Gujarat'.

    Returns
    -------
    None
        The chart is displayed with ``plt.show()``. When the name is not in
        ``df_plot.index`` a hint and the list of valid names are printed
        instead.
    """
    states = list(df_plot.index)

    req = state_name
    if req == '':
        req = 'Gujarat'

    # Guard only the lookup: a ValueError raised while plotting must not be
    # misreported as an invalid state name.
    try:
        row = states.index(req)
    except ValueError:
        print('\n','Enter valid state name')
        print(states,'\n')
        return

    # The last two columns are left out of the chart (presumably the summary
    # columns rather than dates -- TODO confirm).
    df_plot.iloc[row,:-2].plot(kind='bar',figsize=(26,12))
    plt.title(str(df_plot.index[row]))
    plt.xlabel('Date')
    plt.ylabel('No of patients')
    plt.show()


In [None]:
# Daily chart for Gujarat.
state_plot('Gujarat')

In [None]:
# Daily chart for Maharashtra.
state_plot('Maharashtra')

### Building choropleth map using folium. 
    Building a choropleth map of each state's total cases up to 29 April 2020.

In [None]:
india_geo = r'india.json' # geojson file

# Rows shown on the map: everything except the last five rows of covid_ind
# (presumably non-state summary rows -- TODO confirm).
state_rows = covid_ind.iloc[:-5,:]

# Six linearly spaced integer bin edges from the minimum to the maximum total cases.
threshold_scale = np.linspace(state_rows['Total'].min(),
                              state_rows['Total'].max(),
                              6, dtype=int)
threshold_scale = threshold_scale.tolist() # change the numpy array to a list
threshold_scale[-1] = threshold_scale[-1] + 1 # the last edge must exceed the maximum number of cases

# 'Mapbox Bright' tiles are no longer shipped with folium, so the default
# OpenStreetMap tiles are used instead.
india = folium.Map(location=[20.5937, 78.9629], zoom_start=5, tiles='OpenStreetMap')
# folium.Choropleth replaces the deprecated Map.choropleth method; its `bins`
# argument takes the explicit bin edges formerly passed as `threshold_scale`.
# `reset` is not needed because the layer is added to a freshly created map.
folium.Choropleth(
    geo_data=india_geo,
    data=state_rows,
    columns=['State','Total'],
    key_on='feature.properties.NAME_1',
    bins=threshold_scale,
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Coroa cases in india',
).add_to(india)
india

Building a choropleth map of each state's total deaths up to 29 April 2020.

In [None]:
india_geo = r'india.json' # geojson file

# Rows shown on the map: everything except the last five rows of covid_ind
# (presumably non-state summary rows -- TODO confirm).
state_rows = covid_ind.iloc[:-5,:]

# Six linearly spaced integer bin edges from the minimum to the maximum number of deaths.
threshold_scale = np.linspace(state_rows['Deaths'].min(),
                              state_rows['Deaths'].max(),
                              6, dtype=int)
threshold_scale = threshold_scale.tolist() # change the numpy array to a list
threshold_scale[-1] = threshold_scale[-1] + 1 # the last edge must exceed the maximum number of deaths

# 'Mapbox Bright' tiles are no longer shipped with folium, so the default
# OpenStreetMap tiles are used instead.
india = folium.Map(location=[20.5937, 78.9629], zoom_start=5, tiles='OpenStreetMap')
# folium.Choropleth replaces the deprecated Map.choropleth method; its `bins`
# argument takes the explicit bin edges formerly passed as `threshold_scale`.
# `reset` is not needed because the layer is added to a freshly created map.
folium.Choropleth(
    geo_data=india_geo,
    data=state_rows,
    columns=['State','Deaths'],
    key_on='feature.properties.NAME_1',
    bins=threshold_scale,
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Death due to Corona in india',
).add_to(india)
india

### Building Bar charts

In [None]:
# Bar chart of the 'Total' column, one bar per row of df_plot.
ax = df_plot['Total'].plot.bar(figsize=(15,10), color=['b'])

# Label each bar with its height, placed slightly above the bar top.
for bar in ax.patches:
    bar_height = bar.get_height()
    label_position = (bar.get_x() * 1.005, bar_height * 1.005)
    ax.annotate(str(bar_height), label_position)

plt.title('India all states total cases', fontdict={'fontsize':22})
plt.show()

In [None]:
# Bar chart of the 'Deaths' column, one bar per row of df_plot.
ax = df_plot['Deaths'].plot.bar(figsize=(20,12), color=['r'])

# Label each bar with its height, placed slightly above the bar top.
for bar in ax.patches:
    bar_height = bar.get_height()
    label_position = (bar.get_x() * 1.005, bar_height * 1.005)
    ax.annotate(str(bar_height), label_position)

plt.title('India all states total Deaths', fontdict={'fontsize':22})
plt.show()

## Locating highest-rated hospitals in Mumbai city using Foursquare

In [None]:
# Look up Mumbai's latitude and longitude with the Nominatim geocoder.

address = 'Mumbai , India'

# Nominatim's usage policy requires an application-specific user agent;
# recent geopy releases refuse to run with the default one.
geolocator = Nominatim(user_agent='covid_india_notebook')
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Mumbai City are {}, {}.'.format(latitude, longitude))

In [None]:
# Base map centred on the geocoded Mumbai coordinates.
mumbai = folium.Map(location=[latitude, longitude], zoom_start=12)

In [None]:
# Render the empty base map.
mumbai

In [None]:
# Foursquare credentials are read from the environment so that they are never
# stored in the notebook; both fall back to '' when the variable is not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20190404' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Only report whether the secret is present: printing it would leak it into
# the saved notebook output.
print('CLIENT_SECRET: ' + ('<set>' if CLIENT_SECRET else '<not set>'))

In [None]:
# Search parameters for the Foursquare venue search.
search_query = 'hospital'
LIMIT = 100
radius = 8000

# Endpoint and query-string template, filled in with the credentials, the
# Mumbai coordinates and the search parameters.
url_template = (
    'https://api.foursquare.com/v2/venues/search'
    '?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'
)
url = url_template.format(
    CLIENT_ID, CLIENT_SECRET,
    latitude, longitude,
    VERSION, search_query, radius, LIMIT,
)

In [None]:
# Query the API; the timeout avoids an indefinite hang and raise_for_status()
# reports an HTTP error here rather than as a confusing KeyError further down.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [None]:
# Display the raw JSON response (this can be long).
results

In [None]:
# assign relevant part of JSON to venues
venues = results['response']['venues']

# Flatten the nested venue records into a DataFrame. pd.json_normalize is the
# supported entry point; pandas.io.json.json_normalize is deprecated.
hospital = pd.json_normalize(venues)
hospital.head()

In [None]:
# Number of venues returned (rows) and flattened fields (columns).
hospital.shape

In [None]:
# Feature group that collects one marker per hospital.
incidents = folium.map.FeatureGroup()

# Add a circle marker at the coordinates of every venue in the hospital frame.
hospital_coords = zip(hospital['location.lat'], hospital['location.lng'])
for lat, lng in hospital_coords:
    marker = folium.features.CircleMarker(
        [lat, lng],
        radius=5,  # size of the circle marker
        color='yellow',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
    )
    incidents.add_child(marker)

# Attach the markers to the Mumbai map.
mumbai.add_child(incidents)

