<a href="https://colab.research.google.com/github/sanjaydasgupta/covid-19/blob/master/Covid19India-Csv-Files.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Generate CSV files for 20 states with highest infection/recovery numbers

This notebook downloads state-wise daily data from [https://www.covid19india.org](https://www.covid19india.org) and writes a separate CSV file for each of the 20 states with the highest single-day confirmed or recovered counts during the first full year of data (2020-03-14 to 2021-03-13).

## Usual Imports

In [1]:
from datetime import datetime
import pandas as pd
import numpy as np

## Adapter Class for `https://www.covid19india.org`

In [2]:
class Covid19India:
  """Adapter for the state-wise CSV feeds served from api.covid19india.org.

  Creating an instance downloads two CSV files:

  * ``state_wise.csv`` -- only its 'State_code' and 'State' columns are used,
    to build the code -> name lookup table.
  * ``state_wise_daily.csv`` -- expected to hold 'Date' and 'Status' columns
    plus one column per state code.

  Attributes:
    state_name_map: dict mapping state code to state name.
    state_wise_daily_df: the daily DataFrame, with 'Date' converted to
      ``datetime.date`` objects.
    date_min: earliest 'Date' found in the daily data.
    date_max: latest 'Date' found in the daily data.
  """

  def __init__(self):
    """Download both CSV files and cache the daily data on the instance.

    Raises:
      ValueError: if 'state_wise_daily.csv' contains any missing value.
    """
    # Fetch state-code to state-name conversion table
    states_wise_url = 'https://api.covid19india.org/csv/latest/state_wise.csv'
    state_wise_df = pd.read_csv(states_wise_url)
    self.state_name_map = dict(zip(state_wise_df['State_code'], state_wise_df['State']))
    # Fetch state-wise daily COVID19 data
    state_wise_daily_url = 'https://api.covid19india.org/csv/latest/state_wise_daily.csv'
    state_wise_daily_df = pd.read_csv(state_wise_daily_url)
    # Vectorised null check; DataFrame.applymap is deprecated in recent pandas.
    if state_wise_daily_df.isna().any().any():
      raise ValueError("Unknown values found in 'state_wise_daily.csv'")
    # Keep plain datetime.date objects so they compare directly with the
    # dates produced in get_state_data().
    state_wise_daily_df['Date'] = pd.to_datetime(state_wise_daily_df['Date']).dt.date
    self.date_min = state_wise_daily_df['Date'].min()
    self.date_max = state_wise_daily_df['Date'].max()
    self.state_wise_daily_df = state_wise_daily_df

  def __repr__(self):
    """Return ``ClassName(date_min, date_max)`` using the reprs of both dates."""
    return '%s(%r, %r)' % (self.__class__.__name__, self.date_min, self.date_max)

  def get_state_data(self, state_code, date_start=None, date_end=None):
    """Return the per-date figures of one state, one column per status.

    Args:
      state_code: a key of ``state_name_map``; it must also be a column of
        the daily data.
      date_start: first date to include (inclusive), in any form accepted by
        ``pd.to_datetime``. A falsy value means ``date_min``.
      date_end: last date to include (inclusive), in any form accepted by
        ``pd.to_datetime``. A falsy value means ``date_max``.

    Returns:
      A DataFrame with a 'Date' column followed by one column per distinct
      'Status' value, holding the sum of the state's values for that date
      and status.

    Raises:
      ValueError: if ``state_code`` is unknown, ``date_start`` is earlier
        than ``date_min`` or ``date_end`` is later than ``date_max``.
    """
    if state_code not in self.state_name_map:
      # Show the valid codes before failing, to help interactive use.
      print(self.state_name_map.keys())
      raise ValueError("invalid state_code %r" % state_code)
    if date_start:
      date_start = pd.to_datetime(date_start).date()
      if date_start < self.date_min:
        raise ValueError("date_start must be >= %r" % self.date_min)
    else:
      date_start = self.date_min
    if date_end:
      date_end = pd.to_datetime(date_end).date()
      if date_end > self.date_max:
        raise ValueError("date_end must be <= %r" % self.date_max)
    else:
      date_end = self.date_max
    daily = self.state_wise_daily_df
    in_range = (daily['Date'] >= date_start) & (daily['Date'] <= date_end)
    dfs = daily.loc[in_range, ['Date', 'Status', state_code]]
    dfs = dfs.groupby(['Date', 'Status']).sum().unstack()
    # unstack() yields (state_code, status) column pairs; keep only the status.
    dfs.columns = [status for _, status in dfs.columns]
    return dfs.reset_index()

# Build the adapter (the constructor downloads both CSV files) and show the
# first and last dates present in the daily data.
covid19India = Covid19India()
print('%s %s' % (covid19India.date_min, covid19India.date_max))

2020-03-14 2021-09-25


## Top 20 States (First full year)

In [3]:
# Score each state by the largest value found in its 'Confirmed' or
# 'Recovered' column up to 2021-03-13, then keep the 20 highest scores.
scores = []
for code, name in covid19India.state_name_map.items():
  if code == 'TT':
    # 'TT' is excluded from the ranking (presumably the all-India total -- confirm).
    continue
  df = covid19India.get_state_data(code, date_end='2021-03-13')
  max_val = max(df['Confirmed'].max(), df['Recovered'].max())
  scores.append((code, max_val))

# Stable descending sort: states with equal scores keep their original order.
top_20 = sorted(scores, key=lambda score: score[1], reverse=True)[:20]
top_20

[('MH', 32007),
 ('KA', 13217),
 ('AP', 12750),
 ('KL', 11755),
 ('DL', 8775),
 ('CT', 8027),
 ('TN', 7758),
 ('UP', 7016),
 ('MP', 5729),
 ('OR', 4761),
 ('AS', 4593),
 ('WB', 4480),
 ('BR', 4140),
 ('RJ', 3314),
 ('JH', 3221),
 ('HR', 3104),
 ('TG', 3018),
 ('PB', 2848),
 ('JK', 2796),
 ('UT', 2078)]

In [4]:
# Write one '<code>-2020-21.csv' file per top-20 state, covering the data up
# to 2021-03-13 and leaving out the DataFrame index.
for code, _ in top_20:
  csv_path = '%s-2020-21.csv' % code
  df = covid19India.get_state_data(code, date_end='2021-03-13')
  df.to_csv(csv_path, index=False)

# IPython shell escape: list the CSV files in the working directory to check
# what the export loop wrote.
!ls -l *.csv

-rw-r--r-- 1 root root 7895 Sep 26 06:43 AP-2020-21.csv
-rw-r--r-- 1 root root 7441 Sep 26 06:43 AS-2020-21.csv
-rw-r--r-- 1 root root 7641 Sep 26 06:43 BR-2020-21.csv
-rw-r--r-- 1 root root 7725 Sep 26 06:43 CT-2020-21.csv
-rw-r--r-- 1 root root 8192 Sep 26 06:43 DL-2020-21.csv
-rw-r--r-- 1 root root 7767 Sep 26 06:43 HR-2020-21.csv
-rw-r--r-- 1 root root 7343 Sep 26 06:43 JH-2020-21.csv
-rw-r--r-- 1 root root 7498 Sep 26 06:43 JK-2020-21.csv
-rw-r--r-- 1 root root 8075 Sep 26 06:43 KA-2020-21.csv
-rw-r--r-- 1 root root 8054 Sep 26 06:43 KL-2020-21.csv
-rw-r--r-- 1 root root 8874 Sep 26 06:43 MH-2020-21.csv
-rw-r--r-- 1 root root 7925 Sep 26 06:43 MP-2020-21.csv
-rw-r--r-- 1 root root 7690 Sep 26 06:43 OR-2020-21.csv
-rw-r--r-- 1 root root 7724 Sep 26 06:43 PB-2020-21.csv
-rw-r--r-- 1 root root 7971 Sep 26 06:43 RJ-2020-21.csv
-rw-r--r-- 1 root root 7769 Sep 26 06:43 TG-2020-21.csv
-rw-r--r-- 1 root root 8219 Sep 26 06:43 TN-2020-21.csv
-rw-r--r-- 1 root root 8101 Sep 26 06:43 UP-2020