In [7]:
import requests
import pandas as pd

In [9]:
url = "https://data.covid19india.org/v4/min/timeseries.min.json"
response = requests.get(url)

if response.status_code == 200:
    data = response.json()
else:
    print("Failed to retrieve data")


In [12]:
df = pd.DataFrame.from_dict(data, orient='index')

In [13]:
df

Unnamed: 0,dates
AN,"{'2020-03-26': {'delta': {'confirmed': 1}, 'de..."
AP,"{'2020-03-12': {'delta': {'confirmed': 1}, 'de..."
AR,"{'2020-04-02': {'delta': {'confirmed': 1}, 'de..."
AS,"{'2020-03-31': {'delta': {'confirmed': 1}, 'de..."
BR,"{'2020-03-22': {'delta': {'confirmed': 2}, 'de..."
CH,"{'2020-03-19': {'delta': {'confirmed': 1}, 'de..."
CT,"{'2020-03-19': {'delta': {'confirmed': 1}, 'de..."
DL,"{'2020-03-02': {'delta': {'confirmed': 1}, 'de..."
DN,"{'2020-04-09': {'delta': {'tested': 80}, 'delt..."
GA,"{'2020-03-25': {'delta': {'confirmed': 3}, 'de..."


In [15]:
cleaned_data_list = []

for state, state_data in df.items():
    for date, details in state_data.items():
        delta = details.get('delta', {})
        row = {
            'State': state,
            'Date': date,
            'Confirmed': delta.get('confirmed', 0),
            'Deaths': delta.get('deceased', 0),
            'Recovered': delta.get('recovered', 0),
            'Tested': delta.get('tested', 0)
        }
        cleaned_data_list.append(row)


cleaned_data = pd.DataFrame(cleaned_data_list)

cleaned_data['Date'] = pd.to_datetime(cleaned_data['Date'], errors='coerce')


cleaned_data.dropna(subset=['Date'], inplace=True)


numeric_cols = ['Confirmed', 'Deaths', 'Recovered', 'Tested']
cleaned_data[numeric_cols] = cleaned_data[numeric_cols].apply(pd.to_numeric, errors='coerce')


In [19]:
cleaned_data.fillna(0, inplace=True)

cleaned_data.sort_values(by=['State', 'Date'], inplace=True)

cleaned_data['Daily Confirmed'] = cleaned_data.groupby('State')['Confirmed'].diff().fillna(0)
cleaned_data['Daily Deaths'] = cleaned_data.groupby('State')['Deaths'].diff().fillna(0)
cleaned_data['Daily Recovered'] = cleaned_data.groupby('State')['Recovered'].diff().fillna(0)
cleaned_data['Daily Tested'] = cleaned_data.groupby('State')['Tested'].diff().fillna(0)


print(cleaned_data.head())



Empty DataFrame
Columns: [State, Date, Confirmed, Deaths, Recovered, Tested, Daily Confirmed, Daily Deaths, Daily Recovered, Daily Tested]
Index: []


In [20]:
cleaned_data.to_csv('india_covid19_data_cleaned.csv', index=False)
print("Data saved to 'india_covid19_data_cleaned.csv'")

Data saved to 'india_covid19_data_cleaned.csv'


In [21]:
new_df = pd.read_csv('india_covid19_data_cleaned.csv')

In [22]:
new_df

Unnamed: 0,State,Date,Confirmed,Deaths,Recovered,Tested,Daily Confirmed,Daily Deaths,Daily Recovered,Daily Tested


In [23]:
df

Unnamed: 0,dates
AN,"{'2020-03-26': {'delta': {'confirmed': 1}, 'de..."
AP,"{'2020-03-12': {'delta': {'confirmed': 1}, 'de..."
AR,"{'2020-04-02': {'delta': {'confirmed': 1}, 'de..."
AS,"{'2020-03-31': {'delta': {'confirmed': 1}, 'de..."
BR,"{'2020-03-22': {'delta': {'confirmed': 2}, 'de..."
CH,"{'2020-03-19': {'delta': {'confirmed': 1}, 'de..."
CT,"{'2020-03-19': {'delta': {'confirmed': 1}, 'de..."
DL,"{'2020-03-02': {'delta': {'confirmed': 1}, 'de..."
DN,"{'2020-04-09': {'delta': {'tested': 80}, 'delt..."
GA,"{'2020-03-25': {'delta': {'confirmed': 3}, 'de..."
