# COVID 19 API

In [2]:
import requests
import pandas as pd
import json

# India Cases and Deaths

In [2]:

# Make a GET request to the COVID-19 API endpoint for daily deaths and positive cases data
url = "https://api.covid19api.com/country/india?from=2020-01-01T00:00:00Z&to=2023-01-01T00:00:00Z"
response = requests.get(url)

# Parse the JSON response and extract the daily deaths and positive cases data
data = json.loads(response.text)
dates = [d['Date'] for d in data]
deaths = [d['Deaths'] for d in data]
cases = [d['Confirmed'] for d in data]

# Create a pandas dataframe to store the daily deaths and positive cases data
india_df = pd.DataFrame({'Date': dates, 'Deaths': deaths, 'Cases': cases})

india_df['Date'] = pd.to_datetime(india_df['Date']).dt.date
india_df['Date'] = india_df['Date'].astype(str)


# Display the first 5 rows of the dataframe
india_df



Unnamed: 0,Date,Deaths,Cases
0,2020-01-22,0,0
1,2020-01-23,0,0
2,2020-01-24,0,0
3,2020-01-25,0,0
4,2020-01-26,0,0
...,...,...,...
1071,2022-12-28,530698,44679139
1072,2022-12-29,530699,44679382
1073,2022-12-30,530702,44679608
1074,2022-12-31,530705,44679873


# India Vaccinations

Data source: https://github.com/owid/covid-19-data

In [7]:

# Open the JSON file and load the data
with open('owid-covid-data.json', 'r') as f:
    data = json.load(f)


# Extract the data for "IND"
india_data = data['IND']['data']

# Extract the "date" and "total_vaccinations" data for "IND"
india_vaccination_data = [(entry['date'], entry['total_vaccinations']) for entry in india_data if 'total_vaccinations' in entry]

# Create a Pandas DataFrame
vaccinations_df = pd.DataFrame(india_vaccination_data, columns=['Date', 'Total Vaccinations'])

# Convert the "total vaccinations" column to a standard decimal format
pd.options.display.float_format = '{:.2f}'.format
vaccinations_df['Total Vaccinations'] = vaccinations_df['Total Vaccinations'].astype(float)

#vaccinations_df['Date'] = vaccinations_df['Date'].astype(str)



#Dates that I'm missing that I will need to manually add
#(2020, 1, 12), (2020, 2, 12), (2020, 11, 17), (2020, 11, 18), (2020, 11, 19), (2020, 12, 1), (2020, 12, 2), (2020, 12, 6), (2020, 12, 7), (2020, 12, 9), (2020, 12, 12), (2020, 12, 16), (2020, 12, 19), (2020, 12, 20), (2020, 12, 23), (2020, 12, 31), (2021, 1, 2), (2021, 1, 4), (2021, 1, 6), (2021, 1, 7), (2021, 1, 9)


# create a new dataframe with dates present in Pixstory dataset but not in my vaccinations dataset
additional_dates = [('2020', '1', '12'), ('2020', '2', '12'), ('2020', '11', '17'), ('2020', '11', '18'), ('2020', '11', '19'),
             ('2020', '12', '2'), ('2020', '12', '6'), ('2020', '12', '9'), ('2020', '12', '12'), ('2020', '12', '16'),
             ('2020', '12', '19'), ('2020', '12', '20'), ('2020', '12', '23'), ('2020', '12', '31'), ('2021', '1', '2'),
             ('2021', '1', '6'), ('2021', '1', '7'), ('2021', '1', '9')]

additional_dates_df = pd.DataFrame(additional_dates, columns=['Year', 'Month', 'Day'])
additional_dates_df['Date'] = pd.to_datetime(additional_dates_df[['Year', 'Month', 'Day']]).dt.date

# concatenate the two dataframes
final_vaccinations_df = pd.concat([additional_dates_df, vaccinations_df], ignore_index=True)


final_vaccinations_df= final_vaccinations_df.drop(['Year', 'Month', 'Day'], axis=1)

# replace NaN with zero
final_vaccinations_df.fillna(0, inplace=True)

final_vaccinations_df['Date'] = final_vaccinations_df['Date'].astype(str)

# Print the DataFrame
final_vaccinations_df.head(30)







Unnamed: 0,Date,Total Vaccinations
0,2020-01-12,0.0
1,2020-02-12,0.0
2,2020-11-17,0.0
3,2020-11-18,0.0
4,2020-11-19,0.0
5,2020-12-02,0.0
6,2020-12-06,0.0
7,2020-12-09,0.0
8,2020-12-12,0.0
9,2020-12-16,0.0


# Merge


In [9]:
#Merging the DataFrames on the 'Date' column

merged_df = pd.merge(india_df, final_vaccinations_df, on='Date')

merged_df['New COVID Cases'] = merged_df['Cases'].diff().fillna(0).astype(int)
merged_df['New Deaths'] = merged_df['Deaths'].diff().fillna(0).astype(int)
merged_df['New Vaccinations'] = merged_df['Total Vaccinations'].diff().fillna(0).astype(int)

#Dropping columns
final_covid_df = merged_df.drop(['Cases', 'Deaths', 'Total Vaccinations'], axis=1)

# save the data frame as a CSV file
final_covid_df.to_csv('Final_COVID_Data.csv', index=False)

final_covid_df

Unnamed: 0,Date,New COVID Cases,New Deaths,New Vaccinations
0,2020-02-12,0,0,0
1,2020-11-17,8912904,130993,0
2,2020-11-18,45576,585,0
3,2020-11-19,45882,584,0
4,2020-12-02,530599,6486,0
...,...,...,...,...
707,2022-12-28,268,2,90596
708,2022-12-29,243,1,95882
709,2022-12-30,226,3,84200
710,2022-12-31,265,3,88286


# Visualizations 

In [1]:
import matplotlib.pyplot as plt

# Set the figure size
plt.figure(figsize=(10, 6))

# Plot the three columns against the date column
#plt.plot(merged_df['Date'], merged_df['New COVID Cases'], label='Column 1')
#plt.plot(merged_df['Date'], merged_df['New Deaths'], label='Column 2')
plt.plot(merged_df['Date'], merged_df['Total Vaccinations'], label='Column 3')

# Add labels and title
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Three Columns over Time')

# Add legend
plt.legend()

# Show the plot
plt.show()


NameError: name 'merged_df' is not defined

<Figure size 720x432 with 0 Axes>

# Adding tp Pixstory


In [16]:
pixstory_df = pd.read_csv('Master_Dataset_Raw_copy.csv')

covid_df = pd.read_csv('Final_COVID_Data.csv')

# convert the 'date' column to datetime format
pixstory_df['Account Created Date'] = pd.to_datetime(pixstory_df['Account Created Date'])

#sort by date
pixstory_df = pixstory_df.sort_values(by='Account Created Date')

#create new column for dates only
pixstory_df['Date'] = pd.to_datetime(pixstory_df['Account Created Date']).dt.date

#prepping columns for merge by converting to string for both dfs
pixstory_df['Date'] = pixstory_df['Date'].astype('string')
covid_df['Date'] = covid_df['Date'].astype('string')

# merge the DataFrames on 'Date' column while preserving all rows
combined_df = pd.merge(pixstory_df, covid_df, on='Date', how='outer')

# replace NaN values with 0 because COVID hadn't penetrated india yet so we know all values are zero
combined_df = combined_df.fillna(0)

combined_df.head(20)

#pixstory_df



Unnamed: 0,Story Primary ID,Story ID,User Primary ID,User ID,Gender,Age,Title,Narrative,Media,Account Created Date,Interest,Date,New COVID Cases,New Deaths,New Vaccinations
0,121169.0,STY1659426957,103.0,USR1606807023,female,34.0,Trend of the Year: Barbiecore,"The colour of the year is here, and it's *drum...",https://image.pixstory.com/Pixstory-image-1659...,2020-01-12 12:47:00,"trends, fashion, barbie",2020-01-12,0.0,0.0,0.0
1,127727.0,STY1660634861,103.0,USR1606807023,female,34.0,Abomination of the day,We Indians do love to bastardise our foods- Ch...,https://image.pixstory.com/Pixstory-image-1660...,2020-01-12 12:47:00,"Food, momos, weird menus",2020-01-12,0.0,0.0,0.0
2,123665.0,STY1660027898,103.0,USR1606807023,female,34.0,Shameful headline in 2022,Can professors not have personal lives? \n\nAd...,https://image.pixstory.com/Pixstory-image-1660...,2020-01-12 12:47:00,"misogyny, st xaviers",2020-01-12,0.0,0.0,0.0
3,130517.0,STY1661151635,103.0,USR1606807023,female,34.0,Woman lawyer arrested for abusing security guard,"She was recorded on video manhandling him, sho...",https://image.pixstory.com/Pixstory-image-1661...,2020-01-12 12:47:00,"Technology, History, Food, Entertainment, Spor...",2020-01-12,0.0,0.0,0.0
4,125466.0,STY1660372361,109.0,USR1606851217,others,31.0,What is the Inflation Reduction Act?,The House passed the Inflation Reduction Act o...,https://image.pixstory.com/Pixstory-image-1660...,2020-02-12 01:03:00,us government,2020-02-12,0.0,0.0,0.0
5,129472.0,STY1660912862,2.0,USR1605614195,others,27.0,August is my favourite month,"<p dir=""ltr""><a href=""https://www.google.com"">...",https://image.pixstory.com/Pixstory-image-1647...,2020-11-17 17:26:00,"Technology, History, Food, Entertainment, Spor...",2020-11-17,8912904.0,130993.0,0.0
6,128902.0,STY1660827749,2.0,USR1605614195,others,27.0,How bad are things at ManU?,"<p dir=""ltr"">How bad are things at ManU?</p>\n...",https://image.pixstory.com/Pixstory-image-1634...,2020-11-17 17:26:00,"Technology, History, Food, Entertainment, Spor...",2020-11-17,8912904.0,130993.0,0.0
7,124381.0,STY1660146053,2.0,USR1605614195,others,27.0,"Trump raided by FBI in Florida, pleads Fifth A...","<p dir=""ltr"">The noose is tightening.</p>\n<p ...",https://image.pixstory.com/Pixstory-image-1660...,2020-11-17 17:26:00,"Technology, History, Food, Entertainment, Spor...",2020-11-17,8912904.0,130993.0,0.0
8,121828.0,STY1659554880,2.0,USR1605614195,others,27.0,"Hi, whats up","<p dir=""ltr"">Hi, whats up</p>",https://image.pixstory.com/Pixstory-image-1659...,2020-11-17 17:26:00,"Technology, History, Food, Entertainment, Spor...",2020-11-17,8912904.0,130993.0,0.0
9,121349.0,STY1659450917,2.0,USR1605614195,others,27.0,You can never go wrong with Led Zep,"<p dir=""ltr"">You can never go wrong with <b>Le...",https://image.pixstory.com/Pixstory-image-1659...,2020-11-17 17:26:00,"Technology, History, Food, Entertainment, Spor...",2020-11-17,8912904.0,130993.0,0.0
