In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
%matplotlib inline

In [None]:
# Load the confirmed-cases CSV and preview its first rows.
csv_path = '../input/covid19-confirmed-cases-kerala/Confirmed Covid 19 Cases Kerala.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Count the missing values in each column.
data.isna().sum()

In [None]:
# Quick line plot of the raw frame, before 'Date' is parsed and made the index.
data.plot.line()

In [None]:
# Parse the 'Date' column as timezone-aware (UTC) timestamps and use it as the index.
parsed_dates = pd.to_datetime(data['Date'], utc=True)
data = data.assign(Date=parsed_dates).set_index('Date')
data.head()

In [None]:
# Bar chart of confirmed cases over the whole date range.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11
# and rejects them from 0.12 onwards.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title('Confirmed Cases 2020 - 21', fontsize=15)
sns.barplot(x=data.index, y=data.Confirmed, ax=ax)
plt.show()
 
 

# Year-wise Analysis

In [None]:
# Show the calendar year of every row in the datetime index.
index_years = data.index.year
index_years

In [None]:
# Split the frame into one subset per calendar year and preview each.
row_years = data.index.year
data20 = data.loc[row_years == 2020]
data21 = data.loc[row_years == 2021]
for yearly_frame in (data20, data21):
    print(yearly_frame.head())

In [None]:
# Print the total confirmed count for each year as an integer.
print('Covid19 Confirmed Cases')
total_2020 = int(data20['Confirmed'].sum())
print(f'2020: {total_2020}')
total_2021 = int(data21['Confirmed'].sum())
print(f'2021: {total_2021}')

In [None]:
# One default pandas line plot per year, each titled via its own Axes.
ax_2020 = data20.plot()
ax_2020.set_title('Confirmed Cases - 2020')

ax_2021 = data21.plot()
ax_2021.set_title('Confirmed Cases - 2021')
plt.show()  # optional: the inline backend renders open figures at the end of the cell

In [None]:
# Daily confirmed cases in 2020: scatter plot, then bar chart.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11
# and rejects them from 0.12 onwards.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title('Confirmed Cases - 2020', fontsize=15)
sns.scatterplot(x=data20.index, y=data20.Confirmed, ax=ax)
plt.show()

fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=data20.index, y=data20.Confirmed, ax=ax)
plt.show()

In [None]:
# Daily confirmed cases in 2021: scatter plot, then bar chart.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11
# and rejects them from 0.12 onwards.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title('Confirmed Cases - 2021', fontsize=15)
sns.scatterplot(x=data21.index, y=data21.Confirmed, ax=ax)
plt.show()

fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=data21.index, y=data21.Confirmed, ax=ax)
plt.show()

In [None]:
# Violinplot vs Stripplot
# The Series is passed as x= explicitly. As a bare positional argument it was
# interpreted as `x` by seaborn 0.11 (with a deprecation warning) but lands in
# the `data` parameter from 0.12 onwards, so the plot would depend on the
# installed seaborn version.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title('Plot - 2020', fontsize=15)
sns.violinplot(x=data20.Confirmed, color='white', ax=ax)
sns.stripplot(x=data20.Confirmed, color='black',
              s=10, alpha=0.5, ax=ax)
plt.show()

fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title('Plot - 2021', fontsize=15)
sns.violinplot(x=data21.Confirmed, color='white', ax=ax)
sns.stripplot(x=data21.Confirmed, color='maroon',
              s=10, alpha=0.5, ax=ax)
plt.show()

## Month-wise Analysis

In [None]:
# Monthly confirmed totals, one column per year, one row per calendar month.
index = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
month_numbers = range(1, len(index) + 1)

# Totals are keyed by calendar-month number and reindexed to 1..12, so every
# value lands on its own month even when a year's data does not start in
# January or stops early. Months without any rows become NaN.
# (The previous resample('M') approach lined the years up by position only,
# and the 'M' alias is deprecated in pandas 2.2.)
monthly_totals = {
    'Y2020': data20['Confirmed'].groupby(data20.index.month).sum()
                                .reindex(month_numbers).to_numpy(),
    'Y2021': data21['Confirmed'].groupby(data21.index.month).sum()
                                .reindex(month_numbers).to_numpy(),
}
month = pd.DataFrame(monthly_totals, index=pd.Index(index, name='Month'))
month

In [None]:
# Summary statistics of the monthly totals, per year column.
monthly_summary = month.describe()
monthly_summary

In [None]:
# Line chart of the monthly totals, one line per year column, with point markers.
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=month, marker='o', ax=ax)
plt.show()

In [None]:
# Pairwise KDE view of the columns in the monthly table.
sns.pairplot(data=month, kind='kde')
plt.show()

In [None]:
# Heatmap of the monthly table (rows: months, columns: years) on a tall, narrow figure.
fig, ax = plt.subplots(figsize=(4, 8))
sns.heatmap(data=month, cmap='Greens', ax=ax)
plt.show()

In [None]:
# Monthly confirmed totals for 2020: point plot, then bar chart.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11
# and rejects them from 0.12 onwards.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title('Covid Confirmed: 2020', fontsize=15)
sns.pointplot(x=month.index, y=month.Y2020, ax=ax)
plt.show()

fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=month.index, y=month.Y2020, ax=ax)
plt.show()

In [None]:
# Monthly confirmed totals for 2021: point plot, then bar chart.
# Only months that actually have a 2021 value are drawn; this replaces the
# hard-coded [:5] slice, so the cell keeps working if the data covers a
# different number of months.
y2021 = month['Y2021'].dropna()

# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11
# and rejects them from 0.12 onwards.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title('Covid Confirmed: 2021', fontsize=15)
sns.pointplot(x=y2021.index, y=y2021, ax=ax)
plt.show()

fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=y2021.index, y=y2021, ax=ax)
plt.show()

## Covid Cases: June 2020 - May 2021

In [None]:
# Keep only the rows from 1 June 2020 (UTC) onwards, then display the result.
window_start = '2020-06-01 00:00:00+00:00'
data = data.loc[window_start:]
data

In [None]:
# Day-of-month x month table of confirmed cases for Jun 2020 - May 2021.
col = ['Day', "Jun'20", "Jul'20", "Aug'20", "Sep'20", "Oct'20", "Nov'20",
       "Dec'20", "Jan'21", "Feb'21", "Mar'21", "Apr'21", "May'21"]
day = list(range(1, 32))

# (year, month) for each month column, in the same order as col[1:].
periods = [(2020, m) for m in range(6, 13)] + [(2021, m) for m in range(1, 6)]

monthly_columns = {}
for column_name, (yr, mo) in zip(col[1:], periods):
    # Filter on year AND month so rows from another year with the same month
    # number can never leak into this column.
    in_period = (data.index.year == yr) & (data.index.month == mo)
    confirmed = data.loc[in_period, 'Confirmed']
    # Key each value by its actual day of month rather than by row position,
    # so a missing date leaves a gap instead of shifting every later day.
    # min_count=1 keeps a day whose values are all missing as NaN, not 0.
    monthly_columns[column_name] = (
        confirmed.groupby(confirmed.index.day).sum(min_count=1)
    )

# Days a month does not have (e.g. the 31st of June) stay NaN.
year_data = pd.DataFrame(monthly_columns,
                         index=pd.Index(day, name='Day'),
                         columns=col[1:])
year_data

In [None]:
# Scatter of every month column of year_data against the day index, with enlarged markers.
fig, ax = plt.subplots(figsize=(12, 5))
sns.scatterplot(data=year_data, s=100, ax=ax)
plt.show()

In [None]:
# Box plot per month column, with the individual daily values overlaid as a strip plot.
fig, ax = plt.subplots(figsize=(15, 8))
sns.boxplot(data=year_data, ax=ax)
sns.stripplot(data=year_data, s=10, alpha=0.5, ax=ax)
plt.show()

In [None]:
# One day-wise bar chart per month column of year_data.
label = year_data.columns.tolist()
for month_label in label:
    fig, ax = plt.subplots(figsize=(10, 5))
    # x and y are passed by keyword: seaborn deprecated positional x/y in
    # 0.11 and rejects them from 0.12 onwards.
    sns.barplot(x=year_data.index, y=year_data[month_label], ax=ax)
    # Title and y-label are set after the seaborn call: some seaborn versions
    # overwrite an existing axis label with the Series name, which would
    # replace 'Confirmed Cases' with the month column name.
    ax.set_title(f'Day-wise Analysis - {month_label}', fontsize=15)
    ax.set_ylabel('Confirmed Cases')
    plt.show()

In [None]:
# Raw (un-normalized) month curves: default pandas plot with a title,
# followed by a larger seaborn line plot of the same table.
raw_ax = year_data.plot()
raw_ax.set_title('Covid Case Variations - Before Normalization', fontsize=15)

fig, line_ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=year_data, ax=line_ax)
plt.show()

In [None]:
# Min-max normalization: rescale each month column to the [0, 1] range.
column_min = year_data.min()
column_span = year_data.max() - column_min
norm_data = (year_data - column_min) / column_span

fig, ax = plt.subplots(figsize=(15, 8))
ax.set_title('Covid Case Variations - After Normalization', fontsize=15)
sns.lineplot(data=norm_data, marker='o', ax=ax)
plt.show()

In [None]:
# Total confirmed cases per month column of year_data.
# The month names come from the totals themselves, so this cell no longer
# relies on the `label` list created in an earlier cell.
period_totals = year_data.sum()

fig, ax = plt.subplots(figsize=(10, 5))
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11
# and rejects them from 0.12 onwards.
sns.barplot(x=period_totals.index, y=period_totals.to_numpy(), ax=ax)
# Title and axis labels are applied after the seaborn call so that seaborn's
# own axis labelling cannot replace them.
ax.set_title("Covid Cases: June'20 to May'21", fontsize=15)
ax.set_xlabel('Month', fontsize=12)
ax.set_ylabel('Confirmed Cases', fontsize=12)
plt.show()

## Thank You!