In [6]:
import pandas as pd
from tabulate import tabulate

In [7]:
# Select a Dataset
df = pd.read_csv('archive/country_wise_latest.csv')

In [12]:
#Cleans rows with missing value
# df = df.dropna()
# removed NaN/inf values
df = df.replace([float('inf'), float('-inf')], pd.NA).dropna()

In [13]:
#Check data type and structures
df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
Index: 182 entries, 0 to 186
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Country/Region          182 non-null    object 
 1   Confirmed               182 non-null    int64  
 2   Deaths                  182 non-null    int64  
 3   Recovered               182 non-null    int64  
 4   Active                  182 non-null    int64  
 5   New cases               182 non-null    int64  
 6   New deaths              182 non-null    int64  
 7   New recovered           182 non-null    int64  
 8   Deaths / 100 Cases      182 non-null    float64
 9   Recovered / 100 Cases   182 non-null    float64
 10  Deaths / 100 Recovered  182 non-null    object 
 11  Confirmed last week     182 non-null    int64  
 12  1 week change           182 non-null    int64  
 13  1 week % increase       182 non-null    float64
 14  WHO Region              182 non-null    object 

Unnamed: 0,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,Deaths / 100 Cases,Recovered / 100 Cases,Confirmed last week,1 week change,1 week % increase
count,182.0,182.0,182.0,182.0,182.0,182.0,182.0,182.0,182.0,182.0,182.0,182.0
mean,89330.31,3509.879121,52022.46,33797.97,1248.054945,29.615385,959.467033,2.972308,66.601319,79666.86,9663.450549,13.647912
std,388432.3,14280.398555,192608.4,216134.4,5786.51306,121.61485,4252.395043,3.458736,24.307913,342779.5,48124.0107,24.791266
min,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,-47.0,-3.84
25%,1107.0,18.25,719.0,131.75,4.0,0.0,0.0,0.9425,51.77,1045.25,47.0,2.7625
50%,4970.0,103.5,3220.0,1599.0,49.0,1.0,24.0,2.13,72.27,4784.0,418.0,6.89
75%,39676.25,718.25,23333.5,8696.0,419.75,5.75,228.25,3.715,87.2825,36583.5,3248.5,16.9275
max,4290259.0,148011.0,1846641.0,2816444.0,56336.0,1076.0,33728.0,28.56,100.0,3834677.0,455582.0,226.32


In [14]:
# List of numerical columns to analyze
numerical_columns = [
    'Confirmed',
    'Deaths',
    'Recovered',
    'Active',
    'New cases',
    'New deaths',
    'New recovered',
    'Deaths / 100 Cases',
    'Recovered / 100 Cases',
    'Deaths / 100 Recovered',
    'Confirmed last week',
    '1 week change',
    '1 week % increase'
]

# Function to calculate and print descriptive statistics for a given column
def print_statistics(column):
    mean_value = df[column].mean()
    median_value = df[column].median()
    mode_value = df[column].mode()[0]
    std_value = df[column].std()
    variance_value = df[column].var()
    min_value = df[column].min()
    max_value = df[column].max()
    range_value = max_value - min_value
    percentile_25 = df[column].quantile(0.25)
    percentile_50 = df[column].quantile(0.50)
    percentile_75 = df[column].quantile(0.75)

    data = [
        ["Mean", f"{mean_value:.0f}"],
        ["Median", f"{median_value:.0f}"],
        ["Mode", f"{mode_value:.0f}"],
        ["Standard Deviation", f"{std_value:.0f}"],
        ["Variance", f"{variance_value:.0f}"],
        ["Min", f"{min_value:.0f}"],
        ["Max", f"{max_value:.0f}"],
        ["Range", f"{range_value:.0f}"],
        ["25th Percentile", f"{percentile_25:.0f}"],
        ["50th Percentile (Median)", f"{percentile_50:.0f}"],
        ["75th Percentile", f"{percentile_75:.0f}"]
    ]

    print(f"\nDescriptive Statistics for {column}")
    print(tabulate(data, headers=["Statistic", "Value"], tablefmt="pretty", floatfmt=".2f"))

for column in numerical_columns:
    print_statistics(column)

# For 'WHO Region'
most_frequent_region = df['WHO Region'].mode()[0]
print("\nMost Frequent WHO Region:", most_frequent_region)




Descriptive Statistics for Confirmed
+--------------------------+--------------+
|        Statistic         |    Value     |
+--------------------------+--------------+
|           Mean           |    89330     |
|          Median          |     4970     |
|           Mode           |      24      |
|    Standard Deviation    |    388432    |
|         Variance         | 150879672538 |
|           Min            |      10      |
|           Max            |   4290259    |
|          Range           |   4290249    |
|     25th Percentile      |     1107     |
| 50th Percentile (Median) |     4970     |
|     75th Percentile      |    39676     |
+--------------------------+--------------+

Descriptive Statistics for Deaths
+--------------------------+-----------+
|        Statistic         |   Value   |
+--------------------------+-----------+
|           Mean           |   3510    |
|          Median          |    104    |
|           Mode           |     0     |
|    Standard Deviati