In [6]:
import pandas as pd
from tabulate import tabulate

In [7]:
# Read the country-wise CSV from the local archive/ folder into a DataFrame
df = pd.read_csv(filepath_or_buffer='archive/country_wise_latest.csv')

In [12]:
# Drop every row that has a missing or infinite value in any column.
# Infinities are mapped to float NaN (not pd.NA) before dropping: a NumPy float
# column cannot hold pd.NA, so substituting it can turn the column into object
# dtype, which the numeric statistics computed later would then have to work on.
df = df.replace([float('inf'), float('-inf')], float('nan')).dropna()

In [14]:
# Columns that get a descriptive-statistics table, in display order
numerical_columns = (
    # headline counts
    ['Confirmed', 'Deaths', 'Recovered', 'Active']
    # 'New ...' counts
    + ['New cases', 'New deaths', 'New recovered']
    # per-100 ratios
    + ['Deaths / 100 Cases', 'Recovered / 100 Cases', 'Deaths / 100 Recovered']
    # week-over-week columns
    + ['Confirmed last week', '1 week change', '1 week % increase']
)

# Function to calculate and print descriptive statistics for a given column
def print_statistics(column, frame=None):
    """Print a table of descriptive statistics for one numeric column.

    The table lists mean, median, mode, standard deviation, variance, min,
    max, range and the 25th/50th/75th percentiles, rendered with ``tabulate``.

    Parameters
    ----------
    column : str
        Name of the column to summarise.
    frame : pandas.DataFrame, optional
        Frame to read the column from. When omitted, the notebook-level
        ``df`` is used, so existing ``print_statistics(column)`` calls keep
        working unchanged.

    Notes
    -----
    Integer-typed columns are printed with no decimals. Any other column is
    printed with two decimals so that ratio / percentage columns are not
    rounded to whole numbers.
    """
    source = df if frame is None else frame

    # Coerce to a numeric dtype so the statistics do not depend on whether an
    # earlier cleaning step left the column as object dtype.
    values = pd.to_numeric(source[column])

    # Whole-number columns keep the compact ".0f" rendering; everything else
    # needs decimals to be meaningful.
    spec = ".0f" if pd.api.types.is_integer_dtype(values) else ".2f"

    # mode() returns every tied value; report the first one, or NaN when the
    # column is empty instead of failing on the lookup.
    modes = values.mode()
    mode_value = modes.iloc[0] if len(modes) else float("nan")

    min_value = values.min()
    max_value = values.max()
    percentile_25, percentile_50, percentile_75 = values.quantile([0.25, 0.50, 0.75])

    stats = [
        ("Mean", values.mean()),
        ("Median", values.median()),
        ("Mode", mode_value),
        ("Standard Deviation", values.std()),
        ("Variance", values.var()),
        ("Min", min_value),
        ("Max", max_value),
        ("Range", max_value - min_value),
        ("25th Percentile", percentile_25),
        ("50th Percentile (Median)", percentile_50),
        ("75th Percentile", percentile_75),
    ]
    data = [[label, format(value, spec)] for label, value in stats]

    print(f"\nDescriptive Statistics for {column}")
    print(tabulate(data, headers=["Statistic", "Value"], tablefmt="pretty", floatfmt=".2f"))

# Emit one statistics table for each entry of numerical_columns
for column in numerical_columns:
    print_statistics(column)

# 'WHO Region' is handled separately: only its most common value is reported
region_modes = df['WHO Region'].mode()
most_frequent_region = region_modes[0]
print("\nMost Frequent WHO Region:", most_frequent_region)




Descriptive Statistics for Confirmed
+--------------------------+--------------+
|        Statistic         |    Value     |
+--------------------------+--------------+
|           Mean           |    89330     |
|          Median          |     4970     |
|           Mode           |      24      |
|    Standard Deviation    |    388432    |
|         Variance         | 150879672538 |
|           Min            |      10      |
|           Max            |   4290259    |
|          Range           |   4290249    |
|     25th Percentile      |     1107     |
| 50th Percentile (Median) |     4970     |
|     75th Percentile      |    39676     |
+--------------------------+--------------+

Descriptive Statistics for Deaths
+--------------------------+-----------+
|        Statistic         |   Value   |
+--------------------------+-----------+
|           Mean           |   3510    |
|          Median          |    104    |
|           Mode           |     0     |
|    Standard Deviati