In [15]:
import pandas as pd

In [16]:
class Dataset:
    """Thin wrapper around a CSV file that is read into a pandas DataFrame on demand.

    Attributes:
        filepath: Location of the CSV file, passed unchanged to ``pd.read_csv``.
        data: DataFrame produced by the most recent ``load_data`` call, or
            ``None`` while nothing has been loaded yet.
    """

    def __init__(self, filepath):
        """Remember ``filepath``; the file itself is not read until ``load_data``."""
        self.filepath = filepath
        # Declared up front so that reading ``.data`` before ``load_data`` gives
        # None instead of raising AttributeError.
        self.data = None

    def load_data(self):
        """Read the CSV at ``self.filepath``, store it on ``self.data`` and return it.

        Returns:
            The freshly loaded ``pandas.DataFrame`` (the same object as ``self.data``).
        """
        self.data = pd.read_csv(self.filepath)
        return self.data

In [17]:
class CovidReport(Dataset):
    """Region- and country-level summaries of a country-wise COVID-19 CSV.

    The CSV is loaded as soon as the report is constructed.  The methods read
    the columns ``'WHO Region'``, ``'Country/Region'``, ``'Confirmed'``,
    ``'Deaths'``, ``'Recovered'`` and ``'Active'`` from ``self.data``.

    Note:
        ``filter_low_case_records`` replaces ``self.data`` with the filtered
        frame, so every method called afterwards works on the reduced data.
    """

    def __init__(self, filepath):
        """Load the CSV at ``filepath`` into ``self.data``."""
        super().__init__(filepath)
        self.data = self.load_data()

    def _rate_by_region(self, numerator_column):
        """Return ``sum(numerator_column) / sum('Confirmed') * 100`` per WHO region."""
        # One groupby pass yields both totals; the original grouped twice per call.
        totals = self.data.groupby('WHO Region')[[numerator_column, 'Confirmed']].sum()
        return totals[numerator_column] / totals['Confirmed'] * 100

    def summarize_cases_by_region(self):
        """Return total confirmed, death, recovered and active cases per WHO region.

        Returns:
            DataFrame with a 'WHO Region' column plus the summed 'Confirmed',
            'Deaths', 'Recovered' and 'Active' columns, one row per region.
        """
        summary = self.data.groupby('WHO Region').agg({
            'Confirmed': 'sum',
            'Deaths': 'sum',
            'Recovered': 'sum',
            'Active': 'sum'
        }).reset_index()
        return summary

    def filter_low_case_records(self, min_confirmed=10):
        """Drop rows whose 'Confirmed' value is below ``min_confirmed``.

        Side effect: ``self.data`` is replaced by the filtered frame, so all
        later method calls operate on the filtered rows only.

        Args:
            min_confirmed: Smallest 'Confirmed' value that is kept (default 10).

        Returns:
            The filtered DataFrame (the new ``self.data``).
        """
        self.data = self.data[self.data['Confirmed'] >= min_confirmed]
        return self.data

    def region_with_highest_cases(self):
        """Return the WHO region label with the largest summed 'Confirmed' value."""
        return self.data.groupby('WHO Region')['Confirmed'].sum().idxmax()

    def save_sorted_data(self, output_filepath):
        """Sort rows by 'Confirmed' (descending) and write them to a CSV file.

        Args:
            output_filepath: Destination path handed to ``DataFrame.to_csv``;
                the index is not written.

        Returns:
            The sorted DataFrame. ``self.data`` itself is left untouched.
        """
        sorted_data = self.data.sort_values(by='Confirmed', ascending=False)
        sorted_data.to_csv(output_filepath, index=False)
        return sorted_data

    def top_5_countries_by_case_count(self, n=5):
        """Return the ``n`` rows with the highest 'Confirmed' value.

        Args:
            n: Number of rows to return (default 5, matching the method name).

        Returns:
            DataFrame with the columns 'Country/Region' and 'Confirmed',
            ordered by descending 'Confirmed'.
        """
        ranked = self.data.sort_values('Confirmed', ascending=False)
        return ranked.head(n)[['Country/Region', 'Confirmed']]

    def region_with_lowest_death_count_cases(self):
        """Return the WHO region label with the smallest summed 'Deaths' value."""
        return self.data.groupby('WHO Region')['Deaths'].sum().idxmin()

    def india_case_summary(self):
        """Return the rows where 'Country/Region' is 'India' as text.

        Returns:
            ``str`` produced by ``DataFrame.to_string(index=False)`` -- a
            printable table, not a DataFrame.
        """
        return self.data[self.data['Country/Region'] == 'India'].to_string(index=False)

    def mortality_rate_by_region(self):
        """Return summed 'Deaths' as a percentage of summed 'Confirmed' per WHO region."""
        return self._rate_by_region('Deaths')

    def compare_recovery_rates(self):
        """Return summed 'Recovered' as a percentage of summed 'Confirmed' per WHO region."""
        return self._rate_by_region('Recovered')

    def detect_outliers(self, n_std=2):
        """Return rows whose 'Confirmed' lies more than ``n_std`` standard deviations from the mean.

        Args:
            n_std: Half-width of the accepted band, in standard deviations of
                'Confirmed' as computed by ``Series.std()`` (default 2).

        Returns:
            DataFrame of the rows strictly above ``mean + n_std * std`` or
            strictly below ``mean - n_std * std``.
        """
        confirmed = self.data['Confirmed']
        mean = confirmed.mean()
        std_dev = confirmed.std()

        threshold_upper = mean + n_std * std_dev
        threshold_lower = mean - n_std * std_dev

        return self.data[(confirmed > threshold_upper) | (confirmed < threshold_lower)]

    def group_by_country(self):
        """Return the number of rows per 'Country/Region' in a 'Counts' column."""
        return self.data.groupby('Country/Region').size().reset_index(name='Counts')

    def country_with_zero_recovered_cases(self):
        """Return the 'Country/Region' values of rows whose 'Recovered' equals 0."""
        return self.data[self.data['Recovered'] == 0]['Country/Region']


In [18]:
if __name__ == "__main__":
    # Build the report (this reads the CSV immediately) and show per-region totals.
    # NOTE(review): the path is absolute and machine-specific, so this cell only
    # runs where that exact file exists.
    report = CovidReport(
        filepath='c:/Users/2276038/Desktop/Learning/python/assignments/Assignment_week4/country_wise_latest.csv'
    )
    print(
        "COVID-19 Cases Summary by Region:",
        report.summarize_cases_by_region(),
        sep="\n",
    )

COVID-19 Cases Summary by Region:
              WHO Region  Confirmed  Deaths  Recovered   Active
0                 Africa     723207   12223     440645   270339
1               Americas    8839286  342732    4468616  4027938
2  Eastern Mediterranean    1490744   38339    1201400   251005
3                 Europe    3299523  211144    1993723  1094656
4        South-East Asia    1835297   41349    1156933   637015
5        Western Pacific     292428    8249     206770    77409


In [19]:
# Keep only rows with at least 10 confirmed cases. The call also replaces
# report.data, so every later cell works on the filtered rows.
print(
    "\nFiltered Data (Confirmed Cases >= 10):",
    report.filter_low_case_records(),
    sep="\n",
)


Filtered Data (Confirmed Cases >= 10):
         Country/Region  Confirmed  Deaths  Recovered  Active  New cases  \
0           Afghanistan      36263    1269      25198    9796        106   
1               Albania       4880     144       2745    1991        117   
2               Algeria      27973    1163      18837    7973        616   
3               Andorra        907      52        803      52         10   
4                Angola        950      41        242     667         18   
..                  ...        ...     ...        ...     ...        ...   
182  West Bank and Gaza      10621      78       3752    6791        152   
183      Western Sahara         10       1          8       1          0   
184               Yemen       1691     483        833     375         10   
185              Zambia       4552     140       2815    1597         71   
186            Zimbabwe       2704      36        542    2126        192   

     New deaths  New recovered  Deaths / 100 Ca

In [20]:
# WHO region whose summed 'Confirmed' count is the largest.
print(
    "\nRegion with Highest Confirmed Cases:",
    report.region_with_highest_cases(),
    sep="\n",
)


Region with Highest Confirmed Cases:
Americas


In [21]:
# Write the rows, ordered by descending 'Confirmed', to a CSV and keep the
# sorted frame for display.
# NOTE(review): the destination is an absolute, machine-specific path.
print("\nSaving sorted data to 'sorted_covid_data.csv'...")
sorted_data = report.save_sorted_data(
    output_filepath='c:/Users/2276038/Desktop/Learning/python/assignments/Assignment_week4/sorted_covid_data.csv'
)
print("sorted_data:\n", sorted_data)


Saving sorted data to 'sorted_covid_data.csv'...
sorted_data:
             Country/Region  Confirmed  Deaths  Recovered   Active  New cases  \
173                     US    4290259  148011    1325804  2816444      56336   
23                  Brazil    2442375   87618    1846641   508116      23284   
79                   India    1480073   33408     951166   495499      44457   
138                 Russia     816680   13334     602249   201097       5607   
154           South Africa     452529    7067     274925   170537       7096   
..                     ...        ...     ...        ...      ...        ...   
49                Dominica         18       0         18        0          0   
140  Saint Kitts and Nevis         17       0         15        2          0   
68               Greenland         14       0         13        1          1   
75                Holy See         12       0         12        0          0   
183         Western Sahara         10       1          8

In [22]:
# to_string(index=False) drops the row labels so only the two selected
# columns appear in the console output.
print(
    "\nTop 5 Countries by Confirmed Cases:",
    report.top_5_countries_by_case_count().to_string(index=False),
    sep="\n",
)


Top 5 Countries by Confirmed Cases:
Country/Region  Confirmed
            US    4290259
        Brazil    2442375
         India    1480073
        Russia     816680
  South Africa     452529


In [23]:
# WHO region whose summed 'Deaths' count is the smallest.
print(
    "\nRegion with Lowest Death Count:",
    report.region_with_lowest_death_count_cases(),
    sep="\n",
)


Region with Lowest Death Count:
Western Pacific


In [24]:
# india_case_summary() already returns formatted text, so it is printed as-is.
print(
    "\nIndia Case Summary:",
    report.india_case_summary(),
    sep="\n",
)


India Case Summary:
Country/Region  Confirmed  Deaths  Recovered  Active  New cases  New deaths  New recovered  Deaths / 100 Cases  Recovered / 100 Cases  Deaths / 100 Recovered  Confirmed last week  1 week change  1 week % increase      WHO Region
         India    1480073   33408     951166  495499      44457         637          33598                2.26                  64.26                    3.51              1155338         324735              28.11 South-East Asia


In [25]:
# Summed deaths as a percentage of summed confirmed cases, per WHO region.
print(
    "\nMortality Rate by Region (%):",
    report.mortality_rate_by_region(),
    sep="\n",
)


Mortality Rate by Region (%):
WHO Region
Africa                   1.690111
Americas                 3.877372
Eastern Mediterranean    2.571803
Europe                   6.399228
South-East Asia          2.252987
Western Pacific          2.820865
dtype: float64


In [26]:
# Summed recoveries as a percentage of summed confirmed cases, per WHO region.
print(
    "\nRecovery Rate by Region (%):",
    report.compare_recovery_rates(),
    sep="\n",
)


Recovery Rate by Region (%):
WHO Region
Africa                   60.929305
Americas                 50.554038
Eastern Mediterranean    80.590631
Europe                   60.424583
South-East Asia          63.037917
Western Pacific          70.708003
dtype: float64


In [27]:
# Rows whose 'Confirmed' value is more than two standard deviations from the mean.
print(
    "\nOutliers in Confirmed Cases:",
    report.detect_outliers(),
    sep="\n",
)


Outliers in Confirmed Cases:
    Country/Region  Confirmed  Deaths  Recovered   Active  New cases  \
23          Brazil    2442375   87618    1846641   508116      23284   
79           India    1480073   33408     951166   495499      44457   
173             US    4290259  148011    1325804  2816444      56336   

     New deaths  New recovered  Deaths / 100 Cases  Recovered / 100 Cases  \
23          614          33728                3.59                  75.61   
79          637          33598                2.26                  64.26   
173        1076          27941                3.45                  30.90   

     Deaths / 100 Recovered  Confirmed last week  1 week change  \
23                     4.74              2118646         323729   
79                     3.51              1155338         324735   
173                   11.16              3834677         455582   

     1 week % increase       WHO Region  
23               15.28         Americas  
79               28

In [28]:
# Number of rows recorded for each 'Country/Region'.
print(
    "\nGroup by Country Counts:",
    report.group_by_country(),
    sep="\n",
)


Group by Country Counts:
         Country/Region  Counts
0           Afghanistan       1
1               Albania       1
2               Algeria       1
3               Andorra       1
4                Angola       1
..                  ...     ...
182  West Bank and Gaza       1
183      Western Sahara       1
184               Yemen       1
185              Zambia       1
186            Zimbabwe       1

[187 rows x 2 columns]


In [29]:
# Countries whose 'Recovered' column is exactly 0.
print(
    "\nCountries with Zero Recovered Cases:",
    report.country_with_zero_recovered_cases(),
    sep="\n",
)


Countries with Zero Recovered Cases:
32          Canada
117     Mozambique
147         Serbia
161         Sweden
163          Syria
168    Timor-Leste
Name: Country/Region, dtype: object
