In [1]:
import numpy as np

In [2]:
# Load the identifying columns (ISO code, continent, location) of the
# vaccinations CSV into a structured array; the header row is skipped.
covid = np.genfromtxt(
    "datasets/COVID_VACCINATIONS.csv",
    usecols=(0, 1, 2),
    dtype=[
        ("iso_code", "U10"),
        ("continent", "U100"),
        ("location", "U100"),
    ],
    skip_header=1,
    delimiter=",",
)

In [3]:
# Show the loaded structured array (NumPy abbreviates the repr of long arrays).
covid

array([('AFG', 'Asia', 'Afghanistan'), ('AFG', 'Asia', 'Afghanistan'),
       ('AFG', 'Asia', 'Afghanistan'), ..., ('ZWE', 'Africa', 'Zimbabwe'),
       ('ZWE', 'Africa', 'Zimbabwe'), ('ZWE', 'Africa', 'Zimbabwe')],
      dtype=[('iso_code', '<U10'), ('continent', '<U100'), ('location', '<U100')])

### Os 10 países com maior população tiveram, proporcionalmente, mais mortes por Covid-19?

Hipótese: devido ao número elevado de habitantes e, consequentemente, aos recursos de saúde limitados, o número proporcional de mortes por Covid-19 pode ter sido muito maior nesses países do que em países com menor população.

In [4]:
# Reload `covid` from the deaths CSV, reading columns 2, 4 and 9 as
# location, population and new_deaths; the header row is skipped.
covid = np.genfromtxt(
    "datasets/COVID DEATHS.csv",
    usecols=(2, 4, 9),
    dtype=[("location", "U100"), ("population", int), ("new_deaths", float)],
    skip_header=1,
    delimiter=",",
)

covid

array([('Afghanistan', 41128772, 0.), ('Afghanistan', 41128772, 0.),
       ('Afghanistan', 41128772, 0.), ..., ('Zimbabwe', 16320539, 0.),
       ('Zimbabwe', 16320539, 0.), ('Zimbabwe', 16320539, 0.)],
      dtype=[('location', '<U100'), ('population', '<i8'), ('new_deaths', '<f8')])

In [5]:
# Row index of the first occurrence of each distinct location
# (element [1] of np.unique's result when return_index=True).
unique = np.unique(covid['location'], return_index=True)[1]
unique.shape

(255,)

In [6]:
# Name of each distinct location, read from its first row.
paises = covid[unique]["location"]
paises.shape

(255,)

In [7]:
# Population of each distinct location, read from its first row.
pop = covid[unique]["population"]
pop

array([  41128772, 1426736614,    2842318,   44903228,      44295,
            79843,   35588996,      15877,      93772,   45510324,
          2780472,     106459, 4721383370,   26177410,    8939617,
         10358078,     409989,    1472237,  171186368,     281646,
          9534956,   11655923,     405285,   13352864,      64207,
           782457,   12224114,      27052,    3233530,    2630300,
        215313504,      31332,     449002,    6781955,   22673764,
         12889583,   16767851,   27914542,   38454328,     593162,
            68722,    5579148,   17723312,   19603736, 1425887360,
         51874028,     836783,    5970430,      17032,    5180836,
         28160548,    4030361,   11212198,     191173,     896007,
         10493990,   99010216,    5882259,    1120851,      72758,
         11228821,   18001002,  110990096,    6336393,   56550000,
          1674916,    3684041,    1326064,    1201680,  123379928,
        744807803,  450146793,      53117,       3801,     929

In [8]:
# Sorted array of the distinct location names.
pais = np.unique(covid['location'])

In [9]:
# Independent copy of the rows whose new_deaths value is not NaN.
copy_covid = covid[~np.isnan(covid['new_deaths'])].copy()

In [10]:
# Total new_deaths per location, computed in a single pass instead of
# re-scanning every row once per location.
# `pais` is sorted (it comes from np.unique) and contains every location in
# `copy_covid`, so searchsorted maps each row to the position of its location.
row_group = np.searchsorted(pais, copy_covid['location'])
# Sum new_deaths per position; minlength keeps a 0.0 total for any location
# that has no remaining (non-NaN) rows.
death_totals = np.bincount(
    row_group, weights=copy_covid['new_deaths'], minlength=len(pais)
)
cases = list(zip(pais, death_totals))
# np.array on (name, float) pairs produces a two-column string array; later
# cells convert the numeric column back with astype(float).
casos = np.array(cases)
casos[:5]

array([['Afghanistan', '7884.0'],
       ['Africa', '258851.0'],
       ['Albania', '3604.0'],
       ['Algeria', '6881.0'],
       ['American Samoa', '34.0']], dtype='<U32')

In [11]:
# Put the population in front of the (location, total deaths) table as a new
# first column; stacking ints with strings gives a string-typed array.
total_array = np.hstack([pop.reshape(-1, 1), casos])
total_array[:5]

array([['41128772', 'Afghanistan', '7884.0'],
       ['1426736614', 'Africa', '258851.0'],
       ['2842318', 'Albania', '3604.0'],
       ['44903228', 'Algeria', '6881.0'],
       ['44295', 'American Samoa', '34.0']], dtype='<U32')

In [12]:
# Deaths as a percentage of population: column 2 holds the total deaths and
# column 0 the population, both stored as strings in `total_array`.
calc_percent = 100 * (total_array[:, 2].astype(float) / total_array[:, 0].astype(float))
calc_percent[:5]

array([0.01916906, 0.01814287, 0.12679792, 0.01532407, 0.0767581 ])

In [13]:
# Append the percentage as a fourth column.
# Column layout: 0 = population, 1 = country, 2 = total deaths, 3 = proportion (%).
result = np.hstack([total_array, calc_percent.reshape(-1, 1)])
result[:5]

array([['41128772', 'Afghanistan', '7884.0', '0.01916906247529102'],
       ['1426736614', 'Africa', '258851.0', '0.0181428721643503'],
       ['2842318', 'Albania', '3604.0', '0.12679791634855775'],
       ['44903228', 'Algeria', '6881.0', '0.015324065343364624'],
       ['44295', 'American Samoa', '34.0', '0.07675809910825149']],
      dtype='<U32')

In [14]:
# Top 10 analysis by largest population.
# Order the rows by population (column 0, parsed as int), largest first.
sorted_indices = np.argsort(result[:, 0].astype(int))[::-1]
sorted_array = result[sorted_indices, :]
print(sorted_array[:20, :])
# The most populous countries are at indices (5,6,13,14,15,16,17,18,19,20) of this ranking.

[['7975105024' 'World' '6899687.0' '0.08651531207722438']
 ['4721383370' 'Asia' '1628635.0' '0.034494868820618564']
 ['3432097300' 'Lower middle income' '1340097.0' '0.039046008398421574']
 ['2525921300' 'Upper middle income' '2654426.0' '0.10508743878916577']
 ['1426736614' 'Africa' '258851.0' '0.0181428721643503']
 ['1425887360' 'China' '120905.0' '0.008479281280675634']
 ['1417173120' 'India' '531000.0' '0.03746895792096311']
 ['1250514600' 'High income' '2853330.0' '0.22817246595921392']
 ['744807803' 'Europe' '2038597.0' '0.27370779304254955']
 ['737604900' 'Low income' '47890.0' '0.006492635827120996']
 ['600323657' 'North America' '1594720.0' '0.26564337110573005']
 ['450146793' 'European Union' '1223798.0' '0.27186642647035364']
 ['436816679' 'South America' '1352944.0' '0.30972809991992084']
 ['338289856' 'United States' '1121237.0' '0.3314426903773313']
 ['275501344' 'Indonesia' '161071.0' '0.05846468756246794']
 ['235824864' 'Pakistan' '30652.0' '0.01299778126873']
 ['218541

In [15]:
# Pick the 10 most populous countries from the population ranking by filtering
# out aggregate rows by name, instead of relying on hand-copied positions that
# break as soon as the data or its ordering changes.
# These are the aggregate locations (world, continents, income groups, EU) seen
# in the printed ranking. NOTE(review): less populous aggregate rows may exist
# in the file; add them here if the cut-off is ever widened.
aggregate_names = [
    'World', 'Asia', 'Africa', 'Europe', 'European Union',
    'North America', 'South America',
    'High income', 'Upper middle income', 'Lower middle income', 'Low income',
]
# Column 1 of `sorted_array` holds the location name.
is_country = ~np.isin(sorted_array[:, 1], aggregate_names)
# Positions of the first 10 non-aggregate rows in the ranking.
top10_i = np.nonzero(is_country)[0][:10]
top10_pop = sorted_array[top10_i]
top10_pop

array([['1425887360', 'China', '120905.0', '0.008479281280675634'],
       ['1417173120', 'India', '531000.0', '0.03746895792096311'],
       ['338289856', 'United States', '1121237.0', '0.3314426903773313'],
       ['275501344', 'Indonesia', '161071.0', '0.05846468756246794'],
       ['235824864', 'Pakistan', '30652.0', '0.01299778126873'],
       ['218541216', 'Nigeria', '3155.0', '0.0014436636062279437'],
       ['215313504', 'Brazil', '700556.0', '0.3253655655522656'],
       ['171186368', 'Bangladesh', '29452.0', '0.017204640967673313'],
       ['144713312', 'Russia', '397642.0', '0.27477914402235504'],
       ['127504120', 'Mexico', '333596.0', '0.2616354671519634']],
      dtype='<U32')

In [16]:
# Top 10 analysis by highest proportional death percentage.
# Order the rows by proportion (column 3, parsed as float), largest first.
sorted_indices = np.argsort(result[:, 3].astype(float))[::-1]
sorted_array = result[sorted_indices, :]

In [17]:
# First ten rows of the current ranking in `sorted_array`.
print(sorted_array[:10])

[['34049588' 'Peru' '219866.0' '0.645722937969176']
 ['6781955' 'Bulgaria' '38282.0' '0.5644685050254683']
 ['3233530' 'Bosnia and Herzegovina' '16328.0' '0.5049589767220345']
 ['9967304' 'Hungary' '48719.0' '0.4887881417081289']
 ['2093606' 'North Macedonia' '9667.0' '0.461739219318248']
 ['3744385' 'Georgia' '17032.0' '0.4548677553189643']
 ['4030361' 'Croatia' '18091.0' '0.4488679798162001']
 ['627082' 'Montenegro' '2808.0' '0.4477883275233542']
 ['2119843' 'Slovenia' '9230.0' '0.43540960344704777']
 ['10493990' 'Czechia' '42702.0' '0.40691862675683893']]


Conclusão: os 10 países mais populosos não tiveram, necessariamente, as maiores proporções de mortes — nenhum deles aparece entre os 10 países com maior % de mortes.

TOP10 - População e % de mortes

* China - 0,008%
* India - 0,037%
* United States - 0,331%
* Indonesia - 0,058%
* Pakistan - 0,012%
* Nigeria - 0,001%
* Brazil - 0,325%
* Bangladesh - 0,017%
* Russia - 0,274%
* Mexico - 0,261%

TOP10 - Maiores % de mortes

* Peru - 0,645%
* Bulgaria - 0,564%
* Bosnia and Herzegovina - 0,504%
* Hungary - 0,488%
* North Macedonia - 0,461%
* Georgia - 0,454%
* Croatia - 0,448%
* Montenegro - 0,447%
* Slovenia - 0,435%
* Czechia - 0,406%

### Quais foram os 5 países com maior % de mortes em relação a sua população total?

Verificar se houve alguma relação entre localização (continente) e contaminação.

In [18]:
# Reload `covid` from the deaths CSV, this time also reading the continent:
# columns 2, 1, 4 and 9 are requested for location, continent, population
# and new_deaths; the header row is skipped.
covid = np.genfromtxt(
    "datasets/COVID DEATHS.csv",
    usecols=(2, 1, 4, 9),
    dtype=[
        ("location", "U100"),
        ("continent", "U100"),
        ("population", int),
        ("new_deaths", float),
    ],
    skip_header=1,
    delimiter=",",
)

covid

array([('Afghanistan', 'Asia', 41128772, 0.),
       ('Afghanistan', 'Asia', 41128772, 0.),
       ('Afghanistan', 'Asia', 41128772, 0.), ...,
       ('Zimbabwe', 'Africa', 16320539, 0.),
       ('Zimbabwe', 'Africa', 16320539, 0.),
       ('Zimbabwe', 'Africa', 16320539, 0.)],
      dtype=[('location', '<U100'), ('continent', '<U100'), ('population', '<i8'), ('new_deaths', '<f8')])

In [19]:
# Row index of the first occurrence of each distinct (location, continent) pair.
unique = np.unique(covid[['location', 'continent']], return_index=True, axis=0)[1]
unique

array([     0,   1196,   2392,   3588,   4784,   5980,   7176,   8372,
         9568,  10764,  11966,  13162,  14363,  15564,  16761,  17963,
        19159,  20355,  21551,  22752,  23948,  25144,  26340,  27536,
        28732,  29928,  31124,  32320,  33516,  34712,  35908,  37104,
        38300,  39496,  40697,  41893,  43089,  44285,  45481,  46677,
        47873,  49069,  50265,  51461,  52663,  53859,  55055,  56251,
        57447,  58643,  59839,  61035,  62231,  63430,  64626,  65822,
        67023,  68219,  69415,  70611,  71807,  73003,  74199,  75395,
        76591,  77710,  78906,  80102,  81298,  82494,  83690,  84891,
        86092,  87288,  88484,  89685,  90881,  92081,  93277,  94473,
        95669,  96865,  98061,  99263, 100459, 101655, 102856, 104052,
       105248, 106444, 107640, 108836, 110032, 111228, 112424, 113620,
       114816, 116017, 117213, 118385, 119581, 120777, 121978, 123174,
       124370, 125566, 126762, 127958, 129160, 130361, 131557, 132757,
      

In [20]:
# Full record (all four fields) of the first row of each distinct pair.
paises = covid[unique]
paises[:5]

array([('Afghanistan', 'Asia',   41128772, 0.),
       ('Africa', '', 1426736614, 0.),
       ('Albania', 'Europe',    2842318, 0.),
       ('Algeria', 'Africa',   44903228, 0.),
       ('American Samoa', 'Oceania',      44295, 0.)],
      dtype=[('location', '<U100'), ('continent', '<U100'), ('population', '<i8'), ('new_deaths', '<f8')])

In [21]:
# Population of each distinct location, read from its first row.
pop = covid[unique]["population"]
pop[:5]

array([  41128772, 1426736614,    2842318,   44903228,      44295])

In [22]:
# Sorted array of the distinct location names.
pais = np.unique(covid['location'])
pais.shape

(255,)

In [23]:
# Continent of each distinct location, read from its first row.
continente = covid[unique]["continent"]
continente.shape

(255,)

In [24]:
# Independent copy of the rows whose new_deaths value is not NaN.
copy_covid = covid[~np.isnan(covid['new_deaths'])].copy()

In [25]:
# Total new_deaths per location, computed in a single pass instead of
# re-scanning every row once per location.
# `pais` is sorted (it comes from np.unique) and contains every location in
# `copy_covid`, so searchsorted maps each row to the position of its location.
row_group = np.searchsorted(pais, copy_covid['location'])
# Sum new_deaths per position; minlength keeps a 0.0 total for any location
# that has no remaining (non-NaN) rows.
death_totals = np.bincount(
    row_group, weights=copy_covid['new_deaths'], minlength=len(pais)
)
cases = list(zip(pais, death_totals))
# np.array on (name, float) pairs produces a two-column string array; later
# cells convert the numeric column back with astype(float).
casos = np.array(cases)
casos[:5]

array([['Afghanistan', '7884.0'],
       ['Africa', '258851.0'],
       ['Albania', '3604.0'],
       ['Algeria', '6881.0'],
       ['American Samoa', '34.0']], dtype='<U32')

In [26]:
# Build the table in one step, with columns
# continent | population | location | total deaths.
# Stacking strings with ints gives a string-typed array.
total_array = np.hstack([
    continente.reshape(-1, 1),
    pop.reshape(-1, 1),
    casos,
])
total_array[:5]

array([['Asia', '41128772', 'Afghanistan', '7884.0'],
       ['', '1426736614', 'Africa', '258851.0'],
       ['Europe', '2842318', 'Albania', '3604.0'],
       ['Africa', '44903228', 'Algeria', '6881.0'],
       ['Oceania', '44295', 'American Samoa', '34.0']], dtype='<U100')

In [27]:
# Deaths as a percentage of population: column 3 holds the total deaths and
# column 1 the population, both stored as strings in `total_array`.
calc_percent = 100 * (total_array[:, 3].astype(float) / total_array[:, 1].astype(float))
calc_percent[:5]

array([0.01916906, 0.01814287, 0.12679792, 0.01532407, 0.0767581 ])

In [28]:
# Append the percentage as a fifth column.
# Column layout: 0 = continent, 1 = population, 2 = country,
# 3 = total deaths, 4 = proportion (%).
result = np.hstack([total_array, calc_percent.reshape(-1, 1)])
result[:5]

array([['Asia', '41128772', 'Afghanistan', '7884.0',
        '0.01916906247529102'],
       ['', '1426736614', 'Africa', '258851.0', '0.0181428721643503'],
       ['Europe', '2842318', 'Albania', '3604.0', '0.12679791634855775'],
       ['Africa', '44903228', 'Algeria', '6881.0',
        '0.015324065343364624'],
       ['Oceania', '44295', 'American Samoa', '34.0',
        '0.07675809910825149']], dtype='<U100')

In [29]:
# Top 5 analysis by highest death percentage.
# Order the rows by proportion (column 4, parsed as float), largest first.
sorted_indices = np.argsort(result[:, 4].astype(float))[::-1]
sorted_array = result[sorted_indices, :]
print(sorted_array[:5, :])

[['South America' '34049588' 'Peru' '219866.0' '0.645722937969176']
 ['Europe' '6781955' 'Bulgaria' '38282.0' '0.5644685050254683']
 ['Europe' '3233530' 'Bosnia and Herzegovina' '16328.0'
  '0.5049589767220345']
 ['Europe' '9967304' 'Hungary' '48719.0' '0.4887881417081289']
 ['Europe' '2093606' 'North Macedonia' '9667.0' '0.461739219318248']]


Conclusão: 4 dos 5 países com maior % de mortes ficam na Europa, o que sugere uma possível relação com o continente; porém, uma amostra de 5 países não basta para afirmar que há correlação.

TOP5 - Países com maior % de mortes e seus continentes

* Peru - South America - 0.64%
* Bulgaria - Europe - 0.56%
* Bosnia and Herzegovina - Europe - 0.50%
* Hungary - Europe - 0.48%
* North Macedonia - Europe - 0.46