In [27]:
!pip install ipython-sql --quiet --upgrade
!pip install sqlalchemy --quiet --upgrade
!pip install jovian --upgrade --quiet
!pip install psycopg2 --quiet --upgrade

In [28]:
import pandas as pd

In [29]:
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [30]:
from sqlalchemy import create_engine

To connect ipython-sql to your database, use the following format:

In [31]:
%sql postgresql://postgres:00121359sql@localhost/Covid19_project

In [32]:
engine = create_engine('postgresql://postgres:00121359sql@localhost/Covid19_project')

All of the data come from this website [click](https://ourworldindata.org/covid-deaths)

Let's first create our table in the database. All of this step is happening in either pgadmin4 or psql shell.

Let's create our vaccine table first.

```
CREATE TABLE vaccine(
iso_code VARCHAR(100),
continent VARCHAR(100),
location VARCHAR(100),
date DATE,
new_tests FLOAT,
total_tests FLOAT,
total_tests_per_thousand FLOAT,
new_tests_per_thousand FLOAT,
new_tests_smoothed FLOAT,
new_tests_smoothed_per_thousand FLOAT,
positive_rate FLOAT,
tests_per_case FLOAT,
tests_units VARCHAR(100),
total_vaccinations FLOAT,
people_vaccinated FLOAT,
people_fully_vaccinated FLOAT,
new_vaccinations FLOAT,
new_vaccinations_smoothed FLOAT,
total_vaccinations_per_hundred FLOAT,
people_vaccinated_per_hundred FLOAT,
people_fully_vaccinated_per_hundred FLOAT,
new_vaccinations_smoothed_per_million FLOAT,
stringency_index FLOAT,
population_density FLOAT,
median_age FLOAT,
aged_65_older FLOAT,
aged_70_older FLOAT,
gdp_per_capita FLOAT,
extreme_poverty FLOAT,
cardiovasc_death_rate FLOAT,
diabetes_prevalence FLOAT,
female_smokers FLOAT,
male_smokers FLOAT,
handwashing_facilities FLOAT,
hospital_beds_per_thousand FLOAT,
life_expectancy FLOAT,
human_development_index FLOAT,
excess_mortality FLOAT
);

SELECT * FROM vaccine;

COPY vaccine FROM 'C:\Users\Phurin Chawjarusopas\Desktop\Covid_vaccination.csv' DELIMITER ',' CSV HEADER;

DROP TABLE vaccine; 

```

Now, onto the death table

```
CREATE TABLE death(
iso_code VARCHAR(100),
continent VARCHAR(100),
location VARCHAR(100),
date DATE,
population FLOAT,
total_cases FLOAT,
new_cases FLOAT,
new_cases_smoothed FLOAT,
total_deaths FLOAT,
new_deaths FLOAT,
new_deaths_smoothed FLOAT,
total_cases_per_million FLOAT,
new_cases_per_million FLOAT,
new_cases_smoothed_per_million FLOAT,
total_deaths_per_million FLOAT,
new_deaths_per_million FLOAT,
new_deaths_smoothed_per_million FLOAT,
reproduction_rate FLOAT,
icu_patients FLOAT,
icu_patients_per_million FLOAT,
hosp_patients FLOAT,
hosp_patients_per_million FLOAT,
weekly_icu_admissions FLOAT,
weekly_icu_admissions_per_million FLOAT,
weekly_hosp_admissions FLOAT,
weekly_hosp_admissions_per_million FLOAT
	);
	
SELECT * FROM death;

COPY death FROM 'C:\Users\Phurin Chawjarusopas\Desktop\Covid_death.csv' DELIMITER ',' CSV HEADER;

DROP TABLE death;
```

In [10]:
%%sql

SELECT 
    * 
FROM 
    vaccine
LIMIT 
    4

 * postgresql://postgres:***@localhost/Covid19_project
4 rows affected.


iso_code,continent,location,date,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
AFG,Asia,Afghanistan,2020-02-24,,,,,,,,,,,,,,,,,,,8.33,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
AFG,Asia,Afghanistan,2020-02-25,,,,,,,,,,,,,,,,,,,8.33,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
AFG,Asia,Afghanistan,2020-02-26,,,,,,,,,,,,,,,,,,,8.33,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
AFG,Asia,Afghanistan,2020-02-27,,,,,,,,,,,,,,,,,,,8.33,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,


In [23]:
%%sql

SELECT location, date, total_cases, new_cases, total_deaths, population
FROM death
ORDER BY location,date
LIMIT 10 

 * postgresql://postgres:***@localhost/Covid19_project
10 rows affected.


location,date,total_cases,new_cases,total_deaths,population
Afghanistan,2020-02-24,1.0,1.0,,38928341.0
Afghanistan,2020-02-25,1.0,0.0,,38928341.0
Afghanistan,2020-02-26,1.0,0.0,,38928341.0
Afghanistan,2020-02-27,1.0,0.0,,38928341.0
Afghanistan,2020-02-28,1.0,0.0,,38928341.0
Afghanistan,2020-02-29,1.0,0.0,,38928341.0
Afghanistan,2020-03-01,1.0,0.0,,38928341.0
Afghanistan,2020-03-02,1.0,0.0,,38928341.0
Afghanistan,2020-03-03,2.0,1.0,,38928341.0
Afghanistan,2020-03-04,4.0,2.0,,38928341.0


### Total cases vs Total death in Thailand

In [30]:

%%sql

SELECT  location, date, total_cases, total_deaths, (total_deaths/total_cases)*100 AS DeathPercentage
FROM death
WHERE location like 'Thailand'
ORDER BY location,date 
LIMIT 10

 * postgresql://postgres:***@localhost/Covid19_project
10 rows affected.


location,date,total_cases,total_deaths,deathpercentage
Thailand,2020-01-04,,,
Thailand,2020-01-05,,,
Thailand,2020-01-06,,,
Thailand,2020-01-07,,,
Thailand,2020-01-08,,,
Thailand,2020-01-09,,,
Thailand,2020-01-10,,,
Thailand,2020-01-11,,,
Thailand,2020-01-12,,,
Thailand,2020-01-13,,,


### Total cases vs population

In [31]:
%%sql

SELECT location, date, total_cases, population, (total_cases/population)*100 AS InfectionRate
FROM death
WHERE location like 'Thailand'
ORDER BY location,date;

 * postgresql://postgres:***@localhost/Covid19_project
583 rows affected.


location,date,total_cases,population,infectionrate
Thailand,2020-01-04,,69799978.0,
Thailand,2020-01-05,,69799978.0,
Thailand,2020-01-06,,69799978.0,
Thailand,2020-01-07,,69799978.0,
Thailand,2020-01-08,,69799978.0,
Thailand,2020-01-09,,69799978.0,
Thailand,2020-01-10,,69799978.0,
Thailand,2020-01-11,,69799978.0,
Thailand,2020-01-12,,69799978.0,
Thailand,2020-01-13,,69799978.0,


### Max total cases in Thailand (PROBLEM STILL NOT FIX)

In [64]:
%%sql

SELECT location, population, MAX(total_cases) AS HighestInfectionCount,  MAX((total_cases/population))*100 AS InfectionRate
FROM death
--WHERE location like 'Thailand'
GROUP BY location,population
ORDER BY InfectionRate desc


 * postgresql://postgres:***@localhost/Covid19_project
231 rows affected.


location,population,highestinfectioncount,infectionrate
Niue,1618.0,,
Anguilla,15002.0,,
French Polynesia,280904.0,,
Sint Maarten (Dutch part),42882.0,,
Wallis and Futuna,11246.0,,
Tuvalu,11792.0,,
Turks and Caicos Islands,38718.0,,
Falkland Islands,3483.0,,
Greenland,56772.0,,
Faeroe Islands,48865.0,,


### TEST CELL

In [63]:
%%sql

SELECT MAX((total_cases/population))*100 AS InfectionRate
FROM death
GROUP BY location,population
--WHERE location like 'Thailand'



 * postgresql://postgres:***@localhost/Covid19_project
(psycopg2.errors.SyntaxError) syntax error at or near "WHERE"
LINE 5: WHERE InfectionRate IS NOT NULL
        ^

[SQL: SELECT MAX((total_cases/population))*100 AS InfectionRate
FROM death
GROUP BY location,population
--WHERE location like 'Thailand'
WHERE InfectionRate IS NOT NULL]
(Background on this error at: https://sqlalche.me/e/14/f405)


### What countries have the highest infection rate?

In [92]:
%%sql

SELECT DISTINCT 
    location, 
    MAX((total_cases/population)*100) AS InfectionRate 
FROM death
GROUP BY location, population
ORDER BY infectionrate DESC
LIMIT 100


 * postgresql://postgres:***@localhost/Covid19_project
100 rows affected.


location,infectionrate
Gibraltar,
British Virgin Islands,
Turkmenistan,
Bermuda,
Northern Cyprus,
Aruba,
Curacao,
New Caledonia,
Cook Islands,
Faeroe Islands,


In [99]:

%%sql

SELECT DISTINCT 
    location, 
    MAX((total_cases/population)*100) AS InfectionRate 
FROM death
GROUP BY location, population
ORDER BY infectionrate DESC
WHERE infectionrate IS NOT NULL
LIMIT 100


 * postgresql://postgres:***@localhost/Covid19_project
(psycopg2.errors.SyntaxError) syntax error at or near "WHERE"
LINE 5: WHERE infectionrate IS NOT NULL
        ^

[SQL: SELECT DISTINCT location, MAX((total_cases/population)*100) AS InfectionRate 
FROM death
GROUP BY location, population
ORDER BY infectionrate DESC
WHERE infectionrate IS NOT NULL
LIMIT 100]
(Background on this error at: https://sqlalche.me/e/14/f405)


### Countries with the highest deathcount

In [89]:
%%sql

SELECT  Location, MAX(total_deaths) AS TotalDeathCount
FROM death
WHERE continent IS NOT NULL -- 'is to get rid of continent in location column'
GROUP BY location
ORDER BY TotalDeathCount DESC



 * postgresql://postgres:***@localhost/Covid19_project
222 rows affected.


location,totaldeathcount
French Polynesia,
Guernsey,
Vatican,
Micronesia (country),
Kiribati,
Wallis and Futuna,
Samoa,
Falkland Islands,
Gibraltar,
Jersey,


### DeathCount by continent

In [101]:
%%sql

SELECT  Location, MAX(total_deaths) AS TotalDeathCount
FROM death
WHERE continent IS NULL -- 'is to get data that shows continent in location column'
GROUP BY location
ORDER BY TotalDeathCount DESC

 * postgresql://postgres:***@localhost/Covid19_project
9 rows affected.


location,totaldeathcount
World,4294361.0
Europe,1144998.0
South America,1101990.0
Asia,935674.0
North America,932875.0
European Union,745818.0
Africa,177352.0
Oceania,1457.0
International,15.0


### Showing continent with the highest death count per population

In [104]:
%%sql

SELECT  continent, MAX(total_deaths) AS TotalDeathCount
FROM death
WHERE continent IS NOT NULL 
GROUP BY continent
ORDER BY TotalDeathCount DESC

 * postgresql://postgres:***@localhost/Covid19_project
6 rows affected.


continent,totaldeathcount
North America,616829.0
South America,563151.0
Asia,428309.0
Europe,162109.0
Africa,74813.0
Oceania,939.0


### Global Numbers

In [116]:
%%sql

SELECT SUM(new_cases) AS total_cases, SUM(new_deaths) AS total_deaths, (SUM(new_deaths) / NULLIF(SUM(new_cases),0))*100 AS DeathPercentage
FROM death
WHERE continent IS NOT NULL 
--GROUP BY date
ORDER BY 1,2 

 * postgresql://postgres:***@localhost/Covid19_project
1 rows affected.


total_cases,total_deaths,deathpercentage
202252010.0,4285543.0,2.118912439980201


### Total population vs Vaccination

In [127]:
%%sql

SELECT dea.continent, dea.location, dea.date, dea.population, 
vac.new_vaccinations, SUM(vac.new_vaccinations) 
OVER (Partition by dea.location ORDER BY dea.location, dea.date) 
AS Accumulated_Vaccination
FROM death dea
JOIN vaccine vac
ON dea.location = vac.location
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL 
--GROUP BY date
ORDER BY  location, date
LIMIT 1000

 * postgresql://postgres:***@localhost/Covid19_project
1000 rows affected.


continent,location,date,population,new_vaccinations,accumulated_vaccination
Asia,Afghanistan,2020-02-24,38928341.0,,
Asia,Afghanistan,2020-02-25,38928341.0,,
Asia,Afghanistan,2020-02-26,38928341.0,,
Asia,Afghanistan,2020-02-27,38928341.0,,
Asia,Afghanistan,2020-02-28,38928341.0,,
Asia,Afghanistan,2020-02-29,38928341.0,,
Asia,Afghanistan,2020-03-01,38928341.0,,
Asia,Afghanistan,2020-03-02,38928341.0,,
Asia,Afghanistan,2020-03-03,38928341.0,,
Asia,Afghanistan,2020-03-04,38928341.0,,


###  CTE Method

Able to call the new column that was just created in query like `accumulated_vaccinations` to work

In [None]:
WITH PopVsVac (continent, location, date, population, Accumulated_Vaccination )
AS 

In [143]:
%%sql

WITH PopVsVac (continent, location, date, population, new_vaccinations, 
               Accumulated_Vaccination )
AS 

(
SELECT dea.continent, dea.location, dea.date, dea.population, 
    vac.new_vaccinations, SUM(vac.new_vaccinations) 
OVER (Partition by dea.location ORDER BY dea.location, dea.date)  -- SIMILAR TO GROUP BY BUT DOES NOT REUDCE THE NUMBER OF ROWS RETURN
AS Accumulated_Vaccination
FROM death dea
JOIN vaccine vac
    ON dea.location = vac.location
    AND dea.date = vac.date
WHERE dea.continent IS NOT NULL AND dea.location like 'Thailand'
--GROUP BY date
--ORDER BY  location, date
LIMIT 1000
)

SELECT *, (Accumulated_Vaccination/population)*100 AS Vaccination_Percentage
FROM PopVsVac

 * postgresql://postgres:***@localhost/Covid19_project
583 rows affected.


continent,location,date,population,new_vaccinations,accumulated_vaccination,vaccination_percentage
Asia,Thailand,2020-01-04,69799978.0,,,
Asia,Thailand,2020-01-05,69799978.0,,,
Asia,Thailand,2020-01-06,69799978.0,,,
Asia,Thailand,2020-01-07,69799978.0,,,
Asia,Thailand,2020-01-08,69799978.0,,,
Asia,Thailand,2020-01-09,69799978.0,,,
Asia,Thailand,2020-01-10,69799978.0,,,
Asia,Thailand,2020-01-11,69799978.0,,,
Asia,Thailand,2020-01-12,69799978.0,,,
Asia,Thailand,2020-01-13,69799978.0,,,


### Temp table

I have to distribute the query as it cannot be run on a single command.  Something might be wrong, I will have a look again.

In [191]:
%%sql
DROP TABLE IF EXISTS PercentPopulationVaccinated

 * postgresql://postgres:***@localhost/Covid19_project
(psycopg2.errors.WrongObjectType) "percentpopulationvaccinated" is not a table
HINT:  Use DROP VIEW to remove a view.

[SQL: DROP TABLE IF EXISTS PercentPopulationVaccinated]
(Background on this error at: https://sqlalche.me/e/14/f405)


In [183]:
%%sql

CREATE TABLE PercentPopulationVaccinated(
    continent VARCHAR(100),
    location VARCHAR(100),
    date DATE,
    population NUMERIC,
    new_vaccinations NUMERIC,
    Accumulated_Vaccination NUMERIC
    )



 * postgresql://postgres:***@localhost/Covid19_project
Done.


[]

In [184]:
%%sql

INSERT INTO PercentPopulationVaccinated
SELECT dea.continent, dea.location, dea.date, dea.population, 
vac.new_vaccinations, SUM(vac.new_vaccinations) 
OVER (Partition by dea.location ORDER BY dea.location, dea.date) 
AS Accumulated_Vaccination
FROM death dea
JOIN vaccine vac
ON dea.location = vac.location
AND dea.date = vac.date
WHERE dea.continent IS NOT NULL 
--GROUP BY date
ORDER BY  location, date
LIMIT 1000



 * postgresql://postgres:***@localhost/Covid19_project
1000 rows affected.


[]

In [185]:
%%sql

SELECT *, (Accumulated_Vaccination/population)*100 AS Vaccination_Percentage
FROM PercentPopulationVaccinated

 * postgresql://postgres:***@localhost/Covid19_project
1000 rows affected.


continent,location,date,population,new_vaccinations,accumulated_vaccination,vaccination_percentage
Asia,Afghanistan,2020-02-24,38928341,,,
Asia,Afghanistan,2020-02-25,38928341,,,
Asia,Afghanistan,2020-02-26,38928341,,,
Asia,Afghanistan,2020-02-27,38928341,,,
Asia,Afghanistan,2020-02-28,38928341,,,
Asia,Afghanistan,2020-02-29,38928341,,,
Asia,Afghanistan,2020-03-01,38928341,,,
Asia,Afghanistan,2020-03-02,38928341,,,
Asia,Afghanistan,2020-03-03,38928341,,,
Asia,Afghanistan,2020-03-04,38928341,,,


### Create view to store data for visualization

In [200]:
%%sql
DROP VIEW IF EXISTS PercentPopulationVaccinated

 * postgresql://postgres:***@localhost/Covid19_project
Done.


[]

In [201]:
%%sql


CREATE VIEW PercentPopulationVaccinated AS
SELECT dea.continent, dea.location, dea.date, dea.population, 
    vac.new_vaccinations, SUM(vac.new_vaccinations) 
OVER (Partition by dea.location ORDER BY dea.location, dea.date)  -- SIMILAR TO GROUP BY BUT DOES NOT REUDCE THE NUMBER OF ROWS RETURN
AS Accumulated_Vaccination
FROM death dea
JOIN vaccine vac
    ON dea.location = vac.location
    AND dea.date = vac.date
WHERE dea.continent IS NOT NULL AND dea.location like 'Thailand'
--GROUP BY date
--ORDER BY  location, date
LIMIT 1000



 * postgresql://postgres:***@localhost/Covid19_project
Done.


[]

In [202]:
%%sql
SELECT *
FROM PercentPopulationVaccinated

 * postgresql://postgres:***@localhost/Covid19_project
583 rows affected.


continent,location,date,population,new_vaccinations,accumulated_vaccination
Asia,Thailand,2020-01-04,69799978.0,,
Asia,Thailand,2020-01-05,69799978.0,,
Asia,Thailand,2020-01-06,69799978.0,,
Asia,Thailand,2020-01-07,69799978.0,,
Asia,Thailand,2020-01-08,69799978.0,,
Asia,Thailand,2020-01-09,69799978.0,,
Asia,Thailand,2020-01-10,69799978.0,,
Asia,Thailand,2020-01-11,69799978.0,,
Asia,Thailand,2020-01-12,69799978.0,,
Asia,Thailand,2020-01-13,69799978.0,,


In [19]:
jovian.commit(project = 'SQL Project1 - Covid 19 Analysis')

<IPython.core.display.Javascript object>

[jovian] Updating notebook "sunskung/sql-project1-covid-19-analysis" on https://jovian.ai/
[jovian] Committed successfully! https://jovian.ai/sunskung/sql-project1-covid-19-analysis


'https://jovian.ai/sunskung/sql-project1-covid-19-analysis'