-
Notifications
You must be signed in to change notification settings - Fork 0
/
Covid-19_Data_Exploration.sql
244 lines (198 loc) · 5.29 KB
/
Covid-19_Data_Exploration.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
USE Covid19
GO
-- See the first 10 records of tables
SELECT TOP(10) *
FROM CovidDeaths
ORDER BY 3,4
SELECT TOP(10) *
FROM CovidVaccinations
ORDER BY 3,4
-- Extract columns to explore
SELECT location
,continent
,date
,total_cases
,total_deaths
,new_cases
,new_deaths
,population
FROM CovidDeaths
ORDER BY 1,2
-- Convert data type of [total_cases] & [total_deaths] for calculation
ALTER TABLE CovidDeaths
ALTER COLUMN total_cases float
ALTER TABLE CovidDeaths
ALTER COLUMN total_deaths float
-- Check % deaths cases vs total cases
SELECT location
,date
,total_cases
,total_deaths
,(total_deaths/total_cases)*100 AS death_percentage
FROM CovidDeaths
-- % of death percentage of Vietnam
SELECT location
,date,total_cases
,total_deaths
,(total_deaths/total_cases)*100 AS death_percentage
FROM CovidDeaths
WHERE location = 'Vietnam'
AND continent IS NOT NULL
ORDER BY death_percentage DESC
-- % of population got Covid in Vietnam?
SELECT location
,date
,total_cases
,population
,(total_cases/population)*100 AS percentage
FROM CovidDeaths
WHERE location = 'Vietnam'
AND continent IS NOT NULL
ORDER BY percentage DESC
-- Which countries have the highest infection rate compared to population?
SELECT TOP (5)
location
,population
,MAX(total_cases) AS highest_infection
,MAX((total_cases/population))*100 AS percent_population_infected
FROM CovidDeaths
GROUP BY location,population
ORDER BY percent_population_infected DESC
-- Which countries have highest death cases?
SELECT TOP (5)
location
,MAX(total_deaths) AS total_death
FROM CovidDeaths
WHERE continent IS NOT NULL
GROUP BY location
ORDER BY 2 DESC
-- Which countries have highest % of death cases?
SELECT TOP(5)
location
,MAX((total_deaths/population))*100 AS percent_death
FROM CovidDeaths
WHERE continent IS NOT NULL
GROUP BY location
ORDER BY 2 DESC
-- Which continents have highest death cases?
SELECT continent
,MAX(CAST(total_deaths AS int)) AS total_death
FROM CovidDeaths
WHERE continent IS NOT NULL
GROUP BY continent
ORDER BY 2 DESC
-- Which continents have highest death percentage?
SELECT continent
,MAX((total_deaths/population))*100 AS percent_death
FROM CovidDeaths
WHERE continent IS NOT NULL
GROUP BY continent
ORDER BY 2 DESC
-- Calculate some new numbers
SELECT SUM(new_cases) AS total_new_cases
,SUM(new_deaths) total_new_deaths
,SUM(new_deaths)/SUM(new_cases)*100 AS new_death_percent
FROM CovidDeaths
WHERE continent IS NOT NULL
ORDER BY 1,2
-- See numbers by date
SELECT date
,SUM(new_cases) AS total_new_cases
,SUM(new_deaths) total_new_deaths
,SUM(new_deaths)/SUM(new_cases)*100 AS new_death_percent
FROM CovidDeaths
WHERE continent IS NOT NULL
AND new_cases <> 0
GROUP BY date
ORDER BY 1,2 DESC
-- Join 2 tables to looking at total Population vs Vaccinations
SELECT DEA.continent
,DEA.location
,DEA.population
,VAC.new_vaccinations
FROM CovidDeaths DEA
JOIN CovidVaccinations VAC
ON DEA.location = VAC.location
AND DEA.date = VAC.date
WHERE DEA.continent IS NOT NULL
ORDER BY 1,2,3
-- Convert data type of [new_vaccinations] column for calculation
ALTER TABLE CovidVaccinations
ALTER COLUMN new_vaccinations int
-- Show total vaccinations in Vietnam using new vaccinations day by day
SELECT DEA.continent,DEA.location
,DEA.date
,DEA.population
,VAC.new_vaccinations
,SUM(VAC.new_vaccinations) OVER (PARTITION BY DEA.location ORDER BY DEA.location,DEA.date)
AS total_vaccinated
FROM CovidDeaths DEA
JOIN CovidVaccinations VAC
ON DEA.location = VAC.location
AND DEA.date = VAC.date
WHERE DEA.continent IS NOT NULL
AND DEA.location = 'Vietnam'
ORDER BY 2,3
-- % of population have vaccinated in Vietnam?
WITH PopvsVac (continent,location,date,population,total_vaccinated)
AS
(
SELECT DEA.continent,DEA.location
,DEA.date
,DEA.population
--,VAC.new_vaccinations
,SUM(VAC.new_vaccinations) OVER (PARTITION BY DEA.location ORDER BY DEA.location,DEA.date)
AS total_vaccinated
FROM CovidDeaths DEA
JOIN CovidVaccinations VAC
ON DEA.location = VAC.location
AND DEA.date = VAC.date
WHERE DEA.continent IS NOT NULL
AND DEA.location = 'Vietnam'
)
SELECT *
,(total_vaccinated/population)*100 AS percent_vaccination
FROM PopvsVac
-- Create temp table name PercentVaccinated
CREATE TABLE PercentVaccinated
(
continent nvarchar(255),
location nvarchar(255),
date datetime,
population numeric,
new_vaccinations numeric,
total_vaccinated numeric
)
INSERT INTO PercentVaccinated
SELECT DEA.continent,DEA.location
,DEA.date
,DEA.population
,VAC.new_vaccinations
,SUM(VAC.new_vaccinations) OVER (PARTITION BY DEA.location ORDER BY DEA.location,DEA.date)
AS total_vaccinated
FROM CovidDeaths DEA
JOIN CovidVaccinations VAC
ON DEA.location = VAC.location
AND DEA.date = VAC.date
SELECT *
,(total_vaccinated/population)*100 AS percent_vaccinated
FROM PercentVaccinated
-- Create a view to store data for later visualization
CREATE VIEW Percent_Vaccinated
AS
SELECT DEA.continent,DEA.location
,DEA.date
,DEA.population
,VAC.new_vaccinations
,SUM(VAC.new_vaccinations) OVER (PARTITION BY DEA.location ORDER BY DEA.location,DEA.date)
AS total_vaccinated
FROM CovidDeaths DEA
JOIN CovidVaccinations VAC
ON DEA.location = VAC.location
AND DEA.date = VAC.date
WHERE DEA.continent IS NOT NULL
SELECT *
,(total_vaccinated/population)*100 AS percent_vaccinated
FROM Percent_Vaccinated
WHERE continent IS NOT NULL
AND location = 'Vietnam'