-
Notifications
You must be signed in to change notification settings - Fork 0
/
Cleaning Queries.sql
76 lines (65 loc) · 1.79 KB
/
Cleaning Queries.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
/*Cleaning data LOG -
1)Deleting Null and Duplicate values in the Primary Key - ride_ID
2)The station ID values had inconsistent formatting, which was corrected.
2)Remove headers resulting from compilation (12 Rows)
3)DROPPING Columns - Start and End lattitude and Longitude - because Irrelavent
5)DELETED data where start time was greater than the end time
*/
-- Delete Queries
ALTER TABLE bike_data
DROP COLUMN Start_lat, End_lat, Start_lng, End_lng;
DELETE
FROM
bike_data
WHERE
ride_id is null
AND ride_id = 'ride_id'; -- resulting from compilation
DELETE
FROM
bike_data
WHERE
Started_at > Ended_at; -- does not make sense
### Checking integrity of the data and exploring the data
--1 Types of Members
SELECT DISTINCT
member_casual
FROM
bike_data;
-- Two types of members - Ok
--2 Date Range
SELECT DISTINCT
Extract(YEAR_MONTH from started_at)
FROM
bike_data;
-- Data range - 2021 (Jan to Dec) - Ok
--3 start station count
SELECT
count(DISTINCT start_station_id),
TRIM(start_station_name)
FROM
bike_data
WHERE start_station_id <> '' AND start_station_name <> ''
ORDER by
start_station_name;
--4 End station count
SELECT
count(DISTINCT end_station_id),
TRIM(end_station_name)
FROM
bike_data
WHERE end_station_id <> '' AND end_station_name <> ''
ORDER by
start_station_name;
-- 845 end stations
--5 Rideable types
SELECT distinct
rideable_type
FROM
bike_data;
-- There are three types of bikes - ok
-- Create Queries
-- To make queries faster I divided the dataset into two parts 1)member data 2)casual data
CREATE TABLE member_data AS
SELECT * from bike_data where member_casual like '%member%';
CREATE TABLE casual_data AS
SELECT * from bike_data where member_casual like '%casual%';