In [48]:
import sqlite3
import csv

with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()

    # Fresh start every time, comment out if first time creating the database
    query = "DROP TABLE athlete_events"
    cursor.execute(query)
    query = "DROP TABLE noc_regions"
    cursor.execute(query)

    # Create the table athlete_event
    query = '''
                CREATE TABLE IF NOT EXISTS athlete_events(
                ID INT,
                Name CHAR(20),
                Sex CHAR(1),
                Age INT,
                Height INT,
                Weight INT,
                Team CHAR(50),
                NOC CHAR(50),
                Games CHAR(50),
                Year INT,
                Season CHAR(50),
                City CHAR(50),
                Sport CHAR(50),
                Event CHAR(50),
                Medal CHAR(50)
            )'''
    cursor.execute(query)

    print("Table athlete_event created successfully.")

    # Create the table noc_regions
    query = '''
                CREATE TABLE IF NOT EXISTS noc_regions(
                NOC CHAR(3),
                region CHAR(30),
                notes CHAR(100)
            )'''
    cursor.execute(query)

    print("Table noc_regions created successfully.")

    with open('athlete_events.csv') as file:
        rows = csv.reader(file, delimiter=',')
        next(rows) # To skip the header
        for row in rows:
            
            query = '''
                    INSERT INTO athlete_events
                    (ID, Name, Sex, Age, Height, Weight, Team, NOC, Games, Year, Season, City, Sport, Event, Medal)
                    VALUES
                    (
                        ?,
                        ?,
                        ?,
                        ?,
                        ?,
                        ?,
                        ?,
                        ?,
                        ?,
                        ?,
                        ?,
                        ?,
                        ?,
                        ?,
                        ?
                    )
                    '''
            values = [entry for entry in row]
            cursor.execute(query, values)
    
    print("Values for athlete_events inserted successfully.")
    

    with open('noc_regions.csv') as file:
        rows = csv.reader(file, delimiter=',')
        next(rows) # To skip the header
        for row in rows:
            
            query = '''
                    INSERT INTO noc_regions
                    (NOC, region, notes)
                    VALUES
                    (
                        ?,
                        ?,
                        ?
                    )
                    '''
            values = [entry for entry in row]
            cursor.execute(query, values)
    
    print("Values for noc_regions inserted successfully.")

    conn.commit()


Table athlete_event created successfully.
Table noc_regions created successfully.
Values for athlete_events inserted successfully.
Values for noc_regions inserted successfully.


In [2]:
# 1. How many olympics games have been held?

with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = '''
            SELECT COUNT(DISTINCT Games) FROM athlete_events
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)


(51,)


In [3]:
# 2. List down all Olympics games held so far.
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = '''
            SELECT DISTINCT Games FROM athlete_events
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

('1992 Summer',)
('2012 Summer',)
('1920 Summer',)
('1900 Summer',)
('1988 Winter',)
('1992 Winter',)
('1994 Winter',)
('1932 Summer',)
('2002 Winter',)
('1952 Summer',)
('1980 Winter',)
('2000 Summer',)
('1996 Summer',)
('1912 Summer',)
('1924 Summer',)
('2014 Winter',)
('1948 Summer',)
('1998 Winter',)
('2006 Winter',)
('2008 Summer',)
('2016 Summer',)
('2004 Summer',)
('1960 Winter',)
('1964 Winter',)
('1984 Winter',)
('1984 Summer',)
('1968 Summer',)
('1972 Summer',)
('1988 Summer',)
('1936 Summer',)
('1952 Winter',)
('1956 Winter',)
('1956 Summer',)
('1960 Summer',)
('1928 Summer',)
('1976 Summer',)
('1980 Summer',)
('1964 Summer',)
('2010 Winter',)
('1968 Winter',)
('1906 Summer',)
('1972 Winter',)
('1976 Winter',)
('1924 Winter',)
('1904 Summer',)
('1928 Winter',)
('1908 Summer',)
('1948 Winter',)
('1932 Winter',)
('1936 Winter',)
('1896 Summer',)


In [4]:
# 3. Mention the total no of nations who participated in each olympics game?
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = '''
            SELECT COUNT(DISTINCT NOC), Games FROM athlete_events
            GROUP BY Games
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

# NOTE: At first I used Team, but actually different teams for the same nation appear in the historic DB.
# Using NOC it's more correct as it represents the country of origin

(12, '1896 Summer')
(31, '1900 Summer')
(15, '1904 Summer')
(21, '1906 Summer')
(22, '1908 Summer')
(29, '1912 Summer')
(29, '1920 Summer')
(45, '1924 Summer')
(19, '1924 Winter')
(46, '1928 Summer')
(25, '1928 Winter')
(47, '1932 Summer')
(17, '1932 Winter')
(49, '1936 Summer')
(28, '1936 Winter')
(59, '1948 Summer')
(28, '1948 Winter')
(69, '1952 Summer')
(30, '1952 Winter')
(72, '1956 Summer')
(32, '1956 Winter')
(84, '1960 Summer')
(30, '1960 Winter')
(93, '1964 Summer')
(36, '1964 Winter')
(112, '1968 Summer')
(37, '1968 Winter')
(121, '1972 Summer')
(35, '1972 Winter')
(92, '1976 Summer')
(37, '1976 Winter')
(80, '1980 Summer')
(37, '1980 Winter')
(140, '1984 Summer')
(49, '1984 Winter')
(159, '1988 Summer')
(57, '1988 Winter')
(169, '1992 Summer')
(64, '1992 Winter')
(67, '1994 Winter')
(197, '1996 Summer')
(72, '1998 Winter')
(200, '2000 Summer')
(77, '2002 Winter')
(201, '2004 Summer')
(79, '2006 Winter')
(204, '2008 Summer')
(82, '2010 Winter')
(205, '2012 Summer')
(89, '2014

In [5]:
# 4. Which year saw the highest and lowest no of countries participating in olympics?
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = '''
            SELECT Year, MAX(NumberCountries) FROM
            (SELECT Year, COUNT(DISTINCT NOC) as NumberCountries FROM athlete_events
            GROUP BY Year)
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

    query = '''
            SELECT Year, MIN(NumberCountries) FROM
            (SELECT Year, COUNT(DISTINCT NOC) as NumberCountries FROM athlete_events
            GROUP BY Year)
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

(2016, 207)
(1896, 12)


In [12]:
# 5. Which nation has participated in all of the olympic games?
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = '''
                    SELECT n.region, COUNT(DISTINCT a.Games) AS game_count
                    FROM athlete_events AS a JOIN noc_regions as n
                    ON a.NOC = n.NOC
                    GROUP BY n.region
                    HAVING game_count = (
                                       SELECT COUNT(DISTINCT a.Games)
                                       FROM athlete_events AS a
                            )
                    
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

('France', 51)
('Italy', 51)
('Switzerland', 51)
('UK', 51)


In [15]:
# 6. Identify the sport which was played in all summer olympics.
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = '''
                SELECT Sport, COUNT(DISTINCT Games) as game_count
                FROM athlete_events
                GROUP BY Sport
                HAVING Season = "Summer" AND game_count = (
                                        
                                    SELECT COUNT(DISTINCT Games)
                                    FROM athlete_events
                                    WHERE Season = "Summer"
                )
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

('Athletics', 29)
('Cycling', 29)
('Fencing', 29)
('Gymnastics', 29)
('Swimming', 29)


In [17]:
# 7. Which Sports were just played only once in the olympics?
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = '''
                SELECT Sport, COUNT(DISTINCT Games) as game_count
                FROM athlete_events
                GROUP BY Sport
                HAVING game_count = 1
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

('Aeronautics', 1)
('Basque Pelota', 1)
('Cricket', 1)
('Croquet', 1)
('Jeu De Paume', 1)
('Military Ski Patrol', 1)
('Motorboating', 1)
('Racquets', 1)
('Roque', 1)
('Rugby Sevens', 1)


In [20]:
# 9. Fetch the total no of sports played in each olympic games.
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = '''
                SELECT COUNT(DISTINCT Sport) as sports_count, Games
                FROM athlete_events
                GROUP BY Games
                ORDER BY sports_count DESC
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

(34, '2016 Summer')
(34, '2008 Summer')
(34, '2004 Summer')
(34, '2000 Summer')
(32, '2012 Summer')
(31, '1996 Summer')
(29, '1992 Summer')
(27, '1988 Summer')
(25, '1984 Summer')
(25, '1920 Summer')
(24, '1936 Summer')
(24, '1908 Summer')
(23, '1980 Summer')
(23, '1976 Summer')
(23, '1972 Summer')
(21, '1964 Summer')
(20, '1968 Summer')
(20, '1948 Summer')
(20, '1924 Summer')
(20, '1900 Summer')
(19, '1960 Summer')
(19, '1956 Summer')
(19, '1952 Summer')
(18, '1932 Summer')
(18, '1904 Summer')
(17, '1928 Summer')
(17, '1912 Summer')
(15, '2014 Winter')
(15, '2010 Winter')
(15, '2006 Winter')
(15, '2002 Winter')
(14, '1998 Winter')
(13, '1906 Summer')
(12, '1994 Winter')
(12, '1992 Winter')
(10, '1988 Winter')
(10, '1984 Winter')
(10, '1980 Winter')
(10, '1976 Winter')
(10, '1972 Winter')
(10, '1968 Winter')
(10, '1964 Winter')
(10, '1924 Winter')
(9, '1948 Winter')
(9, '1896 Summer')
(8, '1960 Winter')
(8, '1956 Winter')
(8, '1952 Winter')
(8, '1936 Winter')
(8, '1928 Winter')
(7, '19

In [32]:
# 10. Fetch details of the oldest athletes to win a gold medal.
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = '''
                SELECT *
                FROM (
                    SELECT * 
                    FROM athlete_events
                    WHERE Medal = "Gold"
                    )
                WHERE Age = (
                    SELECT MAX(Age)
                    FROM athlete_events
                    WHERE Medal = "Gold" AND Age != "NA"
                )
                

            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

(53238, 'Charles Jacobus', 'M', 64, 'NA', 'NA', 'United States', 'USA', '1904 Summer', 1904, 'Summer', 'St. Louis', 'Roque', "Roque Men's Singles", 'Gold')
(117046, 'Oscar Gomer Swahn', 'M', 64, 'NA', 'NA', 'Sweden', 'SWE', '1912 Summer', 1912, 'Summer', 'Stockholm', 'Shooting', "Shooting Men's Running Target, Single Shot, Team", 'Gold')


In [63]:
# 11. Find the Ratio of male and female athletes participated in all olympic games.
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = '''
                WITH maletable (males) AS (
                    SELECT COUNT(*)
                    FROM athlete_events
                    WHERE Sex = "M"
                ),
                femaletable (females) AS (
                    SELECT COUNT(*)
                    FROM athlete_events
                    WHERE Sex = "F"            
                )
                SELECT ROUND(CAST(males AS DECIMAL), 2) / females
                FROM maletable, femaletable
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

(2.638066611202061,)


In [72]:
# 12. Fetch the top 5 athletes who have won the most gold medals.
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = '''
                WITH T1 AS (
                    SELECT ID, Name, Games, COUNT(Medal) AS Golds_Number
                    FROM (
                        SELECT *
                        FROM athlete_events
                        WHERE Medal = "Gold"
                    )
                    GROUP BY ID
                ),
                T2 as
                    (SELECT *, DENSE_RANK() OVER (ORDER BY Golds_Number desc) as rank
                    FROM T1)
                
                SELECT * 
                FROM T2
                WHERE rank <= 5
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

(94406, 'Michael Fred Phelps, II', '2004 Summer', 23, 1)
(33557, 'Raymond Clarence "Ray" Ewry', '1900 Summer', 10, 2)
(67046, 'Larysa Semenivna Latynina (Diriy-)', '1956 Summer', 9, 3)
(69210, 'Frederick Carlton "Carl" Lewis', '1984 Summer', 9, 3)
(87390, 'Paavo Johannes Nurmi', '1920 Summer', 9, 3)
(113912, 'Mark Andrew Spitz', '1968 Summer', 9, 3)
(11642, 'Matthew Nicholas "Matt" Biondi', '1984 Summer', 8, 4)
(11951, 'Ole Einar Bjrndalen', '1998 Winter', 8, 4)
(13029, 'Usain St. Leo Bolt', '2008 Summer', 8, 4)
(35550, 'Birgit Fischer-Schmidt', '1980 Summer', 8, 4)
(57998, 'Sawao Kato', '1968 Summer', 8, 4)
(119922, 'Jennifer Elisabeth "Jenny" Thompson (-Cumpelik)', '1992 Summer', 8, 4)
(4198, 'Nikolay Yefimovich Andrianov', '1972 Summer', 7, 5)
(18826, 'Vra slavsk (-Odloilov)', '1964 Summer', 7, 5)
(21402, 'Viktor Ivanovych Chukarin', '1952 Summer', 7, 5)
(39726, 'Aladr Gerevich (-Gerei)', '1932 Summer', 7, 5)
(107383, 'Donald Arthur "Don" Schollander', '1964 Summer', 7, 5)
(109161, 

In [75]:
# 13. Fetch the top 5 athletes who have won the most medals (gold/silver/bronze).
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = '''
                WITH T1 AS (
                    SELECT ID, Name, COUNT(Medal) AS Medal_Number
                    FROM (
                        SELECT *
                        FROM athlete_events
                        WHERE Medal != "NA"
                    )
                    GROUP BY ID

                ),
                T2 as
                    (SELECT *, DENSE_RANK() OVER (ORDER BY Medal_Number DESC) as rank
                    FROM T1)
                
                SELECT * 
                FROM T2
                WHERE rank <= 5
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

(94406, 'Michael Fred Phelps, II', 28, 1)
(67046, 'Larysa Semenivna Latynina (Diriy-)', 18, 2)
(4198, 'Nikolay Yefimovich Andrianov', 15, 3)
(11951, 'Ole Einar Bjrndalen', 13, 4)
(74420, 'Edoardo Mangiarotti', 13, 4)
(89187, 'Takashi Ono', 13, 4)
(109161, 'Borys Anfiyanovych Shakhlin', 13, 4)
(23426, 'Natalie Anne Coughlin (-Hall)', 12, 5)
(35550, 'Birgit Fischer-Schmidt', 12, 5)
(57998, 'Sawao Kato', 12, 5)
(70965, 'Ryan Steven Lochte', 12, 5)
(85286, 'Aleksey Yuryevich Nemov', 12, 5)
(87390, 'Paavo Johannes Nurmi', 12, 5)
(119922, 'Jennifer Elisabeth "Jenny" Thompson (-Cumpelik)', 12, 5)
(121258, 'Dara Grace Torres (-Hoffman, -Minas)', 12, 5)


In [14]:
# 14. Fetch the top 5 most successful countries in olympics. Success is defined by no of medals won.
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = '''
                WITH T1 AS (
                    SELECT n.region, COUNT(1) AS Medals_count
                    FROM athlete_events as a
                    JOIN noc_regions as n on a.NOC = n.NOC
                    WHERE a.Medal != "NA"
                    GROUP BY region
                ),
                
                T2 as
                    (SELECT *, DENSE_RANK() OVER (ORDER BY Medals_count DESC) as rank
                    FROM T1)
                
                SELECT * 
                FROM T2
                WHERE rank <= 5
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

('USA', 5637, 1)
('Russia', 3947, 2)
('Germany', 3756, 3)
('UK', 2068, 4)
('France', 1777, 5)


In [24]:
# 15. List down total gold, silver and broze medals won by each country.
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = ''' 
                WITH T1 AS (
                SELECT region,
                CASE Medal
                    WHEN 'Gold' THEN 1
                END AS Gold,
                CASE Medal
                    WHEN 'Silver' THEN 1
                END AS Silver,
                CASE Medal
                    WHEN 'Bronze' THEN 1
                END AS Bronze
                FROM athlete_events
                JOIN noc_regions on noc_regions.NOC = athlete_events.NOC
                )

                SELECT region, SUM(Gold) as Gold_num, SUM(Silver), SUM(Bronze)
                FROM T1
                GROUP BY region
                ORDER BY Gold_num DESC
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

('USA', 2638, 1641, 1358)
('Russia', 1599, 1170, 1178)
('Germany', 1301, 1195, 1260)
('UK', 678, 739, 651)
('Italy', 575, 531, 531)
('France', 501, 610, 666)
('Sweden', 479, 522, 535)
('Canada', 463, 438, 451)
('Hungary', 432, 332, 371)
('Norway', 378, 361, 294)
('Australia', 368, 459, 522)
('China', 351, 349, 293)
('Netherlands', 287, 340, 413)
('Japan', 247, 309, 357)
('South Korea', 221, 232, 185)
('Finland', 198, 270, 432)
('Denmark', 179, 241, 177)
('Switzerland', 175, 248, 268)
('Cuba', 164, 129, 116)
('Romania', 161, 200, 292)
('Serbia', 157, 222, 160)
('India', 138, 19, 40)
('Czech Republic', 123, 262, 259)
('Poland', 117, 195, 253)
('Spain', 110, 243, 136)
('Brazil', 109, 175, 191)
('Austria', 108, 186, 156)
('Belgium', 98, 197, 173)
('Argentina', 91, 92, 91)
('New Zealand', 90, 56, 82)
('Greece', 62, 109, 84)
('Croatia', 58, 54, 37)
('Bulgaria', 54, 144, 144)
('Ukraine', 47, 52, 100)
('Pakistan', 42, 45, 34)
('Turkey', 40, 27, 28)
('Jamaica', 38, 75, 44)
('Kenya', 34, 41, 31)

In [26]:
# 16. List down total gold, silver and broze medals won by each country corresponding to each olympic games.
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = ''' 
                WITH T1 AS (
                SELECT region, Games,
                CASE Medal
                    WHEN 'Gold' THEN 1
                END AS Gold,
                CASE Medal
                    WHEN 'Silver' THEN 1
                END AS Silver,
                CASE Medal
                    WHEN 'Bronze' THEN 1
                END AS Bronze
                FROM athlete_events
                JOIN noc_regions on noc_regions.NOC = athlete_events.NOC
                )

                SELECT Games, region, SUM(Gold) as Gold_num, SUM(Silver), SUM(Bronze)
                FROM T1
                GROUP BY Games, region
                ORDER BY Gold_num DESC
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

('1980 Summer', 'Russia', 187, 129, 126)
('1984 Summer', 'USA', 186, 116, 50)
('1996 Summer', 'USA', 159, 48, 52)
('1908 Summer', 'UK', 147, 131, 90)
('2012 Summer', 'USA', 145, 57, 46)
('2016 Summer', 'USA', 139, 54, 71)
('1988 Summer', 'Russia', 134, 67, 99)
('2000 Summer', 'USA', 130, 61, 51)
('1904 Summer', 'USA', 128, 141, 125)
('2008 Summer', 'USA', 127, 110, 80)
('1976 Summer', 'Germany', 123, 76, 74)
('2004 Summer', 'USA', 117, 75, 71)
('1980 Summer', 'Germany', 115, 88, 61)
('1976 Summer', 'Russia', 114, 95, 77)
('1920 Summer', 'USA', 111, 45, 38)
('1988 Summer', 'Germany', 111, 91, 94)
('1972 Summer', 'Russia', 107, 47, 60)
('1912 Summer', 'Sweden', 103, 62, 25)
('1968 Summer', 'USA', 99, 32, 35)
('1924 Summer', 'USA', 97, 36, 49)
('1964 Summer', 'USA', 95, 37, 37)
('1936 Summer', 'Germany', 93, 70, 61)
('1992 Summer', 'Russia', 92, 61, 67)
('1992 Summer', 'USA', 89, 50, 85)
('1948 Summer', 'USA', 87, 30, 35)
('1988 Summer', 'USA', 87, 66, 54)
('1952 Summer', 'USA', 83, 23, 2

In [52]:
# 17. Identify which country won the most gold, most silver and most bronze medals in each olympic games.
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = ''' 
            
            '''
    # TODO
    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

In [53]:
# 18. Identify which country won the most gold, most silver, most bronze medals and the most medals in each olympic games.
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = ''' 
            
            '''
    # TODO
    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

In [60]:
# 19. Which countries have never won gold medal but have won silver/bronze medals?
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = ''' 
                WITH T1 AS (
                SELECT region,
                CASE Medal
                    WHEN 'Gold' THEN 1
                    ELSE 0
                END AS Gold,
                CASE Medal
                    WHEN 'Silver' THEN 1
                    ELSE 0
                END AS Silver,
                CASE Medal
                    WHEN 'Bronze' THEN 1
                    ELSE 0
                END AS Bronze
                FROM athlete_events
                JOIN noc_regions on noc_regions.NOC = athlete_events.NOC
                )

                SELECT region, SUM(Gold) AS Gold_num, SUM(Silver) AS Silver_num, SUM(Bronze) AS Bronze_num
                FROM T1
                GROUP BY region
                HAVING Gold_num = 0 AND (Silver_num > 0 OR Bronze_num > 0)
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

('Afghanistan', 0, 0, 2)
('Barbados', 0, 0, 1)
('Bermuda', 0, 0, 1)
('Botswana', 0, 1, 0)
('Curacao', 0, 1, 0)
('Cyprus', 0, 1, 0)
('Djibouti', 0, 0, 1)
('Eritrea', 0, 0, 1)
('Gabon', 0, 1, 0)
('Ghana', 0, 1, 22)
('Guatemala', 0, 1, 0)
('Guyana', 0, 0, 1)
('Iceland', 0, 15, 2)
('Iraq', 0, 0, 1)
('Kuwait', 0, 0, 2)
('Kyrgyzstan', 0, 1, 2)
('Lebanon', 0, 2, 2)
('Macedonia', 0, 0, 1)
('Malaysia', 0, 11, 5)
('Mauritius', 0, 0, 1)
('Moldova', 0, 3, 5)
('Monaco', 0, 0, 1)
('Montenegro', 0, 14, 0)
('Namibia', 0, 4, 0)
('Niger', 0, 1, 1)
('Paraguay', 0, 17, 0)
('Philippines', 0, 3, 7)
('Qatar', 0, 1, 4)
('Saudi Arabia', 0, 1, 5)
('Senegal', 0, 1, 0)
('Sri Lanka', 0, 2, 0)
('Sudan', 0, 1, 0)
('Tanzania', 0, 2, 0)
('Togo', 0, 0, 1)
('Tonga', 0, 1, 0)
('Virgin Islands, US', 0, 1, 0)
('Zambia', 0, 1, 1)


In [92]:
# 20. In which Sport/event, India has won highest medals.
with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = ''' 
                SELECT region, Sport, COUNT(Medal) as count_medals
                FROM (
                    SELECT n.region, a.Games, a.Sport, a.Medal
                    FROM athlete_events a
                    JOIN noc_regions n ON n.NOC = a.NOC
                    WHERE a.Medal != 'NA' AND  n.region = 'India'
                )
                GROUP BY Sport
                ORDER BY count_medals DESC
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

('India', 'Hockey', 173)
('India', 'Alpinism', 7)
('India', 'Wrestling', 5)
('India', 'Shooting', 4)
('India', 'Boxing', 2)
('India', 'Badminton', 2)
('India', 'Athletics', 2)
('India', 'Weightlifting', 1)
('India', 'Tennis', 1)


In [85]:
# 21. Break down all olympic games where india won medal for Hockey and how many medals in each olympic games.

with sqlite3.connect('olympics.db') as conn:
    cursor = conn.cursor()
    query = ''' 
                SELECT region, Sport, Games, COUNT(Medal) AS count_medals
                FROM (
                    SELECT n.region, a.Games, a.Sport, a.Medal
                    FROM athlete_events a
                    JOIN noc_regions n ON n.NOC = a.NOC
                    WHERE a.Medal != 'NA'AND  n.region = 'India' AND a.Sport = 'Hockey' 
                )
                GROUP BY Games
                ORDER BY count_medals DESC
            '''

    cursor.execute(query)
    for entry in cursor.fetchall():
        print(entry)

('India', 'Hockey', '1948 Summer', 20)
('India', 'Hockey', '1936 Summer', 19)
('India', 'Hockey', '1956 Summer', 17)
('India', 'Hockey', '1980 Summer', 16)
('India', 'Hockey', '1968 Summer', 16)
('India', 'Hockey', '1964 Summer', 15)
('India', 'Hockey', '1932 Summer', 15)
('India', 'Hockey', '1972 Summer', 14)
('India', 'Hockey', '1952 Summer', 14)
('India', 'Hockey', '1928 Summer', 14)
('India', 'Hockey', '1960 Summer', 13)
