# What is group by?

The purpose of this notebook is to demonstrate what the "group by" clause does in SQL.

SQLite is like a baby SQL database: it is free and lightweight, and the whole database lives in a single file. That makes it good for testing things out. I'm going to create a table called "people" with random rows, each representing an imaginary person.

In [1]:
import sqlite3
import random

In [2]:
import sqlite3
import random
# Open (or create) the single-file SQLite database used throughout this notebook.
DB_PATH = 'test.sqlite3'
conn = sqlite3.connect(DB_PATH)


In [3]:

# Start from a clean slate. "if exists" keeps this cell from raising
# "no such table: people" the first time the notebook is run against a
# brand-new database file, so Restart & Run All works on a fresh checkout.
conn.execute("drop table if exists people")


<sqlite3.Cursor at 0x10ecc1b90>

In [4]:

# Create the demo table. varchar(100) is the standard SQL spelling of a
# length-limited string column; square brackets are not length syntax.
conn.execute(
    "create table people ("
    "id integer PRIMARY KEY, "
    "firstname varchar(100), "
    "lastname varchar(100), "
    "weight integer, "
    "age integer)"
)


def random_person(person_id):
    """Return one (id, firstname, lastname, weight, age) row of random data.

    Both names are 20 random lowercase letters, age is drawn from 5..50
    inclusive, and weight is drawn between 4x and 9x the age.
    """
    letters = "abcdefghijklmnopqrstuvwxyz"
    firstname = ''.join(random.choice(letters) for _ in range(20))
    lastname = ''.join(random.choice(letters) for _ in range(20))
    age = random.randint(5, 50)
    weight = random.randint(age * 4, age * 9)
    return (person_id, firstname, lastname, weight, age)


# Name the target columns explicitly: weight and age are both integers, so a
# positional insert would silently accept them in the wrong order. The 1000
# rows are streamed through executemany instead of one execute call per row.
conn.executemany(
    "insert into people (id, firstname, lastname, weight, age) values (?,?,?,?,?)",
    (random_person(person_id) for person_id in range(1000)),
)

# Persist the inserted rows to the database file.
conn.commit()




In [5]:
# Peek at ten rows of the table to see what the data looks like.
c = conn.cursor()
c.execute("select * from people")
for person in c.fetchmany(10):
    print(person)
    

(0, 'jhxwwukzikvpaqrbgyjc', 'bqmizvkefxkdtdjukeze', 334, 39)
(1, 'opzaxzzveblxlrdvqjsk', 'vpugcimnwjnhqxgfejcc', 62, 15)
(2, 'mvxmzkcaljoojhriyaed', 'rnegzelhrsrppntbrzyg', 320, 45)
(3, 'tyklamzfcekjnwmtmnvv', 'ezatjnplnbbjxulbldgh', 83, 10)
(4, 'avtbmvnpqqydvbqdqacy', 'thxqfqizklyoxqkyhvod', 178, 29)
(5, 'lrcvbjyvnccinlmdwnpd', 'jfxyvnbsledzgqvgdbwg', 68, 8)
(6, 'onqkdenfubnjetmbehgr', 'rxuwcizmhaplbemovnjq', 107, 20)
(7, 'soofxqdynubmehznqdze', 'gideibaaxhsgmzraclzq', 54, 8)
(8, 'wiatospnqdrhcazlzqjl', 'vqlhsxnxddtmcnzseqdu', 153, 27)
(9, 'bfimduuqnxawwrpqpxjw', 'slfsjzdbbsqqajpxunhf', 124, 26)


Let's first talk about "order by". 

In [6]:
# Same select as before, but sorted by the age column; show the first twenty rows.
c = conn.cursor()
sorted_by_age = c.execute("select * from people order by age")
for person in sorted_by_age.fetchmany(20):
    print(person)

(79, 'ixjjxeocdyhijphbigyt', 'mhhduajcqfjziivoccdb', 36, 5)
(151, 'gfmgdetsbdcyalgmnggw', 'vcgaxbznnjsfhyugrvgh', 21, 5)
(260, 'bzhverbcfdiuewjituml', 'bzxtpzfovwfwfzyhejjt', 27, 5)
(361, 'hydomufoiphiszakafmu', 'thratikfnflftdvycdds', 20, 5)
(381, 'tafchgnkvaoywdjbopik', 'bfoqryplxrfrueopwfev', 38, 5)
(382, 'exknhddlgzrdmgfnpuno', 'mnozaniakbjvdwaprcql', 27, 5)
(445, 'eyjpkqlldwbqahqzgcdh', 'qclktqqitlmoqkxmhhyt', 38, 5)
(454, 'bfuqfgkctqwtoetgqiid', 'bhgskslmptblafyzzuqi', 34, 5)
(455, 'jpqrsecdguqvnordczmf', 'zgyvobnqodsdfagvuxbk', 36, 5)
(547, 'ovvtdiiahlhodrylvfvv', 'yohofyszaicwjszhnkwk', 29, 5)
(589, 'bxaohbthajnkvkzjaeig', 'awethdwqsucpkxmjokgo', 38, 5)
(606, 'dsnkdayicrqqkvtnvxle', 'cfpuikyrrkhrrkyxxpyt', 35, 5)
(649, 'qbbmighacdxwjmkxevei', 'klmatacivbyxrtryihex', 22, 5)
(671, 'xbatrxlvjgftmiuyuvbj', 'knqnexhigfrkqxpdjcvp', 38, 5)
(822, 'pnhcaabofuqvmctstxqg', 'pvnkieoskabhxkxbaeya', 25, 5)
(978, 'dgggwwmxjzeoflymwjie', 'ofibymcmpupgbssocrvz', 25, 5)
(997, 'juctcpykkpdewwnsbh

In [7]:
# Sort by the weight column instead and show the first twenty rows.
c = conn.cursor()
weight_query = "select * from people order by weight"
for person in c.execute(weight_query).fetchmany(20):
    print(person)

(361, 'hydomufoiphiszakafmu', 'thratikfnflftdvycdds', 20, 5)
(151, 'gfmgdetsbdcyalgmnggw', 'vcgaxbznnjsfhyugrvgh', 21, 5)
(649, 'qbbmighacdxwjmkxevei', 'klmatacivbyxrtryihex', 22, 5)
(793, 'rysfxzvslbyfssqospmw', 'heymflljngdrwwweupbu', 24, 6)
(806, 'ydlovlyvisqrnykalgup', 'gxeoutwzyfppwtawkdua', 25, 6)
(822, 'pnhcaabofuqvmctstxqg', 'pvnkieoskabhxkxbaeya', 25, 5)
(978, 'dgggwwmxjzeoflymwjie', 'ofibymcmpupgbssocrvz', 25, 5)
(295, 'plujjkvwbshsxuhnmshs', 'qkqewxgjthqymdkqjqhh', 26, 6)
(723, 'lmoeefiofpdqgoswqskn', 'yhcwjrfnttsxbjjpbrzj', 26, 6)
(260, 'bzhverbcfdiuewjituml', 'bzxtpzfovwfwfzyhejjt', 27, 5)
(382, 'exknhddlgzrdmgfnpuno', 'mnozaniakbjvdwaprcql', 27, 5)
(613, 'esbfcpscicmakxjztsww', 'checzbudvgvjuamxfqpz', 28, 7)
(155, 'yfylkeesjkkavtlrmeel', 'fjqajnsffqzguuemparg', 29, 7)
(547, 'ovvtdiiahlhodrylvfvv', 'yohofyszaicwjszhnkwk', 29, 5)
(267, 'nnzinivilwsslgvoeujz', 'lcaflukhdtfibfzfvews', 30, 6)
(624, 'hcztueuqiezclwyvains', 'wslhtnhurwjkqayyqtos', 31, 6)
(957, 'jcslfmhjklfgsgbiv

"Order by" returns every row the select statement would return anyway; the only difference is that the rows come back sorted by the chosen column.

In [10]:
# avg() is an aggregate: it reduces the weight column to a single number.
c = conn.cursor()
c.execute("select avg(weight) from people")
c.fetchall()

[(180.941,)]

In [12]:
# Smallest weight in the whole table.
c = conn.cursor()
c.execute("select min(weight) from people")
c.fetchall()

[(20,)]

In [None]:
# Largest weight in the whole table.
c = conn.cursor()
c.execute("select max(weight) from people")
c.fetchall()