In [1]:
import numpy as np
import random
import pandas as pd
from pandasql import sqldf

In [2]:
# Roster of the 20 students used as rows of the score table.
names = [
    'Liam', 'Olivia', 'Noah', 'Emma', 'Oliver',
    'Ava', 'William', 'Sophia', 'Elijah', 'Isabella',
    'James', 'Charlotte', 'Benjamin', 'Amelia', 'Lucas',
    'Mia', 'Mason', 'Harper', 'Ethan', 'Evelyn',
]

# Column layout: the student name followed by test_1_score ... test_10_score.
cols = ['name'] + [f'test_{test_no}_score' for test_no in range(1, 11)]

In [148]:
# Build the demo table: one row per student in `names`, holding the name and
# one random integer score in [60, 100] for every test column in `cols`.
#
# A dedicated, seeded generator makes the table identical on every
# Restart & Run All without touching the global `random` state. Outputs that
# were rendered before the seed was introduced came from an unseeded run and
# will differ until the notebook is re-executed.
SEED = 42
rng = random.Random(SEED)

# cols[0] is 'name'; every remaining column is a test score.
n_tests = len(cols) - 1
data = [
    [student] + [rng.randint(60, 100) for _ in range(n_tests)]
    for student in names
]

scores = pd.DataFrame(data, columns=cols)

### WHERE

syntax:\
SELECT column1, column2, ...\
FROM table_name\
WHERE condition1;

In [172]:
# WHERE with a single comparison: students who scored below 77 on test 1.
low_test_1_query = '''
SELECT name, test_1_score FROM scores WHERE test_1_score < 77;
'''
sqldf(low_test_1_query)

Unnamed: 0,name,test_1_score
0,Liam,72
1,Noah,66
2,Emma,62
3,Sophia,68
4,Charlotte,60
5,Amelia,65
6,Lucas,74
7,Ethan,60


In [173]:
# WHERE with a single comparison: students who scored above 90 on test 10.
high_test_10_query = '''
SELECT name, test_10_score FROM scores WHERE test_10_score > 90;
'''
sqldf(high_test_10_query)

Unnamed: 0,name,test_10_score
0,Sophia,100
1,Lucas,91


### between 2 values

In [179]:
# Doesn't work, we can see we have some incorrect scores.
# SQL has no Python-style chained comparison: the condition is evaluated left
# to right as (80 < test_10_score) < 90. The inner comparison yields 0 or 1,
# which is always below 90, so the filter lets every row through.
sqldf('''
SELECT name, test_10_score FROM scores WHERE 80 < test_10_score < 90;
''')

Unnamed: 0,name,test_10_score
0,Liam,79
1,Olivia,86
2,Noah,72
3,Emma,75
4,Oliver,78
5,Ava,66
6,William,61
7,Sophia,100
8,Elijah,74
9,Isabella,79


In [180]:
# Gives us the filtering we want.
# Note: BETWEEN is inclusive on both ends, i.e. it is equivalent to
# test_10_score >= 80 AND test_10_score <= 90, so scores of exactly 80 and 90
# are returned as well.
sqldf('''
SELECT name, test_10_score FROM scores WHERE test_10_score BETWEEN 80 AND 90;
''')

Unnamed: 0,name,test_10_score
0,Olivia,86
1,Benjamin,90
2,Amelia,82
3,Mia,80
4,Mason,85
5,Harper,83
6,Ethan,82


# AND, OR

syntax: \
SELECT column1, column2, ... \
FROM table_name \
WHERE condition1 AND/OR condition2 AND/OR condition3 ...;


In [151]:
# AND: both conditions must hold -- below 75 on test 1 and above 85 on test 10.
weak_start_strong_finish_query = '''
SELECT name FROM scores WHERE test_1_score < 75 AND test_10_score > 85;
'''
sqldf(weak_start_strong_finish_query)

Unnamed: 0,name
0,Sophia
1,Lucas


In [152]:
# OR: either condition is enough -- above 85 on test 1 or on test 2.
strong_early_test_query = '''
SELECT name FROM scores WHERE test_1_score > 85 OR test_2_score > 85;
'''
sqldf(strong_early_test_query)

Unnamed: 0,name
0,Liam
1,Olivia
2,Emma
3,Oliver
4,Ava
5,Elijah
6,Isabella
7,James
8,Amelia
9,Mia


In [153]:
# Mixing OR and AND without parentheses: AND binds tighter than OR, so this is
# evaluated as
#     test_1_score < 75 OR (test_2_score < 75 AND test_10_score < 75)
# A student below 75 on test 1 is returned regardless of the other two scores.
# Add explicit parentheses if the other grouping is the one intended.
sqldf('''
SELECT name FROM scores WHERE test_1_score < 75 OR test_2_score < 75 AND test_10_score < 75;
''')

Unnamed: 0,name
0,Liam
1,Noah
2,Emma
3,William
4,Sophia
5,Charlotte
6,Amelia
7,Lucas
8,Ethan
9,Evelyn


# ORDER BY

In [154]:
# Per-student mean over the test_N_score columns only.
# The columns are selected by name rather than with a positional row[1:] slice
# so the cell stays idempotent: a positional slice would fold test_avg itself
# (and any column added later) into the mean when the cell is re-run.
# DataFrame.mean(axis=1) is also vectorized, avoiding a Python-level call per row.
scores['test_avg'] = scores.filter(regex=r'^test_\d+_score$').mean(axis=1)

In [155]:
# ORDER BY sorts ascending by default: lowest average first.
avg_ascending_query = '''
SELECT name, test_avg FROM scores ORDER BY test_avg;
'''
sqldf(avg_ascending_query)

Unnamed: 0,name,test_avg
0,Ethan,73.3
1,Charlotte,74.9
2,Liam,75.2
3,Lucas,76.1
4,William,76.2
5,Noah,76.3
6,Benjamin,77.4
7,Evelyn,78.8
8,Olivia,79.0
9,James,79.1


In [156]:
# DESC reverses the sort order: highest average first.
avg_descending_query = '''
SELECT name, test_avg FROM scores ORDER BY test_avg DESC;
'''
sqldf(avg_descending_query)

Unnamed: 0,name,test_avg
0,Isabella,86.8
1,Harper,84.3
2,Amelia,82.2
3,Oliver,82.1
4,Sophia,81.6
5,Mason,80.9
6,Mia,80.4
7,Emma,79.6
8,Ava,79.5
9,Elijah,79.3


In [157]:
# Multiple sort keys: order by average, using the test 10 score to break ties.
avg_then_test_10_query = '''
SELECT name, test_10_score, test_avg FROM scores ORDER BY test_avg, test_10_score;
'''
sqldf(avg_then_test_10_query)

Unnamed: 0,name,test_10_score,test_avg
0,Ethan,82,73.3
1,Charlotte,68,74.9
2,Liam,79,75.2
3,Lucas,91,76.1
4,William,61,76.2
5,Noah,72,76.3
6,Benjamin,90,77.4
7,Evelyn,70,78.8
8,Olivia,86,79.0
9,James,67,79.1


# GROUP BY

In [158]:
# Lower bounds of each letter grade, highest first. np.select uses the first
# condition that is true for a row, so every student receives the best grade
# whose cutoff their average reaches.
grade_floors = [93, 90, 87, 83, 80, 77, 73, 70, 67, 60]
test_avg = scores['test_avg']

conditions = [test_avg >= floor for floor in grade_floors]
conditions.append(test_avg < 60)  # everything under the lowest cutoff

choices = ['A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D', 'F']

# 'Z' is the fallback label for rows where none of the conditions holds.
scores['grade'] = np.select(conditions, choices, default='Z')
scores

Unnamed: 0,name,test_1_score,test_2_score,test_3_score,test_4_score,test_5_score,test_6_score,test_7_score,test_8_score,test_9_score,test_10_score,test_avg,grade
0,Liam,72,91,87,96,71,66,63,61,66,79,75.2,C
1,Olivia,99,79,61,74,67,74,86,78,86,86,79.0,C+
2,Noah,66,75,81,79,70,79,82,60,99,72,76.3,C
3,Emma,62,89,79,77,70,100,93,65,86,75,79.6,C+
4,Oliver,95,84,83,63,82,67,99,95,75,78,82.1,B-
5,Ava,84,95,83,89,73,76,89,71,69,66,79.5,C+
6,William,82,67,60,99,89,67,63,76,98,61,76.2,C
7,Sophia,68,82,97,95,69,63,95,79,68,100,81.6,B-
8,Elijah,92,75,71,80,86,67,76,96,76,74,79.3,C+
9,Isabella,95,86,97,95,98,61,94,96,67,79,86.8,B


In [159]:
# GROUP BY with COUNT: number of students in each letter grade.
students_per_grade_query = '''
SELECT grade, COUNT(name) FROM scores GROUP BY grade;
'''
sqldf(students_per_grade_query)

Unnamed: 0,grade,COUNT(name)
0,B,2
1,B-,5
2,C,6
3,C+,7


In [161]:
# GROUP BY with AVG: mean of the students' averages within each letter grade.
avg_per_grade_query = '''
SELECT grade, AVG(test_avg) FROM scores GROUP BY grade;
'''
sqldf(avg_per_grade_query)

Unnamed: 0,grade,AVG(test_avg)
0,B,85.55
1,B-,81.44
2,C,75.333333
3,C+,78.957143


In [160]:
# Aggregate without GROUP BY: one row holding the class-wide test 1 average,
# exposed under the alias test_1_avg.
class_test_1_avg_query = '''
SELECT AVG(test_1_score) AS test_1_avg FROM scores;
'''
sqldf(class_test_1_avg_query)

Unnamed: 0,test_1_avg
0,79.8
