In [1]:
import warnings
# Blanket filter: suppresses every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')

import pandas as pd
import numpy as np
import csv
import folium
import matplotlib.pyplot as plt
# Set matplotlib's default font family. Presumably chosen so Korean labels
# render correctly; the font must be installed locally (TODO confirm).
plt.rc('font',family='D2CodingLigature Nerd Font')

| 함수          | 기능              |
|---------------|-------------------|
| query()       | 행 추출           |
| df[]          | 열(변수) 추출     |
| sort_values() | 정렬              |
| groupby()     | 집단별로 나누기   |
| agg()         | 통계치 구하기     |
| merge()       | 데이터 합치기(열) |
| concat()      | 데이터 합치기(행) |

In [None]:
# Read a CSV file into a pandas DataFrame
# (the relative path is resolved against the current working directory)

exam = pd.read_csv('../../data/exam.csv')
exam

Unnamed: 0,id,nclass,math,english,science
0,1,1,50,98,50
1,2,1,60,97,60
2,3,1,45,86,78
3,4,1,30,98,58
4,5,2,25,80,65
5,6,2,50,89,98
6,7,2,80,90,45
7,8,2,90,78,25
8,9,3,20,98,15
9,10,3,50,98,45


In [3]:
## query(): keep only the rows that satisfy a condition string

exam.query('nclass == 1')

Unnamed: 0,id,nclass,math,english,science
0,1,1,50,98,50
1,2,1,60,97,60
2,3,1,45,86,78
3,4,1,30,98,58


In [4]:
# Rows whose nclass is anything other than 1
exam.query('nclass != 1')

Unnamed: 0,id,nclass,math,english,science
4,5,2,25,80,65
5,6,2,50,89,98
6,7,2,80,90,45
7,8,2,90,78,25
8,9,3,20,98,15
9,10,3,50,98,45
10,11,3,65,65,65
11,12,3,45,85,32
12,13,4,46,98,65
13,14,4,48,87,12


In [7]:
# Rows with a math score of 70 or higher

exam.query('math>=70')

Unnamed: 0,id,nclass,math,english,science
6,7,2,80,90,45
7,8,2,90,78,25
14,15,4,75,56,78
17,18,5,80,78,90
18,19,5,89,68,87
19,20,5,78,83,58


In [None]:
# Students in class 1 whose math score is 50 or higher

exam.query('nclass==1 and math>=50')

Unnamed: 0,id,nclass,math,english,science
0,1,1,50,98,50
1,2,1,60,97,60


In [None]:
# Students whose math score is 90 or higher, or whose english score is 90 or higher

exam.query('math>=90 or english>=90')

Unnamed: 0,id,nclass,math,english,science
0,1,1,50,98,50
1,2,1,60,97,60
3,4,1,30,98,58
6,7,2,80,90,45
7,8,2,90,78,25
8,9,3,20,98,15
9,10,3,50,98,45
12,13,4,46,98,65
15,16,4,58,98,65


In [None]:
# Extract only the rows for classes 1, 3 and 5

exam.query('nclass in [1, 3, 5]')

Unnamed: 0,id,nclass,math,english,science
0,1,1,50,98,50
1,2,1,60,97,60
2,3,1,45,86,78
3,4,1,30,98,58
8,9,3,20,98,15
9,10,3,50,98,45
10,11,3,65,65,65
11,12,3,45,85,32
16,17,5,65,68,98
17,18,5,80,78,90


In [16]:
# Mean math score of class 1

# Equivalent two-step form:
# a = exam.query('nclass == 1')
# a['math'].mean()

# Filter the rows, select the math column, then average it in one chain.
exam.query('nclass == 1')['math'].mean()

np.float64(46.25)

In [17]:
## Build a query condition from a Python variable

# int() raises ValueError if the typed text is not an integer.
a = int(input("몇 반 학생의 데이터 조회를 원하십니까? "))
# Inside the query string, the @ prefix refers to the Python variable `a`.
exam.query('nclass == @a')

Unnamed: 0,id,nclass,math,english,science
4,5,2,25,80,65
5,6,2,50,89,98
6,7,2,80,90,45
7,8,2,90,78,25


In [18]:
# Read two class numbers and show the rows that belong to either class

# int() raises ValueError if the typed text is not an integer.
a = int(input("첫 번째 반 : "))
b = int(input("두 번째 반 : "))
# Spell the disjunction as `or`, matching the other multi-condition queries
# in this notebook. The previous `|` form only worked because query()'s
# default parser gives `|` the precedence of `or`; the selected rows are the same.
exam.query('nclass == @a or nclass == @b')

Unnamed: 0,id,nclass,math,english,science
8,9,3,20,98,15
9,10,3,50,98,45
10,11,3,65,65,65
11,12,3,45,85,32
12,13,4,46,98,65
13,14,4,48,87,12
14,15,4,75,56,78
15,16,4,58,98,65


In [22]:
## From the class-1 rows, keep only the english and math columns

# The inner list selects several columns at once, in the order given.
exam.query('nclass == 1')[['english', 'math']]

Unnamed: 0,english,math
0,98,50
1,97,60
2,86,45
3,98,30


In [None]:
# For students with a math score of 50 or higher, keep only the id and math columns

exam.query('math >= 50')[['id', 'math']]

Unnamed: 0,id,math
0,1,50
1,2,60
5,6,50
6,7,80
7,8,90
9,10,50
10,11,65
14,15,75
15,16,58
16,17,65


#### sort_values()

In [24]:
exam.sort_values('math')  # sort by math, ascending (the default order)

Unnamed: 0,id,nclass,math,english,science
8,9,3,20,98,15
4,5,2,25,80,65
3,4,1,30,98,58
2,3,1,45,86,78
11,12,3,45,85,32
12,13,4,46,98,65
13,14,4,48,87,12
0,1,1,50,98,50
9,10,3,50,98,45
5,6,2,50,89,98


In [None]:
exam.sort_values('math', ascending=False)  # sort by math, descending

Unnamed: 0,id,nclass,math,english,science
7,8,2,90,78,25
18,19,5,89,68,87
6,7,2,80,90,45
17,18,5,80,78,90
19,20,5,78,83,58
14,15,4,75,56,78
10,11,3,65,65,65
16,17,5,65,68,98
1,2,1,60,97,60
15,16,4,58,98,65


In [27]:
# Add a `total` column: the row-wise sum of the math, english and science scores

exam['total'] = exam['math'].add(exam['english']).add(exam['science'])
exam

Unnamed: 0,id,nclass,math,english,science,total
0,1,1,50,98,50,198
1,2,1,60,97,60,217
2,3,1,45,86,78,209
3,4,1,30,98,58,186
4,5,2,25,80,65,170
5,6,2,50,89,98,237
6,7,2,80,90,45,215
7,8,2,90,78,25,193
8,9,3,20,98,15,133
9,10,3,50,98,45,193


In [None]:
# Compute each student's rank by total score (1 = highest total)

# rank() defaults to method='average', which gives tied rows a fractional
# rank (e.g. 14.5) that astype(int) then silently truncates. method='min'
# assigns every tied row the best shared integer rank directly, so the
# integer cast never discards information.
exam['rank'] = exam['total'].rank(ascending=False, method='min').astype(int)
exam

Unnamed: 0,id,nclass,math,english,science,total,rank
0,1,1,50,98,50,198,12
1,2,1,60,97,60,217,7
2,3,1,45,86,78,209,10
3,4,1,30,98,58,186,16
4,5,2,25,80,65,170,17
5,6,2,50,89,98,237,3
6,7,2,80,90,45,215,8
7,8,2,90,78,25,193,14
8,9,3,20,98,15,133,20
9,10,3,50,98,45,193,14


In [29]:
# Sort by nclass ascending; rows with the same nclass are ordered by math descending

# The ascending list pairs positionally with the list of sort keys.
exam.sort_values(['nclass', 'math'], ascending=[True, False])

Unnamed: 0,id,nclass,math,english,science,total,rank
1,2,1,60,97,60,217,7
0,1,1,50,98,50,198,12
2,3,1,45,86,78,209,10
3,4,1,30,98,58,186,16
7,8,2,90,78,25,193,14
6,7,2,80,90,45,215,8
5,6,2,50,89,98,237,3
4,5,2,25,80,65,170,17
10,11,3,65,65,65,195,13
9,10,3,50,98,45,193,14
