In [None]:
# CÂU 1

import sqlite3
import pandas as pd

# Kết nối hoặc tạo database SQLite
conn = sqlite3.connect('university.db')
cursor = conn.cursor()

# Tạo bảng student
cursor.execute('''CREATE TABLE IF NOT EXISTS student (
    student_id INTEGER PRIMARY KEY,
    name TEXT,
    class TEXT,
    course_id INTEGER,
    score REAL)''')

# Tạo bảng course
cursor.execute('''CREATE TABLE IF NOT EXISTS course (
    id INTEGER PRIMARY KEY,
    course_name TEXT)''')

# Chèn dữ liệu vào bảng student
students = [
    (1, "Nguyen Minh Hoang", "May Tinh", 12, 6.7),
    (2, "Tran Thi Lan", "Kinh Te", 34, 9.2),
    (3, "Pham Van Nam", "Toan Tin", None, 7.9),
    (4, "Le Thanh Huyen", "Toan Tin", 20, 7.2),
    (5, "Vu Quoc Anh", "May Tinh", 24, 8.0),
    (6, "Dang Thuy Linh", "May Tinh", 24, 5.5),
    (7, "Bui Tien Dung", "Kinh Te", 34, 9.2),
    (8, "Ho Ngoc Mai", "Toan Tin", 20, 8.8),
    (9, "Duong Huu Phuc", "Kinh Te", None, 7.2),
    (10, "Cao Thi Hanh", "May Tinh", None, 7.0)
]
cursor.executemany("INSERT INTO student VALUES (?, ?, ?, ?, ?)", students)

# Chèn dữ liệu vào bảng course
courses = [
    (12, "Giai tich"),
    (34, "Thong ke"),
    (26, "Tin hoc")
]
cursor.executemany("INSERT INTO course VALUES (?, ?)", courses)

conn.commit()

# Truy vấn tích Decartes
df_cartesian = pd.read_sql_query("SELECT * FROM student, course", conn)
print(df_cartesian.head())

# INNER JOIN
df_inner = pd.read_sql_query('''SELECT * FROM student 
                                INNER JOIN course ON student.course_id = course.id''', conn)
print(df_inner.head())

# LEFT JOIN
df_left = pd.read_sql_query('''SELECT * FROM student 
                               LEFT JOIN course ON student.course_id = course.id''', conn)
print(df_left.head())

# RIGHT JOIN (SQLite không hỗ trợ, có thể dùng LEFT JOIN từ hướng khác)
df_right = pd.read_sql_query('''SELECT * FROM course 
                                LEFT JOIN student ON student.course_id = course.id''', conn)
print(df_right.head())

# FULL OUTER JOIN (Không hỗ trợ trực tiếp, dùng UNION)
df_full_outer = pd.read_sql_query('''SELECT * FROM student 
                                     LEFT JOIN course ON student.course_id = course.id
                                     UNION
                                     SELECT * FROM student 
                                     RIGHT JOIN course ON student.course_id = course.id''', conn)
print(df_full_outer.head())



   student_id               name     class  course_id  score  id course_name
0           1  Nguyen Minh Hoang  May Tinh       12.0    6.7  12   Giai tich
1           1  Nguyen Minh Hoang  May Tinh       12.0    6.7  26     Tin hoc
2           1  Nguyen Minh Hoang  May Tinh       12.0    6.7  34    Thong ke
3           2       Tran Thi Lan   Kinh Te       34.0    9.2  12   Giai tich
4           2       Tran Thi Lan   Kinh Te       34.0    9.2  26     Tin hoc
   student_id               name     class  course_id  score  id course_name
0           1  Nguyen Minh Hoang  May Tinh         12    6.7  12   Giai tich
1           2       Tran Thi Lan   Kinh Te         34    9.2  34    Thong ke
2           7      Bui Tien Dung   Kinh Te         34    9.2  34    Thong ke
   student_id               name     class  course_id  score    id course_name
0           1  Nguyen Minh Hoang  May Tinh       12.0    6.7  12.0   Giai tich
1           2       Tran Thi Lan   Kinh Te       34.0    9.2  34.0    Th

In [3]:
# CÂU 2

# Cập nhật các giá trị course_id bị thiếu bằng các id hợp lệ từ bảng course
cursor.execute('''UPDATE student 
                  SET course_id = (SELECT id FROM course ORDER BY RANDOM() LIMIT 1) 
                  WHERE course_id IS NULL''')

# Xóa bản ghi có course_id không tồn tại trong bảng course
cursor.execute('''DELETE FROM student 
                  WHERE course_id NOT IN (SELECT id FROM course)''')

conn.commit()

# Tổng số sinh viên, điểm trung bình theo từng lớp
df_class_avg = pd.read_sql_query('''SELECT class, COUNT(*) as total_students, AVG(score) as avg_score
                                    FROM student 
                                    GROUP BY class''', conn)
print(df_class_avg)


# Tổng số sinh viên, điểm trung bình của từng môn học
df_course_avg = pd.read_sql_query('''SELECT course_name, COUNT(*) as total_students, AVG(score) as avg_score
                                     FROM student 
                                     JOIN course ON student.course_id = course.id
                                     GROUP BY course_name''', conn)
print(df_course_avg)

# Phân loại thi đua 
df_rank = pd.read_sql_query('''SELECT course_name,
                                AVG(score) as avg_score,
                                CASE 
                                    WHEN AVG(score) >= 9.0 THEN 'Xuất sắc'
                                    WHEN AVG(score) BETWEEN 6.0 AND 8.9 THEN 'Tốt'
                                    ELSE 'Kém'
                                END as classification
                                FROM student 
                                JOIN course ON student.course_id = course.id
                                GROUP BY course_name''', conn)
print(df_rank)



      class  total_students  avg_score
0   Kinh Te               3   8.533333
1  May Tinh               2   6.850000
2  Toan Tin               1   7.900000
  course_name  total_students  avg_score
0   Giai tich               1        6.7
1    Thong ke               5        8.1
  course_name  avg_score classification
0   Giai tich        6.7            Tốt
1    Thong ke        8.1            Tốt


In [5]:
# CÂU 3

# Theo điểm số
df_rank_score = pd.read_sql_query('''SELECT student_id, name, score,
                                    RANK() OVER (ORDER BY score DESC) as rank
                                    FROM student''', conn)
print(df_rank_score.head(3))  # Top 3 cao nhất
print(df_rank_score.tail(3))  # Top 3 thấp nhất


# Điểm số theo lớp học
df_rank_class = pd.read_sql_query('''SELECT student_id, name, class, score,
                                    RANK() OVER (PARTITION BY class ORDER BY score DESC) as rank
                                    FROM student''', conn)
print(df_rank_class.head(3))

# Điểm số theo mã môn học
df_rank_course = pd.read_sql_query('''SELECT student_id, name, course_name, score,
                                    RANK() OVER (PARTITION BY course_name ORDER BY score DESC) as rank
                                    FROM student 
                                    JOIN course ON student.course_id = course.id''', conn)
print(df_rank_course.head(3))


   student_id           name  score  rank
0           2   Tran Thi Lan    9.2     1
1           7  Bui Tien Dung    9.2     1
2           3   Pham Van Nam    7.9     3
   student_id               name  score  rank
3           9     Duong Huu Phuc    7.2     4
4          10       Cao Thi Hanh    7.0     5
5           1  Nguyen Minh Hoang    6.7     6
   student_id            name    class  score  rank
0           2    Tran Thi Lan  Kinh Te    9.2     1
1           7   Bui Tien Dung  Kinh Te    9.2     1
2           9  Duong Huu Phuc  Kinh Te    7.2     3
   student_id               name course_name  score  rank
0           1  Nguyen Minh Hoang   Giai tich    6.7     1
1           2       Tran Thi Lan    Thong ke    9.2     1
2           7      Bui Tien Dung    Thong ke    9.2     1


In [3]:
# CÂU 4
import sqlite3
import datetime
import pandas as pd

# Kết nối CSDL SQLite
conn = sqlite3.connect("university.db")
cursor = conn.cursor()

# Kiểm tra xem cột graduation_date đã tồn tại chưa
cursor.execute("PRAGMA table_info(student)")
columns = [col[1] for col in cursor.fetchall()]

if "graduation_date" not in columns:
    cursor.execute("ALTER TABLE student ADD COLUMN graduation_date TEXT")  # Dùng TEXT thay vì DATETIME
    conn.commit()
    print("Đã thêm cột graduation_date.")
else:
    print("Cột graduation_date đã tồn tại, bỏ qua ALTER TABLE.")

# Cập nhật ngày tốt nghiệp (hiện tại + số tháng tương ứng với điểm số)
for row in cursor.execute("SELECT student_id, score FROM student"):
    student_id, score = row
    grad_date = (datetime.datetime.now() + datetime.timedelta(days=int(score * 30))).strftime('%Y-%m-%d')  # Chuyển sang dạng chuỗi
    cursor.execute("UPDATE student SET graduation_date = ? WHERE student_id = ?", (grad_date, student_id))

conn.commit()

# Kiểm tra kết quả
df_grad = pd.read_sql_query("SELECT student_id, name, graduation_date FROM student", conn)
print(df_grad)

# Đóng kết nối
conn.close()


Cột graduation_date đã tồn tại, bỏ qua ALTER TABLE.
   student_id               name graduation_date
0           1  Nguyen Minh Hoang      2025-09-29
1           2       Tran Thi Lan            None
2           3       Pham Van Nam            None
3           7      Bui Tien Dung            None
4           9     Duong Huu Phuc            None
5          10       Cao Thi Hanh            None
