## Import Libraries

In [3]:
import pandas as pd
import numpy as np

## Making Dummy Data

In [4]:
# Fix the global NumPy seed so the generated dataset is reproducible.
np.random.seed(42)

# Subject list: one grade column is generated per entry.
subjects = [
    "Pendidikan Agama",
    "Pendidikan Pancasila",
    "Bahasa Inggris",
    "Bahasa Mandarin",
    "Matematika (Umum)",
    "Biologi",
    "Fisika",
    "Kimia",
    "Geografi",
    "Sejarah",
    "Sosiologi",
    "Ekonomi",
    "Pendidikan Jasmani, Olahraga, dan Kesehatan",
    "Informatika",
    "Seni Musik",
    "Bahasa Indonesia",
]

# Number of students to generate.
n_students = 1000

# Current-semester simulation: 20% low performers, 50% mid performers,
# and whatever remains are high performers.
low_count = int(0.2 * n_students)
mid_count = int(0.5 * n_students)
high_count = n_students - (low_count + mid_count)

# Per-student base averages, drawn band by band (low, then mid, then high)
# around 60 / 75 / 90 with a standard deviation of 5.
low_avg, mid_avg, high_avg = (
    np.random.normal(loc=center, scale=5, size=count)
    for center, count in ((60, low_count), (75, mid_count), (90, high_count))
)

base_averages = np.concatenate((low_avg, mid_avg, high_avg)).clip(0, 100)

# Per-student subject scores: each student's base average plus N(0, 5) noise
# per subject. One (students x subjects) draw fills row by row, so it consumes
# the global random stream in the same order as drawing one row per student.
subject_noise = np.random.normal(
    loc=0, scale=5, size=(len(base_averages), len(subjects))
)
grades_current = (base_averages[:, np.newaxis] + subject_noise).clip(0, 100)

# Current-semester DataFrame with a per-student mean column.
df = pd.DataFrame(grades_current, columns=subjects)
df['Rata-rata'] = df.mean(axis=1)

# Performance-band masks based on the current-semester mean:
# low < 70, 70 <= mid < 85, high >= 85.
low_mask = df['Rata-rata'].lt(70)
mid_mask = df['Rata-rata'].ge(70) & df['Rata-rata'].lt(85)
high_mask = df['Rata-rata'].ge(85)

# Next-semester grades start as a copy of the current subject grades.
df_next_sem = df.loc[:, subjects].copy()

def add_noise(data, loc=0, scale=5):
    """Return ``data`` with Gaussian noise added, limited to the 0-100 range.

    Parameters
    ----------
    data : array-like exposing ``.shape``
        Values to perturb; one noise sample is drawn per element.
    loc : float, default 0
        Mean of the noise distribution.
    scale : float, default 5
        Standard deviation of the noise distribution.

    Returns
    -------
    The result of ``np.clip(data + noise, 0, 100)``.

    Notes
    -----
    Noise is drawn from the global ``np.random`` state, so results depend on
    the current seed and on how many draws preceded this call.
    """
    jitter = np.random.normal(loc=loc, scale=scale, size=data.shape)
    noisy = data + jitter
    return np.clip(noisy, 0, 100)

# Next-semester trend plan, one entry per performance band.
# Each scenario is (cumulative share of the band, noise mean, noise std):
# students are shuffled within the band, then consecutive slices of the
# shuffled order receive the listed noise via add_noise().
trend_plan = [
    # Low performers: 70% improve a little, 20% stay flat, 10% improve strongly.
    (low_mask, [(0.7, 5, 4), (0.9, 0, 3), (1.0, 15, 5)]),
    # Mid performers: 60% stay flat, 30% improve a little, 10% decline.
    (mid_mask, [(0.6, 0, 3), (0.9, 5, 4), (1.0, -7, 5)]),
    # High performers: 80% dip slightly, 20% stay flat.
    (high_mask, [(0.8, -3, 3), (1.0, 0, 2)]),
]

for band_mask, scenarios in trend_plan:
    # Index.to_numpy() is not guaranteed to return a copy, so shuffle via
    # permutation(), which works on a copy instead of mutating the array
    # in place. For a 1-D array it draws the same order as shuffle().
    indices = np.random.permutation(df[band_mask].index.to_numpy())
    n = len(indices)

    start = 0
    for cumulative_share, noise_loc, noise_scale in scenarios:
        stop = int(cumulative_share * n)
        segment = indices[start:stop]
        start = stop
        if segment.size == 0:
            # Nothing to update for this scenario (tiny or empty band).
            continue
        df_next_sem.loc[segment] = add_noise(
            df_next_sem.loc[segment], loc=noise_loc, scale=noise_scale
        )

# Round to whole grades and tag the columns as next-semester values.
df_next_sem = df_next_sem.round().astype(int).add_suffix(" (Next Sem)")

# Combine current and next-semester grades side by side.
df_final = pd.concat([df, df_next_sem], axis=1)
df_final['Rata-rata (Next Sem)'] = df_next_sem.mean(axis=1).round(2)


print(df_final.head())

   Pendidikan Agama  Pendidikan Pancasila  Bahasa Inggris  Bahasa Mandarin  \
0         69.480348             67.106739       62.781723        59.248887   
1         56.051588             56.889249       57.706942        61.429508   
2         65.234558             66.474422       60.822510        71.108377   
3         74.997851             74.515606       64.487336        69.594167   
4         57.017028             53.230884       52.355826        64.633367   

   Matematika (Umum)    Biologi     Fisika      Kimia   Geografi    Sejarah  \
0          65.974687  64.450998  66.959537  65.659430  67.731334  59.807395   
1          61.922856  56.440178  59.186906  70.020030  67.946394  61.490297   
2          57.109614  55.916568  64.360702  68.473934  71.658081  60.944021   
3          70.085300  68.918518  64.863624  64.257032  67.487379  73.478794   
4          56.490727  60.561753  58.594630  61.214437  59.213343  52.414272   

   ...  Kimia (Next Sem)  Geografi (Next Sem)  Sejarah (

In [5]:
# Write the combined current/next-semester table to CSV without the row index.
output_csv = 'daftar_nilai_revised.csv'
df_final.to_csv(output_csv, index=False)

## Tambah Kasus yang bervariasi
_Tambah siswa dengan nilai sangat rendah, etc._

## Tambah Kolom Absensi

## Hapus Kolom Tidak Digunakan

## Splitting Data

# 🧠 Feature Engineering

## Hitung Nilai Rata-rata per Siswa

## Normalisasi Nilai

## Tambah Fitur Tren Peningkatan/Penurunan Nilai

## Tambah Fitur Statistik

# 🏗️ Model Development (Compliance Focus)

## Bangun Model TensorFlow (tanpa API luar)

## PCA (jika diperlukan)

## Cross Validation

## Simpan Model

# 🧪 Evaluation

## Evaluasi dengan MAE, RMSE, R²

## Visualisasi Prediksi vs Nilai Asli

# 🚀 Inference & Integration

## Kode Inference Sederhana

## Integrasi ke UI atau API