<a href="https://colab.research.google.com/github/martinpdab/asapa/blob/main/machine_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install sklearn



In [2]:
import numpy as np
import pandas as pd

try:
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [39]:
URL = 'https://raw.githubusercontent.com/martinpdab/asapa/main/dataset/dataset-kasus-shuffled.csv'
df = pd.read_csv(URL)
df.head()

Unnamed: 0,tempat,jenis,banyak_kekerasan,banyak_korban,usia_korban,hubungan_pelaku,label
0,lainnya,penelantaran,1,1,dewasa,suami/istri,2
1,rumah-tangga,fisik,1,1,anak-anak,keluarga/saudara,3
2,lainnya,seksual,1,1,remaja,pacar/teman,3
3,rumah-tangga,fisik,1,1,remaja,keluarga/saudara,3
4,rumah-tangga,fisik,1,1,dewasa,suami/istri,3


In [40]:
train, test = train_test_split(df, test_size=0.1)
print(len(train), 'train examples')
print(len(test), 'test examples')

4697 train examples
522 test examples


In [42]:
def df_to_dataset(df, shuffle=True, batch_size=32):
  df = df.copy()
  labels = df.pop('label')
  ds = tf.data.Dataset.from_tensor_slices((dict(df), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(df))
  ds = ds.batch(batch_size)
  return ds

In [56]:
batch_size = 5
train_ds = df_to_dataset(train, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [45]:
for feature_batch, label_batch in train_ds.take(1):
  print('Parameter:', list(feature_batch.keys()))
  print('Tempat Kejadian:', feature_batch['tempat'])
  print('Jenis Kekerasan:', feature_batch['jenis'])
  print('Banyak Kekerasan:', feature_batch['banyak_kekerasan'])
  print('Banyak Korban:', feature_batch['banyak_korban'])
  print('Usia Korban:', feature_batch['usia_korban'])
  print('Hubungan Pelaku dengan Korban:', feature_batch['hubungan_pelaku'])
  print('Label:', label_batch )

Parameter: ['tempat', 'jenis', 'banyak_kekerasan', 'banyak_korban', 'usia_korban', 'hubungan_pelaku']
Tempat Kejadian: tf.Tensor([b'rumah-tangga' b'lainnya' b'lainnya' b'rumah-tangga' b'rumah-tangga'], shape=(5,), dtype=string)
Jenis Kekerasan: tf.Tensor([b'fisik' b'penelantaran' b'seksual' b'penelantaran' b'seksual'], shape=(5,), dtype=string)
Banyak Kekerasan: tf.Tensor([1 1 1 1 1], shape=(5,), dtype=int64)
Banyak Korban: tf.Tensor([1 1 1 1 1], shape=(5,), dtype=int64)
Usia Korban: tf.Tensor([b'dewasa' b'dewasa' b'anak-anak' b'dewasa' b'balita'], shape=(5,), dtype=string)
Hubungan Pelaku dengan Korban: tf.Tensor([b'suami/istri' b'keluarga/saudara' b'lainnya' b'suami/istri' b'orang-tua'], shape=(5,), dtype=string)
Label: tf.Tensor([3 2 3 2 3], shape=(5,), dtype=int64)


In [46]:
feature_columns = []

feature_columns.append(feature_column.numeric_column('banyak_korban'))
banyak_kekerasan = feature_column.numeric_column('banyak_kekerasan')
feature_columns.append(banyak_kekerasan)

tempat_kejadian = tf.feature_column.categorical_column_with_vocabulary_list(
                'tempat', ['rumah-tangga', 'tempat-kerja', 'sekolah', 'fasilitas-umum', 'lainnya'])
tempat_one_hot = tf.feature_column.indicator_column(tempat_kejadian)
feature_columns.append(tempat_one_hot)

jenis_kekerasan = tf.feature_column.categorical_column_with_vocabulary_list(
    'jenis', ['fisik', 'seksual', 'psikis', 'penelantaran', 'lainnya', 'eksploitasi', 'traficking'])
jenis_one_hot = tf.feature_column.indicator_column(jenis_kekerasan)
feature_columns.append(jenis_one_hot)

usia_korban = tf.feature_column.categorical_column_with_vocabulary_list(
    'usia_korban', ['balita', 'anak-anak', 'remaja', 'dewasa', 'lansia'])
usia_one_hot = tf.feature_column.indicator_column(usia_korban)
feature_columns.append(usia_one_hot)

hubungan_pelaku = tf.feature_column.categorical_column_with_vocabulary_list(
    'hubungan_pelaku', ['orang-tua', 'keluarga/saudara', 'tetangga', 'suami/istri', 'majikan', 'rekan-kerja', 'guru', 'pacar/teman', 'lainnya'])
hubungan_one_hot = tf.feature_column.indicator_column(hubungan_pelaku)
feature_columns.append(hubungan_one_hot)



feature_columns

[NumericColumn(key='banyak_korban', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 NumericColumn(key='banyak_kekerasan', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='tempat', vocabulary_list=('rumah-tangga', 'tempat-kerja', 'sekolah', 'fasilitas-umum', 'lainnya'), dtype=tf.string, default_value=-1, num_oov_buckets=0)),
 IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='jenis', vocabulary_list=('fisik', 'seksual', 'psikis', 'penelantaran', 'lainnya', 'eksploitasi', 'traficking'), dtype=tf.string, default_value=-1, num_oov_buckets=0)),
 IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='usia_korban', vocabulary_list=('balita', 'anak-anak', 'remaja', 'dewasa', 'lansia'), dtype=tf.string, default_value=-1, num_oov_buckets=0)),
 IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='hubungan_pelaku', vocabular

In [55]:
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
feature_layer

<tensorflow.python.keras.feature_column.dense_features_v2.DenseFeatures at 0x7f236a1c2b90>

In [48]:
batch_size = 32
train_ds = df_to_dataset(train, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [57]:
estimator = tf.estimator.DNNClassifier(hidden_units=[256, 128, 64], 
                                       feature_columns=feature_columns, 
                                       n_classes=3, 
                                       model_dir='graphs/dnn')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'graphs/dnn', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [61]:
def create_train_input_fn(): 
    return tf.estimator.inputs.pandas_input_fn(
        x=train_ds,
        y=labels, 
        batch_size=32,
        num_epochs=50, # Repeat forever
        shuffle=True)

In [62]:
train_input_fn = create_train_input_fn()
estimator.train(train_input_fn, steps=2000)

AttributeError: ignored