In [14]:
import pandas as pd

# Read the raw exercise table.
df = pd.read_csv('exercise.csv')

# Keep only the first row encountered for each distinct main_muscle value.
is_repeat = df.duplicated(subset=['main_muscle'], keep='first')
df_unique = df[~is_repeat]

# Widen the console display so no column is elided or wrapped when printing.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

print(df_unique)

     exercise_id                  exercise_name      exercise_type                                     exercise_image main_muscle
0           3518           Stationary Bike Run   distance_duration  https://apilyfta.com/static/GymvisualPNG/22791...      cardio
25         27979  Dumbbell Standing Arms Rotate  db_2_simultaneous  https://apilyfta.com/static/GymvisualPNG/62671...    forearms
97           295               Smith Calf Raise        weight_reps  https://apilyfta.com/static/GymvisualPNG/11641...      calves
146            7                       Deadlift        weight_reps  https://apilyfta.com/static/GymvisualPNG/00321...        hips
199         1567  Barbell Straight Leg Deadlift        weight_reps  https://apilyfta.com/static/GymvisualPNG/01161...   hamstring
222          262                    Triceps Dip    full_bodyweight  https://apilyfta.com/static/GymvisualPNG/08141...     triceps
297          134                    Hammer Curl  db_2_simultaneous  https://apilyfta.com/s

In [None]:
%pip install fuzzywuzzy

In [18]:
import pandas as pd
from fuzzywuzzy import fuzz

# Load the raw exercise table; the grouping code below reads its
# 'exercise_name' and 'main_muscle' columns.
df = pd.read_csv('exercise.csv')

# Helper that merges the distinct main_muscle values of a group into one string
def combine_muscles(muscle_series):
    """Join the distinct values of a Series into a sorted, comma-separated string.

    Missing values (NaN/None) are skipped; previously a single missing value
    made ``sorted``/``join`` raise ``TypeError``. Remaining values are converted
    to ``str`` so the join cannot fail on non-string labels.

    Args:
        muscle_series: pandas Series of muscle labels.

    Returns:
        The unique labels in ascending order joined by ``', '``; an empty
        string when the Series has no non-missing value.
    """
    labels = {str(value) for value in muscle_series.dropna()}
    return ', '.join(sorted(labels))

# Minimum fuzz.ratio score (0-100) at which two exercise names are merged
SIMILARITY_THRESHOLD = 95

# Walk the rows in index order so the surviving row of each group is always
# the earliest one; set.pop() removes an arbitrary element, which made the
# choice of representative row unspecified.
index_labels = list(df.index)

# Lower-case every name once instead of inside the O(n^2) comparison loop
names_lower = dict(zip(index_labels, (name.lower() for name in df['exercise_name'])))

# Collected output rows, one per group of near-identical names
final_rows = []

# Index labels that have already been merged into an earlier group
absorbed = set()

for position, current_idx in enumerate(index_labels):
    if current_idx in absorbed:
        continue
    current_name = names_lower[current_idx]

    # Later rows, not yet merged, whose name is at least SIMILARITY_THRESHOLD similar
    similar_idxs = [
        idx
        for idx in index_labels[position + 1:]
        if idx not in absorbed
        and fuzz.ratio(current_name, names_lower[idx]) >= SIMILARITY_THRESHOLD
    ]

    # The group is the current row plus everything similar to it
    group_idxs = [current_idx] + similar_idxs

    # Keep the current row's data but replace main_muscle with the group's combined value
    new_row = df.loc[current_idx].copy()
    new_row['main_muscle'] = combine_muscles(df.loc[group_idxs, 'main_muscle'])
    final_rows.append(new_row)

    # Merged rows must not start, or join, another group
    absorbed.update(similar_idxs)

# Build the de-duplicated DataFrame with the combined main_muscle values
df_final = pd.DataFrame(final_rows)

# Save to a new CSV
df_final.to_csv('exercise_dedup_fuzzy.csv', index=False)

print(df_final)

     exercise_id                        exercise_name      exercise_type                                     exercise_image main_muscle
0           3518                 Stationary Bike Run   distance_duration  https://apilyfta.com/static/GymvisualPNG/22791...      cardio
1           4906                      Skip Jump Rope            duration  https://apilyfta.com/static/GymvisualPNG/36761...      cardio
2           1962                            Jump Rope           duration  https://apilyfta.com/static/GymvisualPNG/05111...      cardio
3           2136                                  Run  distance_duration  https://apilyfta.com/static/GymvisualPNG/06851...      cardio
4           3499                 Walking on Treadmill  distance_duration  https://apilyfta.com/static/GymvisualPNG/22591...      cardio
..           ...                                  ...                ...                                                ...         ...
775        27570  Roll Neck Decompress Lying on 

In [19]:
import pandas as pd
from fuzzywuzzy import fuzz

# Reload the de-duplicated table written by the fuzzy-merge step
df = pd.read_csv('exercise_dedup_fuzzy.csv')

# 1. Report the row count and every remaining pair of names that is >= 95% similar
print(f"Total data latihan: {len(df)}")

names = df['exercise_name'].tolist()
mirip_pairs = []
for i, left in enumerate(names):
    # Compare only against later names so each unordered pair is scored once
    for j, right in enumerate(names[i + 1:], start=i + 1):
        score = fuzz.ratio(left.lower(), right.lower())
        if score < 95:
            continue
        mirip_pairs.append((left, right, score))

print(f"Jumlah pasangan nama mirip (≥95%): {len(mirip_pairs)}")
for n1, n2, sc in mirip_pairs:
    print(f"'{n1}' <-> '{n2}' (similarity: {sc}%)")

# 2. Split rows by whether main_muscle lists one muscle or several
# (the number of comma-separated parts is the comma count plus one)
df['muscle_count'] = df['main_muscle'].map(lambda label: label.count(',') + 1)

has_one_muscle = df['muscle_count'] == 1
has_many_muscles = df['muscle_count'] > 1
single_muscle = df.loc[has_one_muscle]
multi_muscle = df.loc[has_many_muscles]

print(f"\nJumlah data dengan main_muscle tunggal: {len(single_muscle)}")
print(f"Jumlah data dengan main_muscle lebih dari satu: {len(multi_muscle)}")

print("\nLatihan dengan main_muscle lebih dari satu:")
print(multi_muscle[['exercise_name', 'main_muscle']])

Total data latihan: 648
Jumlah pasangan nama mirip (≥95%): 0

Jumlah data dengan main_muscle tunggal: 637
Jumlah data dengan main_muscle lebih dari satu: 11

Latihan dengan main_muscle lebih dari satu:
                       exercise_name       main_muscle
21            Cross Body Hammer Curl  biceps, forearms
23   Dumbbell Cross Body Hammer Curl  biceps, forearms
24           Revers grip Biceps Curl  biceps, forearms
27            Dumbbell Hammer Curls   biceps, forearms
66                       Hammer Curl  biceps, forearms
130                      Bench Squat  hips, quadriceps
138              Barbell Bench Squat  hips, quadriceps
146                   Dumbbell Lunge  hips, quadriceps
147                            Squat  hips, quadriceps
148                            Lunge  hips, quadriceps
155                   Smith Deadlift  hips, quadriceps


In [28]:
import pandas as pd

df = pd.read_csv('exercise_dedup_renamed.csv')

# Boolean frame marking every missing cell; reused for both summaries below
na_mask = df.isna()

# Number of missing values in each column
missing_per_col = na_mask.sum()
print("Missing values per kolom:")
print(missing_per_col)

# Rows that contain at least one missing value
row_has_missing = na_mask.any(axis=1)
missing_rows = df.loc[row_has_missing]

print(f"\nJumlah baris dengan missing value: {len(missing_rows)}")
print("Baris dengan missing value:")
print(missing_rows)

Missing values per kolom:
exercise_id       0
exercise_name     0
exercise_type     0
body_part         0
exercise_image    1
dtype: int64

Jumlah baris dengan missing value: 1
Baris dengan missing value:
     exercise_id              exercise_name exercise_type body_part exercise_image
477        28040  Cable Single Arm High Row   weight_reps      back            NaN


In [30]:
import pandas as pd

from collections import Counter

# Load data (swap in the latest file if needed)
df = pd.read_csv('exercise_dedup_renamed.csv')

# Distinct 'exercise_type' values and how often each occurs
print("Variasi dan jumlah 'exercise_type':")
type_counts = df['exercise_type'].value_counts()
print(type_counts.sort_values(ascending=False))

print("\nVariasi dan jumlah 'body_part':")
# body_part may hold several comma-separated muscles, so every individual
# component is counted on its own, with surrounding whitespace stripped.
flat_parts = [
    piece.strip()
    for cell in df['body_part'].dropna()
    for piece in cell.split(',')
]
part_counts = Counter(flat_parts)

# Turn the tally into a DataFrame for a tidy printout, most frequent first
part_df = pd.DataFrame(part_counts.items(), columns=['body_part', 'count'])
part_df = part_df.sort_values(by='count', ascending=False)
print(part_df.reset_index(drop=True))

Variasi dan jumlah 'exercise_type':
exercise_type
weight_reps            425
db_2_simultaneous      110
db_1_alt_sides          40
duration                22
db_1_both_sides         20
distance_duration       10
db_2_alt_legs            9
full_bodyweight          6
db_1_alt_legs            3
bodyweight_assisted      3
Name: count, dtype: int64

Variasi dan jumlah 'body_part':
     body_part  count
0       biceps     74
1         back     71
2        chest     64
3     forearms     62
4      triceps     61
5   quadriceps     60
6          abs     58
7    shoulders     53
8         hips     46
9       calves     39
10        neck     36
11      cardio     18
12   hamstring     17


In [None]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML

# Load the cleaned CSV
df = pd.read_csv("exercise_dedup_renamed.csv")

# Dropdown for choosing which column to filter on (type or body part)
category_selector = widgets.Dropdown(
    options=['exercise_type', 'body_part'],
    description='Pilih opsi:',
    style={'description_width': 'initial'}
)

# Text box for the specific value to look for (e.g. 'biceps', 'weight_reps').
# continuous_update=False makes the value change only when the user commits the
# text, which is the documented replacement for the deprecated on_submit().
input_box = widgets.Text(
    description='Masukkan nilai kategori:',
    placeholder='Contoh: biceps atau weight_reps',
    style={'description_width': 'initial'},
    continuous_update=False
)

output = widgets.Output()

def on_category_selected(change):
    """List the distinct values of the currently selected column in the output area.

    Args:
        change: traitlets change notification; unused, the current dropdown
            value is read directly.
    """
    with output:
        clear_output()
        selected_category = category_selector.value
        unique_values = df[selected_category].dropna().unique()
        print(f"Pilihan yang tersedia untuk '{selected_category}':")
        for i, val in enumerate(sorted(unique_values), 1):
            print(f"{i}. {val}")

def on_input_submitted(change):
    """Show the rows whose selected column matches the committed text.

    'body_part' cells may hold several comma-separated values, so they are
    matched by case-insensitive substring; any other column needs an exact match.

    Args:
        change: traitlets change notification; unused, the current text box
            value is read directly.
    """
    with output:
        clear_output()
        selected_category = category_selector.value
        filter_value = input_box.value.strip()

        # An empty substring would match every row, so treat it as "no match"
        if not filter_value:
            print("Tidak ada data yang cocok.")
            return

        if selected_category == 'body_part':
            # regex=False: the typed text is matched literally, so characters
            # such as '(' or '+' cannot raise or match unexpectedly
            row_matches = df[selected_category].str.contains(
                filter_value, na=False, case=False, regex=False
            )
        else:
            row_matches = df[selected_category] == filter_value
        filtered_df = df[row_matches]

        if filtered_df.empty:
            print("Tidak ada data yang cocok.")
        else:
            # Render the result inside a scrollable box
            display(HTML('''
            <div style="height: 400px; overflow: auto; border:1px solid #ccc; padding:5px">
                {0}
            </div>
            '''.format(filtered_df.to_html(index=False, border=1))))

# Wire up the callbacks
category_selector.observe(on_category_selected, names='value')
input_box.observe(on_input_submitted, names='value')

display(category_selector)
display(input_box)
display(output)

# Show the options of the default category straight away; the observer above
# only fires once the dropdown value changes
on_category_selected(None)

  input_box.on_submit(on_input_submitted)


Dropdown(description='Pilih opsi:', options=('exercise_type', 'body_part'), style=DescriptionStyle(description…

Text(value='', description='Masukkan nilai kategori:', placeholder='Contoh: biceps atau weight_reps', style=Te…

Output()

In [40]:
import pandas as pd

# Read the renamed, de-duplicated exercise table
df = pd.read_csv('exercise_dedup_renamed.csv')

# Nothing to export when the image column is absent
if 'exercise_image' not in df.columns:
    print("Kolom 'exercise_image' tidak ditemukan di data.")
else:
    # Write a single-column CSV holding only the image URLs
    image_column_only = df.loc[:, ['exercise_image']]
    image_column_only.to_csv('exercise_image_urls_full.csv', index=False)
    print("CSV dengan URL gambar berhasil dibuat: 'exercise_image_urls_full.csv'")

CSV dengan URL gambar berhasil dibuat: 'exercise_image_urls_full.csv'


In [45]:
import pandas as pd
from collections import Counter
import os

from urllib.parse import urlsplit

# Load data
df = pd.read_csv('exercise_image_urls_full.csv')

# Work only with rows that actually have an image URL
image_urls = df['exercise_image'].dropna()


def _url_extension(url):
    """Return the lower-cased file extension (e.g. '.png') of a URL's path.

    The URL is split first so a query string or fragment
    ('.../a.png?size=small') is not mistaken for part of the extension.
    """
    return os.path.splitext(urlsplit(url).path)[1].lower()


# Extension of every URL
extensions = image_urls.apply(_url_extension)

# Tally how many URLs use each extension
ext_counts = Counter(extensions)

# Print the number of URLs and the per-extension tally
print(f"Total data dengan URL gambar: {len(image_urls)}")
print("Jumlah file berdasarkan ekstensi:")
for ext, count in ext_counts.items():
    print(f"{ext}: {count}")

Total data dengan URL gambar: 647
Jumlah file berdasarkan ekstensi:
.png: 647


In [46]:
import pandas as pd

df = pd.read_csv('exercise_image_urls_full.csv')

# Image URLs with the empty cells removed
urls = df['exercise_image'].dropna()

# Total, distinct, and surplus (total minus distinct) URL counts
total_urls = urls.shape[0]
unique_urls = urls.nunique()
duplicate_count = total_urls - unique_urls

print(f"Total URL gambar: {total_urls}")
print(f"Jumlah URL unik: {unique_urls}")
print(f"Jumlah URL duplikat: {duplicate_count}")

# Every occurrence of a URL that appears more than once, then the ten most frequent
appears_more_than_once = urls.duplicated(keep=False)
duplicates = urls.loc[appears_more_than_once]
print("\nContoh link duplikat dan jumlah kemunculannya:")
top_duplicates = duplicates.value_counts().head(10)
print(top_duplicates)

Total URL gambar: 647
Jumlah URL unik: 601
Jumlah URL duplikat: 46

Contoh link duplikat dan jumlah kemunculannya:
exercise_image
https://apilyfta.com/static/GymvisualPNG/02981101-Dumbbell-Cross-Body-Hammer-Curl_Forearms_small.png        2
https://apilyfta.com/static/GymvisualPNG/03691101-Dumbbell-Over-Bench-Wrist-Curl_Forearms_small.png         2
https://apilyfta.com/static/GymvisualPNG/03821101-Dumbbell-Revers-grip-Biceps-Curl_Forearms_small.png       2
https://apilyfta.com/static/GymvisualPNG/04011101-Dumbbell-Seated-Palms-Up-Wrist-Curl_Forearms_small.png    2
https://apilyfta.com/static/GymvisualPNG/03851101-Dumbbell-Reverse-Wrist-Curl_Forearm_small.png             2
https://apilyfta.com/static/GymvisualPNG/01251101-Barbell-Wrist-Curl-II_Forearms_small.png                  2
https://apilyfta.com/static/GymvisualPNG/00801101-Barbell-Reverse-Curl_Forearm_small.png                    2
https://apilyfta.com/static/GymvisualPNG/01101101-Barbell-Standing-Reverse-Grip-Curl_Forearms_small.

In [51]:
import pandas as pd

df = pd.read_csv('exercise_dedup_renamed.csv')

# Print every row and column in full, without truncation or wrapping
for option_name, option_value in (
    ('display.max_rows', None),
    ('display.max_columns', None),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

image_col = df['exercise_image']

# URLs that occur more than once
duplicate_urls = image_col[image_col.duplicated(keep=False)]

# Rows whose URL is one of those repeated URLs
df_duplicates = df.loc[image_col.isin(duplicate_urls)]

# Order by URL so rows sharing an image sit next to each other
df_duplicates_sorted = df_duplicates.sort_values('exercise_image')

print(df_duplicates_sorted)

     exercise_id                                     exercise_name      exercise_type         body_part                                                                                                             exercise_image
138         1477                               Barbell Bench Squat        weight_reps  hips, quadriceps                                       https://apilyfta.com/static/GymvisualPNG/00261101-Barbell-Bench-Squat_Hips_small.png
130            3                                       Bench Squat        weight_reps  hips, quadriceps                                       https://apilyfta.com/static/GymvisualPNG/00261101-Barbell-Bench-Squat_Hips_small.png
133         1493                               Barbell Front Squat        weight_reps              hips                                   https://apilyfta.com/static/GymvisualPNG/00421101-Barbell-Front-Squat_Hips-FIX_small.png
125           17                                       Front Squat        weight_reps       