In [8]:
import pandas as pd
import numpy as np

# FULL DATA: 26 players + 4 NEW distinguishing traits (all unique now!)
data = [
    ["ParasKhadka",1,1,1,1,1,1,1,1,1,0,1,0,0, 0,0,0,0],  # right, not opener, no ipl, not leg
    ["DipendraSinghAiree",1,1,1,1,1,1,1,1,0,1,0,0,1, 0,0,0,0],  # right offbreak
    ["SandeepLamichhane",1,1,1,1,1,0,1,0,1,1,0,0,1, 0,0,1,1],  # right legspin, ipl
    ["RohitPaudel",1,1,1,1,1,1,0,1,1,1,0,0,1, 0,0,0,0],  # right off
    ["SompalKami",1,1,1,1,1,0,1,1,0,0,1,0,0, 0,0,0,0],  # right fast
    ["GyanendraMalla",1,1,1,1,1,1,0,0,1,0,0,0,0, 0,0,0,0],  # right bat
    ["KaranKC",1,1,1,1,1,0,1,1,0,0,1,0,0, 0,0,0,0],  # right fast
    ["AasifSheikh",1,1,1,1,1,1,0,0,0,0,0,1,1, 0,1,0,0],  # right wk opener
    ["KushalBhurtel",1,1,1,1,1,1,1,1,0,1,0,0,1, 0,1,0,1],  # right legspin opener
    ["KushalMalla",1,1,1,1,1,1,1,1,0,1,0,0,1, 1,0,0,0],  # LEFT bat/arm
    ["GulshanJha",1,1,1,1,1,1,1,1,0,0,1,0,1, 1,0,0,0],  # LEFT bat pace
    ["AarifSheikh",1,1,1,1,1,1,1,1,0,0,1,0,1, 0,0,0,0],  # right med
    ["LalitRajbanshi",1,1,1,1,1,0,1,0,0,1,0,0,0, 1,0,0,0],  # left-arm orthodox
    ["AbinashBohara",1,1,1,1,1,0,1,0,0,0,1,0,0, 0,0,0,0],  # right pace
    ["BinodBhandari",1,1,1,1,1,1,0,0,0,0,0,1,0, 0,0,0,0],  # right wk older
    ["SundeepJora",1,1,1,1,1,1,0,0,0,0,0,0,1, 0,0,0,0],  # right middle
    ["NandanYadav",1,1,1,1,1,0,1,1,0,0,1,0,1, 0,0,0,0],  # right med
    ["LokeshBam",1,1,1,1,1,1,0,0,0,0,0,1,1, 0,0,0,0],  # right wk young
    ["AadilAnsari",1,1,1,1,1,1,1,1,0,0,1,0,1, 0,0,0,0],  # right medfast
    ["SahabAlam",1,1,1,1,1,0,1,0,0,1,0,0,0, 1,0,0,0],  # LEFT bat left-arm
    ["SherMalla",1,1,1,1,1,1,1,1,0,1,0,0,1, 0,0,0,0],  # right mystery/off
    ["BhimSharki",1,1,1,1,1,1,1,1,0,1,0,0,1, 0,0,0,0],  # right off
    ["IshanPandey",1,1,1,1,1,1,1,1,0,1,0,0,0, 0,0,0,0],  # right spin allr older
    ["BasirAhamad",1,1,1,1,1,1,1,1,0,1,0,0,1, 1,0,0,0],  # LEFT bat/arm
    ["PratishGC",1,1,1,1,1,0,1,0,0,0,1,0,1, 1,0,0,0],  # left-arm med (pace)
    ["RupeshKSingh",1,1,1,1,1,1,1,1,0,0,1,0,1, 0,0,0,0]   # right med
]

columns = ["character","real","male","alive","cricketer","national_team",
           "batsman","bowler","all_rounder","captain_experience",
           "spin_bowler","fast_bowler","wicket_keeper","young_player",
           "left_handed","opener","ipl_experience","leg_spinner"]

df = pd.DataFrame(data, columns=columns)
df.to_csv("nepali_cricket_akinator_perfect.csv", index=False)

# Human-readable questions (now 12!)
questions = {
    "batsman": "Primarily batsman (top/middle)?",
    "bowler": "Primarily bowler?",
    "all_rounder": "All-rounder (bats AND bowls)?",
    "captain_experience": "Captained senior Nepal team?",
    "spin_bowler": "Bowls spin?",
    "fast_bowler": "Bowls pace/fast-medium?",
    "wicket_keeper": "Wicket-keeper?",
    "young_player": "Young/emerging (born 2000+)?",
    "left_handed": "Left-handed batter OR left-arm bowler?",  # NEW!
    "opener": "Regular opener (top 2)?",  # NEW!
    "ipl_experience": "Played in IPL?",  # NEW!
    "leg_spinner": "Leg-spinner (googly/legbreak)?"  # NEW!
}
features = list(questions.keys())

def entropy(series):
    if len(series) <= 1: return 0
    _, counts = np.unique(series, return_counts=True)
    probs = counts / len(series)
    return -np.sum(probs * np.log2(probs + 1e-10))  # Avoid log0

def info_gain(df_remain, feature):
    h_parent = entropy(df_remain['character'].values)
    yes_df = df_remain[df_remain[feature] == 1]
    no_df = df_remain[df_remain[feature] == 0]
    n = len(df_remain)
    if len(yes_df) == 0 or len(no_df) == 0: return 0
    h_yes = entropy(yes_df['character'].values)
    h_no = entropy(no_df['character'].values)
    return h_parent - (len(yes_df)/n * h_yes + len(no_df)/n * h_no)

# GAME: Max 12 Qs, then guess best
print("ðŸ¤– Nepali Cricket Akinator v2.0! (Perfect - All Unique!)")
print("Think of one player. Answer Y/N. Max 12 Qs!\n")
remain = df.copy()
steps = 0
MAX_QS = 12
while len(remain) > 1 and steps < MAX_QS:
    steps += 1
    best_feat = max(features, key=lambda f: info_gain(remain, f))
    q = questions[best_feat]
    print(f"Q{steps}: {q}")
    ans = input("Y/N: ").strip().upper()
    if ans == 'Y': remain = remain[remain[best_feat] == 1]
    elif ans == 'N': remain = remain[remain[best_feat] == 0]
    else: print("Y/N only!"); steps -= 1; continue
    print(f"Left: {len(remain)} ({', '.join(remain['character'].tolist())})\n")

if len(remain) == 1:
    guessed = remain['character'].iloc[0]
    print(f"ðŸŽ‰ **{guessed}** in {steps} Qs!")
else:
    print(f"Max Qs! Top guesses: {', '.join(remain['character'].head(3).tolist())}")
    guessed = remain['character'].mode().iloc[0] if not remain['character'].mode().empty else remain['character'].iloc[0]

print("Correct? Y/N")
verify = input().strip().upper()
print("âœ… Great!" if verify == 'Y' else "ðŸ˜… Add more traits?")
print("\nDataset:")
print(df.drop(['real','male','alive','cricketer','national_team'], axis=1, errors='ignore'))

ðŸ¤– Nepali Cricket Akinator v2.0! (Perfect - All Unique!)
Think of one player. Answer Y/N. Max 12 Qs!

Q1: Bowls spin?


Y/N:  y


Left: 11 (DipendraSinghAiree, SandeepLamichhane, RohitPaudel, KushalBhurtel, KushalMalla, LalitRajbanshi, SahabAlam, SherMalla, BhimSharki, IshanPandey, BasirAhamad)

Q2: Left-handed batter OR left-arm bowler?


Y/N:  n


Left: 7 (DipendraSinghAiree, SandeepLamichhane, RohitPaudel, KushalBhurtel, SherMalla, BhimSharki, IshanPandey)

Q3: Captained senior Nepal team?


Y/N:  y


Left: 2 (SandeepLamichhane, RohitPaudel)

Q4: Primarily batsman (top/middle)?


Y/N:  n


Left: 1 (SandeepLamichhane)

ðŸŽ‰ **SandeepLamichhane** in 4 Qs!
Correct? Y/N


 y


âœ… Great!

Dataset:
             character  batsman  bowler  all_rounder  captain_experience  \
0          ParasKhadka        1       1            1                   1   
1   DipendraSinghAiree        1       1            1                   0   
2    SandeepLamichhane        0       1            0                   1   
3          RohitPaudel        1       0            1                   1   
4           SompalKami        0       1            1                   0   
5       GyanendraMalla        1       0            0                   1   
6              KaranKC        0       1            1                   0   
7          AasifSheikh        1       0            0                   0   
8        KushalBhurtel        1       1            1                   0   
9          KushalMalla        1       1            1                   0   
10          GulshanJha        1       1            1                   0   
11         AarifSheikh        1       1            1               